@ GP32 "running code from cache" example
@ (To allow code execution at 132MHz)
@
@ !!!!!!! NOTE !!!!!!!!!!!
@ The CPU speed setting routine below scrambles the LCD
@ display because besides changing the CPU speed it also
@ adjusts other timings as well. This will be fixed as soon
@ as I know what I'm actually doing. :P
@
@ Note also that all code that is run at 132MHz must be
@ in instruction cache first or the gp32 will crash.
@ To get it into instruction cache you must run it first
@ at low speed.

@ FastDivide - signed 32-bit divide with remainder (Div()/DivRem() style)
@ Entry:
@    R0 = (s32) Numerator
@    R1 = (s32) Denominator
@ Exit:
@    R0 = (s32) Numerator / Denominator
@    R1 = (s32) Numerator % Denominator  (carries the sign of the numerator)
@    R3 = (u32) | Numerator / Denominator |
@ Clobbers:
@    R2, R12 and the condition flags. No stack is used.
@ Division by zero:
@    Returns R0 = 0, R1 = Numerator, R3 = 0 instead of looping forever.

        .ALIGN
        .GLOBAL  FastDivide

FastDivide:
        ands     r3,r1,#0x80000000      @ r3 = sign bit of the denominator (N mirrors it)
        rsbmi    r1,r1,#0               @ r1 = |denominator|
        eors     r12,r3,r0,asr #32      @ r12 bit31 = sign of quotient, bit30 = sign of numerator
                                        @ C = bit31 of the numerator (shifter carry-out)
        rsbcs    r0,r0,#0               @ r0 = |numerator|
        movs     r2,r1                  @ r2 = working divisor, Z set if the denominator is 0
        beq      FastDivide_finish      @ denominator 0: the loop below would never end;
                                        @ r3 is 0 here, so the tail yields quotient 0 and
                                        @ remainder = numerator

@ Scale the divisor up: double r2 while it is still no more than half of
@ the numerator, so r2 ends as the largest denominator * 2^k <= numerator
@ (or the denominator itself when it is already larger than the numerator).
l3c8:
        cmp      r2,r0,lsr #1
        movls    r2,r2,lsl #1
        bcc      l3c8

@ Shift-and-subtract: one quotient bit per pass, walking r2 back down to
@ the original denominator. The sign bit initially held in r3 is shifted
@ out by the first adc, so r3 finishes as the unsigned quotient.
l3d4:
        cmp      r0,r2                  @ C = 1 when the divisor fits
        adc      r3,r3,r3               @ r3 = (r3 << 1) | C
        subcs    r0,r0,r2               @ take the divisor out of the running remainder
        teq      r2,r1                  @ back at the unscaled divisor? (teq leaves C alone)
        movne    r2,r2,lsr #1
        bne      l3d4

@ Apply the signs recorded in r12.
FastDivide_finish:
        mov      r1,r0                  @ r1 = |remainder|
        mov      r0,r3                  @ r0 = |quotient| (r3 keeps the unsigned copy)
        movs     r12,r12,lsl #1         @ C = quotient sign, N = numerator sign
        rsbcs    r0,r0,#0               @ negate the quotient when the operand signs differ
        rsbmi    r1,r1,#0               @ remainder follows the sign of the numerator
        bx       r14

        .align
        .global  DivideTest1

@ DivideTest1 - benchmark loop: calls FastDivide(1029, 11) 500000 times
@ without touching the CPU clock.
@ Entry: nothing
@ Exit:  r0-r3 and r12 hold whatever the last FastDivide call left in them
@ r4 is the loop counter; it is saved and restored together with the
@ return address.
DivideTest1:
        stmfd    sp!,{r4,lr}
        ldr      r4,=500000             @ iterations remaining
1:
        mov      r1,#11                 @ denominator
        ldr      r0,=1029               @ numerator
        bl       FastDivide
        subs     r4,r4,#1
        bne      1b

        ldmfd    sp!,{r4,pc}            @ restore r4 and return

        .align
        .global  DivideTest2

@ DivideTest2 - same benchmark as DivideTest1, but the 500000 divisions
@ run with the CPU switched to the fast clock.
@ Entry: nothing
@ Exit:  CPU clock set back to the default speed (__ClockSet selector 0)
@ r4-r11 and lr are saved here; __ClockSet overwrites r4 and r5 and does
@ not restore them.
DivideTest2:
        stmfd    sp!,{r4-r11,lr}

@ Make sure we are at the default speed, then run FastDivide once there.
@ The file header notes that code must have been executed at low speed
@ (so that it sits in the instruction cache) before it runs at 132MHz.
        mov      r0,#0
        bl       __ClockSet

        mov      r1,#11                 @ denominator
        ldr      r0,=1029               @ numerator
        bl       FastDivide

@ Switch to the fast clock and run the timed loop.
        mov      r0,#1
        bl       __ClockSet

        ldr      r4,=500000             @ iterations remaining
1:
        mov      r1,#11                 @ denominator
        ldr      r0,=1029               @ numerator
        bl       FastDivide
        subs     r4,r4,#1
        bne      1b

@ Drop back to the default clock before returning.
        mov      r0,#0
        bl       __ClockSet

        ldmfd    sp!,{r4-r11,pc}        @ restore registers and return

@ Set CPU speed
@ Entry: r0 = 1, 132MHz
@        r0 = 0 (or any other value), 67.8MHz (default)
@ Exit:  no meaningful return value
@ Clobbers: r0-r5 and the condition flags. r4 and r5 are NOT preserved,
@           so callers must save them if they need them. No stack is used.
@
@ NOTE(review): register names in the comments below (LOCKTIME, MPLLCON,
@ CLKDIVN, REFRESH) are taken from the S3C2400 manual layout and should be
@ confirmed against it; the code itself only shows the raw offsets.

__ClockSet:

    ldr   r2,=1             @ clock mode (picks the FCLK:HCLK:PCLK ratio used below)

@ Start from the 67.8MHz settings so r1/r3 are always defined, then
@ override them only for selector 1. Previously any selector other than
@ 0 or 1 left r1/r3 holding whatever the caller had in them.
    ldr   r1,=0x69032       @ div factor
    ldr   r3,=67800000      @ Fout=67.8MHz
    ldr   r4,=0x66031       @ div factor (the author also noted the value 24001 here)
    ldr   r5,=132000000     @ Fout=132MHz
    cmp   r0,#1
    moveq r1,r4
    moveq r3,r5

    ldr   r0,=0x1090
    ldr   r0,[r0]           @ get lock time (r0 is overwritten below and re-read at 2:)

    ldr   r5,=0x14800000    @ base address of the clock registers written below

    cmp   r2,#2
    blt   1f                @ modes 0 and 1 use fast bus mode

@ set 1:2 async bus mode
    mrc   p15,0,r0,c1,c0,0
    orr   r0,r0,#0xc0000000
    mcr   p15,0,r0,c1,c0,0

    b     2f
1:
@  check for 1:1 fast bus mode?
    ldr   r4,[r5,#0x14]     @ divider mode currently programmed (CLKDIVN?)
    cmp   r4,#2
    blt   3f                @ already in mode 0/1: just select fast bus mode

@ Currently in mode 2/3: slow the PLL down and program the new divider
@ mode before leaving async bus mode.
    ldr   r4,=0x5c0a1       @ MPLL = 50MHz
    str   r4,[r5,#4]
    str   r2,[r5,#0x14]

3:
@ set fast bus mode
    mrc   p15,0,r0,c1,c0,0
    bic   r0,r0,#0xc0000000
    mcr   p15,0,r0,c1,c0,0

2:
    ldr   r0,=0x1090
    ldr   r0,[r0]           @ get lock time
@    ldr   r0,[sp]

    str   r2,[r5,#0x14]     @ divider mode               (CLKDIVN?)
    str   r0,[r5]           @ lock time                  (LOCKTIME?)
    str   r1,[r5,#4]        @ div factor selected above  (MPLLCON?)

@ Work out the three clocks for the chosen mode and store them as three
@ consecutive words at 0x0c7b0000.
@ NOTE(review): presumably a frequency table read by other code - confirm.
    mov   r0,r3             @ r0 = Fout
    mov   r4,r2             @ r4 = clock mode
    ldr   r5,=0x0c7b0000
    cmp   r4,#0             @ 1:1:1
    moveq r1,r0
    moveq r2,r0
    cmp   r4,#1             @ 1:1:2
    moveq r1,r0
    moveq r2,r0,lsr #1
    cmp   r4,#2             @ 1:2:2
    moveq r1,r0,lsr #1
    moveq r2,r1
    cmp   r4,#3             @ 1:2:4
    moveq r1,r0,lsr #1
    moveq r2,r0,lsr #2
    stmia r5!,{r0-r2}       @ r1 = HCLK

@ Fix SDRAM refresh counter
@ r2 = (HCLK / 1024) * 156; dividing that by 10000 gives roughly
@ HCLK[MHz] * 15.6 (the shift by 10 stands in for a divide by 1000).
    mov   r2,r1,lsr #10
    mov   r3,#156
    mul   r4,r2,r3          @ separate destination register, then copied back to r2
    mov   r2,r4
    ldr   r3,=10000

@@@@@
@ r0 = r2 / r3   (by repeated subtraction; r2 is left holding the remainder)
@
    mov   r0,#0
4:
    cmp   r2,r3
    blt   5f
    add   r0,r0,#1
    sub   r2,r2,r3
    b     4b
5:
@
@@@@@

    rsb   r1,r0,#0x800      @ r1 = 2^11 - r0
    add   r1,r1,#1          @ r1 = 2^11 + 1 - HCLK*15.6 = refresh counter
    ldr   r0,=0x14000000
    ldr   r2,[r0,#0x24]     @ current refresh register value (REFRESH?)
    mov   r2,r2,lsr #11     @ drop the old low 11 bits ...
    orr   r1,r1,r2,lsl #11  @ ... and keep the upper bits around the new counter
    str   r1,[r0,#0x24]

    bx lr

