;
;	ITG/640 80386 core - NASM version for Djgpp, Linux and BeOS 4
;

%include "os.inc"

BAL_15BPP equ 0111101111011110b
BAL_16BPP equ 1111011111011110b
SCREENSIZE equ 153600

[BITS 32]

[SECTION .text]

; Set external symbols

defextern I32_clut
defextern I32_dlut
defextern I32_llut

        ;
        ; Balance 50/50, old colour and new colour equal strength
        ;

        %macro BALANCE_5050 1

        ; 50/50 blend preparation: both pixels (EBX and ECX) are reduced
        ; to half strength.  The macro argument is the mask applied ahead
        ; of the shift; BAL_15BPP / BAL_16BPP have the lowest bit of every
        ; colour field cleared, so the one-bit shift cannot carry a bit
        ; from one field into the top of its neighbour.

        and     ebx, %1
        and     ecx, %1
        shr     ebx, 1
        shr     ecx, 1
        %endmacro

        ;
        ; Balance 25/75, old colour more prominent than new
        ;

        %macro BALANCE_2575 1

        ; Halve only the value held in EBX (the "new" pixel in the blend
        ; routines); ECX is left untouched at full strength.
        ; The macro argument is the mask ANDed into EBX before the shift.

        and ebx,%1 ; drop the bits that would cross a colour-field boundary
        shr ebx,1  ; EBX = masked value / 2
        %endmacro

        ;
        ; Balance 75/25, new colour more prominent than old
        ;

        %macro BALANCE_7525 1

        ; Halve only the value held in ECX (the "old" pixel in the blend
        ; routines); EBX is left untouched at full strength.
        ; The macro takes one argument, the mask ANDed into ECX before the
        ; shift (e.g. BAL_15BPP or BAL_16BPP), exactly like BALANCE_5050
        ; and BALANCE_2575.  The body has always referenced the first
        ; macro parameter, so the declared parameter count must be 1.

        and     ecx, %1
        shr     ecx, 1
        %endmacro

;
; off
;

; off - raise interrupt 10h with EAX = 3.
; Takes no arguments, builds no stack frame and does not save any
; registers itself.
; NOTE(review): presumably the BIOS "set video mode" call switching back
; to text mode 3; only meaningful where int 10h BIOS services are
; reachable - confirm for the non-DOS targets named in the file header.
proc off
	mov eax,3h	;EAX = 3, i.e. AH = 0 (service 0) and AL = 3 (mode 3)
	int 10h		;trip the graphics interrupt
	ret		;go back to C program


;
;  Dot
;

; dot - store one 16-bit pixel.
; Stack arguments (offsets valid once the EBP frame is set up):
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] colour (only the low 16 bits are stored)
;   [ebp+20] screen base address
; The target address is screen + Y*1280 + X*2.
; All general-purpose registers are preserved via pushad/popad.
proc dot
        push    ebp
        mov     ebp, esp
        pushad

        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]        ; Y * 5
        shl     eax, 8                  ; Y * 1280 (bytes per scanline)
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]        ; row offset + X * 2 bytes
        add     edi, [ebp+20]           ; + screen base
        mov     eax, [ebp+16]           ; colour
        mov     [edi], ax               ; plot it

        popad
        pop     ebp
        ret


;
;	get
;

; I32get1 - copy a rectangle from the screen into a packed sprite buffer.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W, used as a byte count per row (skip value is 1280 - W)
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (destination)   [ebp+28] screen (source)
; Each row is copied as W/4 dwords, plus one extra word when bit 1 of W
; is set.  The source then skips to the next 1280-byte scanline; the
; destination is written contiguously.
; The string instructions rely on the direction flag being clear; it is
; not set up here.  All general registers are preserved.
proc I32get1
        push    ebp
        mov     ebp, esp
        pushad

        mov     edi, [ebp+24]           ; destination: sprite buffer
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     esi, [eax+edx*2]        ; offset of the first pixel
        add     esi, [ebp+28]           ; source: screen + offset

        mov     ebx, [ebp+16]           ; W in bytes
        mov     eax, 1280
        sub     eax, ebx                ; EAX = bytes from row end to next row start
        mov     edx, [ebp+20]           ; EDX = rows left

        mov     ecx, ebx                ; keep the low bits of W for the test
        shr     ebx, 2                  ; EBX = whole dwords per row
        test    cl, 2                   ; a leftover word at the end of each row?
        jnz     .row_with_tail

.row_plain:
        mov     ecx, ebx
        rep movsd                       ; copy one row
        add     esi, eax                ; next scanline
        dec     edx
        jnz     .row_plain
        jmp     .finished

.row_with_tail:
        mov     ecx, ebx
        rep movsd                       ; dword part of the row
        movsw                           ; trailing pixel
        add     esi, eax                ; next scanline
        dec     edx
        jnz     .row_with_tail

.finished:
        popad
        pop     ebp
        ret

;
;	block put
;

; I32put1 - copy a packed sprite buffer onto the screen (no transparency).
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W, used as a byte count per row (skip value is 1280 - W)
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (source)        [ebp+28] screen (destination)
; Each row is copied as W/4 dwords, plus one extra word when bit 1 of W
; is set.  The destination then skips to the next 1280-byte scanline; the
; source is read contiguously.
; The string instructions rely on the direction flag being clear; it is
; not set up here.  All general registers are preserved.
proc I32put1
        push    ebp
        mov     ebp, esp
        pushad

        mov     esi, [ebp+24]           ; source: sprite buffer
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]        ; offset of the first pixel
        add     edi, [ebp+28]           ; destination: screen + offset

        mov     ebx, [ebp+16]           ; W in bytes
        mov     eax, 1280
        sub     eax, ebx                ; EAX = bytes from row end to next row start
        mov     edx, [ebp+20]           ; EDX = rows left

        mov     ecx, ebx                ; keep the low bits of W for the test
        shr     ebx, 2                  ; EBX = whole dwords per row
        test    cl, 2                   ; a leftover word at the end of each row?
        jnz     .row_with_tail

.row_plain:
        mov     ecx, ebx
        rep movsd                       ; copy one row
        add     edi, eax                ; next scanline (1280 - W)
        dec     edx
        jnz     .row_plain
        jmp     .finished

.row_with_tail:
        mov     ecx, ebx
        rep movsd                       ; dword part of the row
        movsw                           ; trailing pixel
        add     edi, eax                ; next scanline (1280 - W)
        dec     edx
        jnz     .row_with_tail

.finished:
        popad
        pop     ebp
        ret


; I32cel1 - draw a sprite, treating pixel value 0 as transparent.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W in bytes (halved IN PLACE on the stack to a pixel count)
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (source)        [ebp+28] screen (destination)
; All general registers are preserved.  lodsw relies on the direction
; flag being clear.
proc I32cel1
        push    ebp
        mov     ebp, esp
        pushad

        mov     esi, [ebp+24]           ; sprite data
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]
        add     edi, [ebp+28]           ; screen + Y*1280 + X*2

        mov     ebx, 1280
        sub     ebx, [ebp+16]           ; EBX = scanline skip (1280 - W bytes)
        shr     dword [ebp+16], 1       ; W bytes -> W pixels (two bytes each)
        mov     edx, [ebp+20]           ; EDX = rows left

.row:
        mov     ecx, [ebp+16]           ; ECX = pixels left in this row
.pixel:
        lodsw                           ; next sprite pixel
        test    ax, ax
        jz      .transparent            ; zero means "leave the screen alone"
        mov     [edi], ax
.transparent:
        add     edi, 2
        dec     ecx
        jnz     .pixel

        add     edi, ebx                ; start of the next scanline
        dec     edx
        jnz     .row

        popad
        pop     ebp
        ret


;
;clipped cel sprites with preclipping
;

; I32cel2 - transparent sprite draw that skips "coff" source pixels
; BEFORE every row (pre-clipping).  Pixel value 0 is transparent.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W; subtracted from 1280 as a byte count, then halved IN
;            PLACE on the stack to give the per-row pixel count
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (source)        [ebp+28] screen (destination)
;   [ebp+32] coff, in pixels (doubled IN PLACE on the stack to bytes)
; NOTE(review): I32cel3 doubles W before computing the skip, i.e. treats
; W as pixels, while this routine treats it as bytes - confirm callers.
; All general registers are preserved.  lodsw relies on the direction
; flag being clear.
proc I32cel2
        push    ebp
        mov     ebp, esp
        pushad

        mov     esi, [ebp+24]           ; sprite data
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]
        add     edi, [ebp+28]           ; screen + Y*1280 + X*2

        mov     ebx, 1280
        sub     ebx, [ebp+16]           ; EBX = scanline skip
        shr     dword [ebp+16], 1       ; per-row pixel count
        mov     edx, [ebp+20]           ; EDX = rows left

        shl     dword [ebp+32], 1       ; clip offset: pixels -> bytes
        add     esi, [ebp+32]           ; skip the clipped part of row 0

.row:
        mov     ecx, [ebp+16]           ; ECX = pixels left in this row
.pixel:
        lodsw
        test    ax, ax
        jz      .transparent
        mov     [edi], ax
.transparent:
        add     edi, 2
        dec     ecx
        jnz     .pixel

        add     esi, [ebp+32]           ; skip the clipped part of the next row
        add     edi, ebx                ; start of the next scanline
        dec     edx
        jnz     .row

        popad
        pop     ebp
        ret


;
;clipped cel sprites with postclipping
;

; I32cel3 - transparent sprite draw that skips "coff" source pixels
; AFTER every row (post-clipping).  Pixel value 0 is transparent.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W in pixels (temporarily doubled on the stack to get bytes)
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (source)        [ebp+28] screen (destination)
;   [ebp+32] coff, in pixels (doubled IN PLACE on the stack to bytes)
; All general registers are preserved.  lodsw relies on the direction
; flag being clear.
proc I32cel3
        push    ebp
        mov     ebp, esp
        pushad

        mov     esi, [ebp+24]           ; sprite data
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]
        add     edi, [ebp+28]           ; screen + Y*1280 + X*2

        shl     dword [ebp+16], 1       ; W pixels -> bytes
        mov     ebx, 1280
        sub     ebx, [ebp+16]           ; EBX = scanline skip (1280 - W*2)
        shr     dword [ebp+16], 1       ; back to a pixel count
        mov     edx, [ebp+20]           ; EDX = rows left

        shl     dword [ebp+32], 1       ; clip offset: pixels -> bytes

.row:
        mov     ecx, [ebp+16]           ; ECX = pixels left in this row
.pixel:
        lodsw
        test    ax, ax
        jz      .transparent
        mov     [edi], ax
.transparent:
        add     edi, 2
        dec     ecx
        jnz     .pixel

        add     esi, [ebp+32]           ; drop the clipped tail of this row
        add     edi, ebx                ; start of the next scanline
        dec     edx
        jnz     .row

        popad
        pop     ebp
        ret

; hline - fill a horizontal run of 16-bit pixels with one colour.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] length in pixels (count for rep stosw)
;   [ebp+20] colour (low 16 bits are stored)
;   [ebp+24] screen base address
; All general registers are preserved.  stosw relies on the direction
; flag being clear.
proc hline
        push    ebp
        mov     ebp, esp
        pushad

        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]
        add     edi, [ebp+24]           ; screen + Y*1280 + X*2

        mov     ecx, [ebp+16]           ; number of pixels
        mov     eax, [ebp+20]           ; fill colour in AX
        rep stosw

        popad
        pop     ebp
        ret

;
;  copy screen, alias _fcpy
;

; fblit - copy SCREENSIZE dwords from one screen buffer to another.
; Stack arguments:
;   [ebp+8]  destination screen     [ebp+12] source screen
; Only EDI, ESI and ECX are used, and they are saved and restored here.
; movsd relies on the direction flag being clear.
proc fblit
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ecx

        mov     ecx, SCREENSIZE         ; dword count for the whole buffer
        mov     edi, [ebp+8]            ; to
        mov     esi, [ebp+12]           ; from
        rep movsd

        pop     ecx
        pop     esi
        pop     edi
        pop     ebp
        ret

;
;       Plot a sprite with translucency, in 15bpp
;

; I32celt15 - draw a sprite blended 50/50 with the screen, 15bpp layout
; (blue in bits 0-4, green in bits 5-9, red in bits 10-14, as given by
; the masks used below).  Sprite pixels equal to 0 are skipped.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W in bytes (halved IN PLACE on the stack to a pixel count)
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (source)        [ebp+28] screen (destination)
; All general registers are preserved.  lodsw relies on the direction
; flag being clear.
proc I32celt15
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+28]	;screen
	mov esi,[ebp+24]	;sprite
	mov eax,[ebp+12]   ;Y
	mov edx,[ebp+8]    ;X

	lea eax,[eax+eax*4]	; Y * 5
	shl eax,8		; Y * 1280 (bytes per scanline)
	shl edx,1		; X * 2 (bytes per pixel)
	add edi,eax
	add edi,edx

	mov ebx,1280
	sub ebx,[ebp+16]		; EBX = scanline skip (1280 - W bytes)

	shr dword [ebp+16],1     ; W bytes -> W pixels
	mov ecx,[ebp+16]	; ECX = pixels left in the current row
	mov edx,[ebp+20]     ; H
        xor eax,eax		; upper half of EAX stays 0, so the 32-bit
				; compare after lodsw only sees the pixel

ct15loop:
	lodsw			; AX = next sprite pixel
	cmp eax,0
	jz ct15dont		; 0 = transparent, leave the screen pixel alone

        pushad                  ; Save the loop state; every general
                                ; register (EBP included) is scratch below
        mov ebx,eax             ; copy bx = ax (source)
        mov cx,word [edi]       ; CX = dest

        BALANCE_5050 BAL_15BPP  ; halve every channel of both pixels

        mov esi,ebx             ; Use esi,ebp as general regs
        mov ebp,ecx		; (the frame pointer is restored by popad)

        and ebx,00000000000000000111110000011111b       ; R+B
        and ecx,00000000000000000111110000011111b       ; R+B
        and esi,00000000000000000000001111100000b       ; G
        and ebp,00000000000000000000001111100000b       ; G

        ; BL = BLue BH= red  ESI = Green

        add ebx,ecx             ; Add Red and Blue groups
        add esi,ebp             ; Add Green groups

        ; With BALANCE_5050 every halved channel is at most 15, so the
        ; sums cannot exceed 31 and the clamps below never trigger; they
        ; only matter if a different BALANCE_* macro is substituted.

        ; saturate Blue component

        cmp bl,31               ; blue sum against the 5-bit maximum
        jbe ct15_b_noclip         ; if > 31, clip to 31
        mov bl,31               ; clip it
ct15_b_noclip:

        ; saturate Red component

        cmp bh,01111100b        ; red sits in bits 2-6 of BH; this is 31 << 2
        jbe ct15_r_noclip         ; if > 31, clip to 31
        mov bh,01111100b        ; clip it
ct15_r_noclip:

        ; saturate Green component

        cmp esi,00000000000000000000001111100000b	; 31 << 5
        jbe ct15_g_noclip         ; if > 31, clip to 31
        mov esi,00000000000000000000001111100000b
ct15_g_noclip:

        add ebx,esi		; merge green back in with red and blue
        mov word [edi],bx       ; Write output

        popad			; restore the loop state (and EBP)

ct15dont:
	inc edi			; advance one 16-bit pixel on the screen
	inc edi
	loop ct15loop		; next pixel in this row
	mov ecx,[ebp+16]	; reload the per-row pixel count
	add edi,ebx	;Skip to next line
	dec edx
	jnz ct15loop		; next row
ct15unloop:

	popad
	pop ebp
	ret

;
;       Plot a sprite with translucency, in 16bpp
;

; I32celt16 - draw a sprite blended 50/50 with the screen, 16bpp layout
; (blue in bits 0-4, green in bits 5-10, red in bits 11-15, as given by
; the masks used below).  Sprite pixels equal to 0 are skipped.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W in bytes (halved IN PLACE on the stack to a pixel count)
;   [ebp+20] H, number of rows
;   [ebp+24] sprite (source)        [ebp+28] screen (destination)
; All general registers are preserved.  lodsw relies on the direction
; flag being clear.
proc I32celt16
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+28]	;screen
	mov esi,[ebp+24]	;sprite
	mov eax,[ebp+12]        ;Y
	mov edx,[ebp+8]         ;X

	lea eax,[eax+eax*4]	; Y * 5
	shl eax,8		; Y * 1280 (bytes per scanline)
	shl edx,1		; X * 2 (bytes per pixel)
	add edi,eax
	add edi,edx

	mov ebx,1280
	sub ebx,[ebp+16]		; EBX = scanline skip (1280 - W bytes)

	shr dword [ebp+16],1     ; W bytes -> W pixels
	mov ecx,[ebp+16]	; ECX = pixels left in the current row
	mov edx,[ebp+20]     ; H
        xor eax,eax		; upper half of EAX stays 0, so the 32-bit
				; compare after lodsw only sees the pixel

ct16loop:
	lodsw			; AX = next sprite pixel
	cmp eax,0
	jz ct16dont		; 0 = transparent, leave the screen pixel alone

        pushad                  ; Save the loop state; every general
                                ; register (EBP included) is scratch below
        mov ebx,eax             ; copy bx = ax (source)
        mov cx,word [edi]       ; CX = dest

        BALANCE_5050 BAL_16BPP  ; halve every channel of both pixels

        mov esi,ebx             ; Use esi,ebp as general regs
        mov ebp,ecx		; (the frame pointer is restored by popad)

        and ebx,00000000000000001111100000011111b       ; R+B
        and ecx,00000000000000001111100000011111b       ; R+B
        and esi,00000000000000000000011111100000b       ; G
        and ebp,00000000000000000000011111100000b       ; G

        ; BL = BLue BH= red  ESI = Green

        shr bh,3		; bring red down to bits 0-4 of BH / CH so
        shr ch,3		; that it can be compared against 31

        add ebx,ecx             ; Add Red and Blue groups
        add esi,ebp             ; Add Green groups

        ; With BALANCE_5050 the halved red/blue channels are at most 15
        ; and the halved green channel at most 31, so the sums stay in
        ; range and the clamps below never trigger; they only matter if
        ; a different BALANCE_* macro is substituted.

        ; saturate Blue component

        cmp bl,31
        jbe ct16_b_noclip       ; if > 31, clip to 31
        mov bl,31               ; clip it
ct16_b_noclip:

        ; saturate Red component

        cmp bh,31
        jbe ct16_r_noclip         ; if > 31, clip to 31
        mov bh,31                 ; clip it
ct16_r_noclip:

        ; saturate Green component

        cmp esi,0000011111100000b ; 63 << 5, the 6-bit green maximum
        jbe ct16_g_noclip         ; if > 63, clip to 63
        mov esi,0000011111100000b ; clip it
ct16_g_noclip:

        shl bh,3                ; Move Red back into position.
                                ; Don't bother about CH though
        add ebx,esi		; merge green back in with red and blue
        mov word [edi],bx       ; Write output

        popad			; restore the loop state (and EBP)

ct16dont:
	inc edi			; advance one 16-bit pixel on the screen
	inc edi
	loop ct16loop		; next pixel in this row
	mov ecx,[ebp+16]	; reload the per-row pixel count
	add edi,ebx             ; Skip to next line
	dec edx
	jnz ct16loop		; next row
ct16unloop:

	popad
	pop ebp
	ret

; Fast memset

; fset - fill memory with a 32-bit value.
; Stack arguments:
;   [ebp+8]  destination address
;   [ebp+12] fill value (the whole dword is stored)
;   [ebp+16] count, in DWORDS (it is the rep stosd count)
; All general registers are preserved.  stosd relies on the direction
; flag being clear.
proc fset
        push    ebp
        mov     ebp, esp
        pushad

        mov     ecx, [ebp+16]           ; dwords to write
        mov     eax, [ebp+12]           ; value
        mov     edi, [ebp+8]            ; where to
        rep stosd

        popad
        pop     ebp
        ret

; fcpy - copy memory in dword units.
; Stack arguments:
;   [ebp+8]  destination address
;   [ebp+12] source address
;   [ebp+16] count, in DWORDS (it is the rep movsd count)
; All general registers are preserved.  movsd relies on the direction
; flag being clear.
proc fcpy
        push    ebp
        mov     ebp, esp
        pushad

        mov     ecx, [ebp+16]           ; dwords to copy
        mov     esi, [ebp+12]           ; from
        mov     edi, [ebp+8]            ; to
        rep movsd

        popad
        pop     ebp
        ret


;;
;;      Darkening code
;;

;       Darken a sprite.  This is independent of bpp

; I32darkspr - combine a packed block of one-byte levels into a
; 640-byte-wide byte map through the table that I32_dlut points to.
; For every cell:  out = dlut[(in << 8) + out]
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W, cells per row       [ebp+20] H, number of rows
;   [ebp+24] output map (addressed as base + Y*640 + X)
;   [ebp+28] input block (read contiguously, W bytes per row)
; All general registers are preserved.
proc I32darkspr
        push    ebp
        mov     ebp, esp
        pushad

        mov     esi, [ebp+28]           ; input levels
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 7                  ; Y * 640
        add     eax, [ebp+8]            ; + X
        mov     edi, [ebp+24]
        add     edi, eax                ; output cell for (X, Y)

        mov     edx, [ebp+20]           ; EDX = rows left
        mov     ebx, 640
        sub     ebx, [ebp+16]           ; EBX = 640 - W, gap to the next output row

.row:
        mov     ecx, [ebp+16]           ; ECX = cells left in this row
.cell:
        movzx   eax, byte [esi]         ; input level ...
        shl     eax, 8                  ; ... selects a 256-entry table row
        mov     al, [edi]               ; current output level selects the column
        add     eax, [I32_dlut]         ; address of the table entry
        mov     al, [eax]
        mov     [edi], al               ; store the combined level

        inc     edi
        inc     esi
        dec     ecx
        jnz     .cell

        add     edi, ebx                ; next output row
        dec     edx
        jnz     .row

        popad
        pop     ebp
        ret

;       Lighten a sprite, using different Lookup table.

; I32darkspr_inv - same walk as I32darkspr, but the lookup goes through
; the table that I32_llut points to.
; For every cell:  out = llut[(in << 8) + out]
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W, cells per row       [ebp+20] H, number of rows
;   [ebp+24] output map (addressed as base + Y*640 + X)
;   [ebp+28] input block (read contiguously, W bytes per row)
; All general registers are preserved.
proc I32darkspr_inv
        push    ebp
        mov     ebp, esp
        pushad

        mov     esi, [ebp+28]           ; input levels
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 7                  ; Y * 640
        add     eax, [ebp+8]            ; + X
        mov     edi, [ebp+24]
        add     edi, eax                ; output cell for (X, Y)

        mov     edx, [ebp+20]           ; EDX = rows left
        mov     ebx, 640
        sub     ebx, [ebp+16]           ; EBX = 640 - W, gap to the next output row

.row:
        mov     ecx, [ebp+16]           ; ECX = cells left in this row
.cell:
        movzx   eax, byte [esi]         ; input level ...
        shl     eax, 8                  ; ... selects a 256-entry table row
        mov     al, [edi]               ; current output level selects the column
        add     eax, [I32_llut]         ; address of the table entry
        mov     al, [eax]
        mov     [edi], al               ; store the combined level

        inc     edi
        inc     esi
        dec     ecx
        jnz     .cell

        add     edi, ebx                ; next output row
        dec     edx
        jnz     .row

        popad
        pop     ebp
        ret

;       Darken a memory block, in 15bpp

; I32darkmem15 - relight a run of 16-bit pixels in place through the
; word table that I32_clut points to (15bpp variant).
; For every pixel:  dest = clut[((src & 0x1f) << 15) + dest]
; Stack arguments:
;   [ebp+8]  dest, 16-bit pixels (read and rewritten)
;   [ebp+12] source, one byte per pixel
;   [ebp+16] number of pixels
; All general registers are preserved.
proc I32darkmem15
        push    ebp
        mov     ebp, esp
        pushad

        mov     edi, [ebp+8]            ; pixels
        mov     esi, [ebp+12]           ; per-pixel levels
        mov     ecx, [ebp+16]           ; count

.pixel:
        movzx   eax, byte [esi]
        and     eax, 0x0000001f         ; level, 0..31
        shl     eax, 15                 ; level * 32768 entries
        movzx   edx, word [edi]         ; current pixel value
        add     eax, edx                ; table index
        mov     edx, [I32_clut]         ; table base
        mov     ax, [edx+eax*2]         ; 16-bit entries, hence the * 2
        mov     [edi], ax               ; write the relit pixel

        add     edi, 2
        inc     esi
        dec     ecx
        jnz     .pixel

        popad
        pop     ebp
        ret


;       Darken a memory block, in 16bpp

; I32darkmem16 - relight a run of 16-bit pixels in place through the
; word table that I32_clut points to (16bpp variant).
; For every pixel:  dest = clut[((src & 0x3f) << 16) + dest]
; Stack arguments:
;   [ebp+8]  dest, 16-bit pixels (read and rewritten)
;   [ebp+12] source, one byte per pixel
;   [ebp+16] number of pixels
; All general registers are preserved.
proc I32darkmem16
        push    ebp
        mov     ebp, esp
        pushad

        mov     edi, [ebp+8]            ; pixels
        mov     esi, [ebp+12]           ; per-pixel levels
        mov     ecx, [ebp+16]           ; count

.pixel:
        movzx   eax, byte [esi]
        and     eax, 0x0000003f         ; level, 0..63
        shl     eax, 16                 ; level * 65536 entries
        movzx   edx, word [edi]         ; current pixel value
        add     eax, edx                ; table index
        mov     edx, [I32_clut]         ; table base
        mov     ax, [edx+eax*2]         ; 16-bit entries, hence the * 2
        mov     [edi], ax               ; write the relit pixel

        add     edi, 2
        inc     esi
        dec     ecx
        jnz     .pixel

        popad
        pop     ebp
        ret

; I32direc - copy a 256 x 256 pixel block between two 1280-byte-pitch
; buffers.
; Stack arguments:
;   [ebp+8]  destination area       [ebp+12] source area
; Each row moves 256 pixels = 512 bytes = 128 dwords with rep movsd, then
; both pointers skip the remaining 768 bytes of the scanline.
; (rep stosd would only smear EAX over the destination and never read
; the source, and a count of 512 dwords would overrun the 1280-byte row.)
; All general registers are preserved.  movsd relies on the direction
; flag being clear.
; NOTE(review): unlike the other routines in this file this label is not
; declared through the "proc" macro - confirm whether it is meant to be
; callable from outside this file.
I32direc:
        push    ebp
        mov     ebp, esp
        pushad

        mov     edi, [ebp+8]            ; Destination area
        mov     esi, [ebp+12]           ; Source area
        mov     eax, 768                ; bytes to next line (1280-(2*256))
        mov     edx, 256                ; lines

direc_do:
        mov     ecx, 128                ; 256 pixels = 512 bytes = 128 dwords
        rep movsd                       ; copy one row
        add     esi, eax                ; skip to next line
        add     edi, eax
        dec     edx                     ; one more line?
        jnz     direc_do                ; yes

        popad
        pop     ebp
        ret

;
;	get 8bpp (for lightmap shading)
;

; I32get2 - copy a rectangle out of a 640-byte-wide, one-byte-per-cell
; map into a packed buffer.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] W in bytes             [ebp+20] H, number of rows
;   [ebp+24] sprite (destination)   [ebp+28] screen (source)
; Each row is copied as W/4 dwords and the source then advances by
; 640 - W.  NOTE(review): when W is not a multiple of 4 the remaining
; bytes are not copied and the source stride is no longer 640 - confirm
; that callers always pass a multiple of 4.
; All general registers are preserved.  movsd relies on the direction
; flag being clear.
proc I32get2
        push    ebp
        mov     ebp, esp
        pushad

        mov     edi, [ebp+24]           ; destination buffer
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 7                  ; Y * 640
        add     eax, [ebp+8]            ; + X (one byte per cell)
        mov     esi, [ebp+28]
        add     esi, eax                ; source cell for (X, Y)

        mov     ebx, [ebp+16]           ; W
        mov     edx, [ebp+20]           ; EDX = rows left
        mov     eax, 640
        sub     eax, ebx                ; EAX = source skip (640 - W)
        shr     ebx, 2                  ; EBX = dwords per row

.row:
        mov     ecx, ebx
        rep movsd                       ; copy one row
        add     esi, eax                ; start of the next source row
        dec     edx
        jnz     .row

        popad
        pop     ebp
        ret

;
;       Text output
;

; I32font - draw one 8 x 8 glyph from a 1-bit-per-pixel bitmap.
; Stack arguments:
;   [ebp+8]  X          [ebp+12] Y
;   [ebp+16] glyph data: 8 bytes, one per row, bit 7 = leftmost pixel
;   [ebp+20] colour (low 16 bits are stored)
;   [ebp+24] screen base address
; Set bits are drawn in the colour; clear bits leave the screen untouched.
; All general registers are preserved.  lodsb relies on the direction
; flag being clear.
proc I32font
        push    ebp
        mov     ebp, esp
        pushad

        mov     ebx, [ebp+20]           ; BX = ink colour
        mov     esi, [ebp+16]           ; glyph rows
        mov     eax, [ebp+12]           ; Y
        lea     eax, [eax+eax*4]
        shl     eax, 8                  ; Y * 1280
        mov     edx, [ebp+8]            ; X
        lea     edi, [eax+edx*2]
        add     edi, [ebp+24]           ; screen + Y*1280 + X*2

        mov     edx, 8                  ; rows in a glyph
.glyph_row:
        mov     ecx, 8                  ; pixels in a row
        lodsb                           ; AL = bit pattern for this row

.glyph_bit:
        test    al, 128                 ; leftmost remaining bit set?
        jz      .blank
        mov     [edi], bx               ; yes: plot it
.blank:
        add     edi, 2
        add     al, al                  ; bring the next bit up to bit 7
        dec     ecx
        jnz     .glyph_bit

        add     edi, 1264               ; (640 - 8) * 2 bytes to the next row start
        dec     edx
        jnz     .glyph_row

        popad
        pop     ebp
        ret

;
;       Lightning, 15bpp
;

; I32lightning15 - brighten pixels in place, 15bpp layout (blue in bits
; 0-4, green in bits 5-9, red in bits 10-14, as given by the masks below).
; Red and green are doubled, blue is quadrupled, each clamped to its
; 5-bit maximum.
; Stack arguments:
;   [ebp+8]  screen (both read and written)
; NOTE(review): exactly 65535 pixels are processed, far fewer than the
; SCREENSIZE dwords that fblit copies - confirm this is intended.
; All general registers are preserved.  lodsw/stosw rely on the direction
; flag being clear.
proc I32lightning15
	push ebp
	mov ebp,esp

	pushad

	mov esi,[ebp+8]         ;screen
	mov edi,[ebp+8]         ;screen

	mov ecx,65535		; number of pixels to process
l15loop:
        xor eax,eax		; clear the upper half before lodsw fills AX
        lodsw
        mov ebx,eax
        and eax,00000000000000000111110000011111b       ; R+B
        and ebx,00000000000000000000001111100000b       ; G
        shl ah,1  ; Double red
        shl al,2  ; Quadruple blue
        shl ebx,1 ; Double green

        ; saturate Blue component

        cmp al,31               ; blue against the 5-bit maximum
        jbe l15_b_noclip         ; if > 31, clip to 31
        mov al,31               ; clip it
l15_b_noclip:

        ; saturate Red component

        cmp ah,01111100b        ; red sits in bits 2-6 of AH; this is 31 << 2
        jbe l15_r_noclip         ; if > 31, clip to 31
        mov ah,01111100b        ; clip it
l15_r_noclip:

        ; saturate Green component

        cmp ebx,00000000000000000000001111100000b	; 31 << 5
        jbe l15_g_noclip         ; if > 31, clip to 31
        mov ebx,00000000000000000000001111100000b
l15_g_noclip:

        add eax,ebx		; merge green back in with red and blue
        stosw			; write the pixel back where it came from

        loop l15loop

	popad
	pop ebp
	ret

;
;       Lightning, 16bpp
;

; I32lightning16 - brighten pixels in place, 16bpp layout (blue in bits
; 0-4, green in bits 5-10, red in bits 11-15, as given by the masks
; below).  Red and green are doubled, blue is quadrupled, each clamped to
; its field maximum (31 for red/blue, 63 for green).
; Stack arguments:
;   [ebp+8]  screen (both read and written)
; NOTE(review): exactly 65535 pixels are processed, far fewer than the
; SCREENSIZE dwords that fblit copies - confirm this is intended.
; All general registers are preserved.  lodsw/stosw rely on the direction
; flag being clear.
proc I32lightning16
	push ebp
	mov ebp,esp

	pushad

	mov esi,[ebp+8]         ;screen
	mov edi,[ebp+8]         ;screen

	mov ecx,65535		; number of pixels to process
l16loop:
        xor eax,eax		; clear the upper half before lodsw fills AX
        lodsw
        mov ebx,eax
        and eax,00000000000000001111100000011111b       ; R+B
        and ebx,00000000000000000000011111100000b       ; G
 ;       shl ah,1  ; Double red
        shl al,2  ; Quadruple blue
        shl ebx,1 ; Double green

;        shr ah,3
        shr ah,2		; red is in bits 3-7 of AH: shifting down by 2
				; instead of 3 leaves twice the red value

        ; saturate Blue component

        cmp al,31               ; blue against the 5-bit maximum
        jbe l16_b_noclip         ; if > 31, clip to 31
        mov al,31               ; clip it
l16_b_noclip:

        ; saturate Red component

        cmp ah,31; doubled red against the 5-bit maximum
        jbe l16_r_noclip         ; if > 31, clip to 31
        mov ah,31; clip it
l16_r_noclip:

        shl ah,3                ; Move Red back into position.

        ; saturate Green component

        cmp ebx,00000000000000000000011111100000b	; 63 << 5
        jbe l16_g_noclip         ; if > 63, clip to 63
        mov ebx,00000000000000000000011111100000b
l16_g_noclip:

        add eax,ebx		; merge green back in with red and blue
        stosw			; write the pixel back where it came from

        loop l16loop

	popad
	pop ebp
	ret

