; Copyright (C) 1989, 1992 Aladdin Enterprises. All rights reserved.
; Distributed by Free Software Foundation, Inc.
;
; This file is part of Ghostscript.
;
; Ghostscript is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY. No author or distributor accepts responsibility
; to anyone for the consequences of using it or for whether it serves any
; particular purpose or works at all, unless he says so in writing. Refer
; to the Ghostscript General Public License for full details.
;
; Everyone is granted permission to copy, modify and redistribute
; Ghostscript, but only under the conditions described in the Ghostscript
; General Public License. A copy of this license is supposed to have been
; given to you along with Ghostscript so you can know your rights and
; responsibilities. It should be in a file named COPYING. Among other
; things, the copyright notice and this notice must be preserved on all
; copies.

; iutilasm.asm
; Assembly code for Ghostscript interpreter on MS-DOS systems

        ifdef   FOR80386
        .286c
        endif

utilasm_TEXT    SEGMENT WORD PUBLIC 'CODE'
        ASSUME  CS:utilasm_TEXT

        ifdef   FOR80386

; OP32: emit the 32-bit operand-size prefix byte, so that the 16-bit
; instruction written on the following line operates on 32 bits.
OP32    macro
        db      66h
        endm

; The routines in this section replace the multiply and divide helpers
; of the Turbo C library when we are running on an 80386.

; swap: exchange the two 16-bit halves of a 32-bit register.
; The argument is the register's number (see the equates below).
; The rotate is emitted as raw bytes because masm won't accept a
; shift instruction with a count of 16.
swap    macro   rnum
        OP32
        db      0c1h,0c0h+rnum,16       ; rol rnum,16
        endm

; Register numbers accepted by the swap macro.
regax   equ     0
regcx   equ     1
regdx   equ     2
regbx   equ     3

;-----------------------------------------------------------------------
; LXMUL@ / F_LXMUL@
; In:    (dx,ax) = first 32-bit factor, (cx,bx) = second 32-bit factor
; Out:   (dx,ax) = low 32 bits of the product
; Each factor is assembled in one 32-bit register, multiplied with a
; single 32-bit imul, and the result is split back into two words.
;-----------------------------------------------------------------------
        PUBLIC  LXMUL@, F_LXMUL@
F_LXMUL@ proc   far
LXMUL@  proc    far
        swap    regcx                   ; cx moves to the upper half
        mov     cx,bx                   ; 32-bit cx = (cx,bx)
        swap    regdx                   ; dx moves to the upper half
        mov     dx,ax                   ; 32-bit dx = (dx,ax)
        OP32
        db      0fh,0afh,0d1h           ; imul dx,cx
        OP32
        mov     ax,dx                   ; ax = low word of the product
        swap    regdx                   ; dx = high word of the product
        ret
LXMUL@  endp
F_LXMUL@ endp

; Divide two stack operands, leave the result in (dx,ax).
; setup32: address the stack arguments through bx and load the 32-bit
; dividend.  On return from the macro, ss:[bx+8] is the divisor.
setup32 macro
        mov     bx,sp
        OP32
        mov     ax,ss:[bx+4]            ; dividend
        endm

;-----------------------------------------------------------------------
; LDIV@, LUDIV@, LMOD@, LUMOD@ (and their F_ aliases), 80386 versions.
; In:    ss:[sp+4] = 32-bit dividend, ss:[sp+8] = 32-bit divisor
; Out:   (dx,ax) = quotient (xDIV) or remainder (xMOD)
; The callee removes the 8 bytes of arguments (ret 8).  bx is used as
; the stack pointer copy and is not preserved.
;-----------------------------------------------------------------------
        PUBLIC  LDIV@, LUDIV@, LMOD@, LUMOD@
        PUBLIC  F_LDIV@, F_LUDIV@, F_LMOD@, F_LUMOD@

; Signed quotient.
F_LDIV@ proc    far
LDIV@   proc    far
        setup32
        OP32
        cwd                             ; sign-extend the dividend
        OP32
        idiv    word ptr ss:[bx+8]      ; divisor
        OP32
        mov     dx,ax                   ; copy the 32-bit quotient
        swap    regdx                   ; dx = high word, ax = low word
        ret     8
LDIV@   endp
F_LDIV@ endp

; Unsigned quotient.
F_LUDIV@ proc   far
LUDIV@  proc    far
        setup32
        OP32
        xor     dx,dx                   ; zero-extend the dividend
        OP32
        div     word ptr ss:[bx+8]      ; divisor
        OP32
        mov     dx,ax                   ; copy the 32-bit quotient
        swap    regdx                   ; dx = high word, ax = low word
        ret     8
LUDIV@  endp
F_LUDIV@ endp

; Signed remainder.
F_LMOD@ proc    far
LMOD@   proc    far
        setup32
        OP32
        cwd                             ; sign-extend the dividend
        OP32
        idiv    word ptr ss:[bx+8]      ; divisor
        OP32
        mov     ax,dx                   ; copy the 32-bit remainder
        swap    regdx                   ; dx = high word, ax = low word
        ret     8
LMOD@   endp
F_LMOD@ endp

; Unsigned remainder.
F_LUMOD@ proc   far
LUMOD@  proc    far
        setup32
        OP32
        xor     dx,dx                   ; zero-extend the dividend
        OP32
        div     word ptr ss:[bx+8]      ; divisor
        OP32
        mov     ax,dx                   ; copy the 32-bit remainder
        swap    regdx                   ; dx = high word, ax = low word
        ret     8
LUMOD@  endp
F_LUMOD@ endp

        else                            ; !FOR80386

; Replace the unsigned divide routines in the Turbo C library,
; which do the division one bit at a time (!).  (We should replace
; the signed divide routines as well, but it's too much work.)

        PUBLIC  LUDIV@, LUMOD@
        PUBLIC  F_LUDIV@, F_LUMOD@

;-----------------------------------------------------------------------
; LUDIV@ / LUMOD@ (and their F_ aliases), 8086 versions.
; Divide two unsigned longs on the stack.
; In:    ss:[sp+4] = 32-bit numerator, ss:[sp+8] = 32-bit denominator
; Out:   (dx,ax) = quotient (LUDIV@) or remainder (LUMOD@)
; The callee removes the 8 bytes of arguments (ret 8).  bp is saved and
; restored; bx and cx are used as scratch.
; We use an offset in bx to distinguish div from mod: it indexes the
; two-entry jump tables (xx1, xx1a, xx3, xxz) that select the exit code.
;-----------------------------------------------------------------------
F_LUMOD@ proc   far
LUMOD@  proc    far
        mov     bx,2                    ; select the "mod" table entries
        jmp     udiv
LUMOD@  endp
F_LUMOD@ endp

F_LUDIV@ proc   far
LUDIV@  proc    far
        xor     bx,bx                   ; select the "div" table entries
udiv:   push    bp
        push    bx                      ; 0 = div, 2 = mod
        mov     bp,sp
; Frame relative to bp: [bp+0] = saved selector, [bp+2] = saved bp,
; [bp+4] = far return address, then the arguments.
nlo     equ     8
nhi     equ     10
dlo     equ     12
dhi     equ     14
        mov     ax,[bp+nlo]
        mov     dx,[bp+nhi]
        mov     bx,[bp+dlo]
        mov     cx,[bp+dhi]
; Now we are dividing dx:ax by cx:bx.
; Check to see whether this is really a 32/16 division.
        or      cx,cx
        jnz     div2
; 32/16, check for 16- vs. 32-bit quotient
        cmp     dx,bx
        jae     div1
; 32/16 with 16-bit quotient, just do it.
        div     bx                      ; ax = quo, dx = rem
        pop     bx
        pop     bp
        jmp     cs:xx1[bx]
        even
xx1     dw      divx1
        dw      modx1
divx1:  xor     dx,dx
        ret     8
modx1:  mov     ax,dx
        xor     dx,dx
        ret     8
; 32/16 with 32-bit quotient, do in 2 parts.
div1:   mov     cx,ax                   ; save lo num
        mov     ax,dx
        xor     dx,dx
        div     bx                      ; ax = hi quo
        xchg    cx,ax                   ; save hi quo, get lo num
        div     bx                      ; ax = lo quo, dx = rem
        pop     bx
        pop     bp
        jmp     cs:xx1a[bx]
        even
xx1a    dw      offset divx1a
        dw      offset modx1
divx1a: mov     dx,cx                   ; hi quo
        ret     8
; This is really a 32/32 bit division.
; (Note that the quotient cannot exceed 16 bits.)
; The following algorithm is taken from pp. 235-240 of Knuth, vol. 2
; (first edition).
; Start by normalizing the numerator and denominator: both are shifted
; right until the denominator fits in bx.
div2:   or      ch,ch
        jz      div21                   ; ch == 0, but cl != 0
; Do 8 steps all at once.
        mov     bl,bh
        mov     bh,cl
        mov     cl,ch
        xor     ch,ch
        mov     al,ah
        mov     ah,dl
        mov     dl,dh
        xor     dh,dh
        rol     bx,1                    ; cancel following rcr
div2a:  rcr     bx,1                    ; finish previous shift
div21:  shr     dx,1
        rcr     ax,1
        shr     cx,1
        jnz     div2a
        rcr     bx,1
; Now we can do a 32/16 divide.
div2x:  div     bx                      ; ax = quo, dx = rem
; Multiply by the denominator, and correct the result.
        mov     cx,ax                   ; save quotient
        mul     word ptr [bp+dhi]
        mov     bx,ax                   ; save lo part of hi product
        mov     ax,cx
        mul     word ptr [bp+dlo]
        add     dx,bx
; Now cx = trial quotient, (dx,ax) = cx * denominator.
        not     dx
        neg     ax
        cmc
        adc     dx,0                    ; double-precision neg
        jc      divz                    ; zero quotient
                                        ; requires special handling
        add     ax,[bp+nlo]
        adc     dx,[bp+nhi]
        jc      divx
; Quotient is too large, adjust it.
div3:   dec     cx
        add     ax,[bp+dlo]
        adc     dx,[bp+dhi]
        jnc     div3
; All done.  (dx,ax) = remainder, cx = lo quotient.
divx:   pop     bx
        pop     bp
        jmp     cs:xx3[bx]
        even
xx3     dw      offset divx3
        dw      offset modx3
divx3:  mov     ax,cx
        xor     dx,dx
modx3:  ret     8
; Handle zero quotient specially.
; We get here only when the negated product is zero, so (dx,ax) = 0.
divz:   pop     bx
        jmp     cs:xxz[bx]
        even
xxz     dw      offset divxz
        dw      offset modxz
divxz:  pop     bp                      ; quotient 0 is already in (dx,ax)
        ret     8
; The remainder is the numerator itself.  bp still addresses the frame
; set up at udiv (pop bx moved only sp), so the numerator is read at the
; unadjusted offsets.
modxz:  mov     ax,[bp+nlo]
        mov     dx,[bp+nhi]
        pop     bp
        ret     8
LUDIV@  endp
F_LUDIV@ endp

        endif                           ; FOR80386

; Swap even and odd bytes from src to dest.
; See gsmisc.c for the C definition.
; Stack offsets of the arguments shared by _memswab2 and _memswab4,
; relative to sp at entry (far return address occupies the first 4 bytes).
swab_src        equ     4               ; far pointer to the source
swab_dest       equ     8               ; far pointer to the destination
swab_count      equ     12              ; 16-bit count

;-----------------------------------------------------------------------
; _memswab2
; Copies 16-bit words from src to dest with the two bytes of each word
; exchanged.  count is divided by 4 to get the number of word pairs;
; bit 1 of count selects one extra leading word.
; ds and si are saved and restored; ax, bx, dx and es are not preserved.
; The arguments are left on the stack for the caller to remove.
;-----------------------------------------------------------------------
        PUBLIC  _memswab2
_memswab2 proc  far
        mov     bx,sp
        push    ds
        push    si
        mov     dx,ss:[bx+swab_count]
        shr     dx,1
        shr     dx,1                    ; dx = count / 4, CF = bit 1 of count
        les     si,ss:[bx+swab_src]
        lds     bx,ss:[bx+swab_dest]
        jnc     swab2_next              ; CF is still the one from shr
; There is an odd word: bias the pointers and enter the loop halfway.
        add     si,2
        sub     bx,2
        jmp     swab2_second
swab2_first:
        mov     ax,es:[si]
        add     si,4
        xchg    al,ah
        mov     [bx],ax
swab2_second:
        mov     ax,es:[si-2]
        add     bx,4
        xchg    al,ah
        mov     [bx-2],ax
swab2_next:
        dec     dx
        jnl     swab2_first
        pop     si
        pop     ds
        ret
_memswab2 ENDP

;-----------------------------------------------------------------------
; _memswab4
; Swap bytes in a 32-bit word from src to dest: each group of four
; bytes is stored in reverse order.  See gsmisc.c for the C definition.
; count is divided by 4 to get the number of groups.
; ds and si are saved and restored; ax, bx, cx, dx and es are not
; preserved.  The arguments are left on the stack for the caller.
;-----------------------------------------------------------------------
        PUBLIC  _memswab4
_memswab4 proc  far
        mov     bx,sp
        push    ds
        push    si
        mov     dx,ss:[bx+swab_count]
        shr     dx,1
        shr     dx,1                    ; dx = number of 4-byte groups
        les     si,ss:[bx+swab_src]
        lds     bx,ss:[bx+swab_dest]
        jmp     swab4_next
swab4_group:
        mov     ax,es:[si]              ; first two source bytes
        add     si,4
        xchg    al,ah
        mov     cx,es:[si-2]            ; last two source bytes
        mov     [bx+2],ax               ; ... which trade places
        xchg    cl,ch
        mov     [bx],cx
        add     bx,4
swab4_next:
        dec     dx
        jnl     swab4_group
        pop     si
        pop     ds
        ret
_memswab4 ENDP

;-----------------------------------------------------------------------
; _memflip8x8
; Transpose an 8x8 bit matrix.  See gsmisc.c for the algorithm in C.
; Eight input bytes are read starting at inp, line_size bytes apart;
; eight result bytes are written starting at outp, dist bytes apart.
; ds, si and di are saved and restored.
;-----------------------------------------------------------------------
; Stack offsets of the arguments once ds, si and di have been pushed.
flip_inp        equ     10              ; byte *inp (far)
flip_line_size  equ     14              ; int line_size
flip_outp       equ     16              ; byte *outp (far)
flip_dist       equ     20              ; int dist

        PUBLIC  _memflip8x8
_memflip8x8 proc far
        push    ds
        push    si
        push    di
        mov     si,sp
        mov     di,ss:[si+flip_line_size]
        lds     si,ss:[si+flip_inp]

; Register assignment for the eight rows A..H:
;       ax = AE, bx = BF, cx (or di) = CG, dx = DH.
; Load the input data.  Initially we assign
;       ax = AB, bx = EF, cx (or di) = CD, dx = GH.
        mov     ah,[si]
; ldrow: step to the next input row and load it into a byte register.
ldrow   macro   breg
        add     si,di
        mov     breg,[si]
        endm
        ldrow   al
        ldrow   ch
        ldrow   cl
        ldrow   bh
        ldrow   bl
        ldrow   dh
        ldrow   dl

; xpose: transposition step, see the C code for explanation.
; Exchanges the bits of rb selected by bits with the bits of ra that
; lie cnt positions higher.  Uses si as scratch.
xpose   macro   ra,rb,cnt,bits
        mov     si,ra
        shr     si,cnt
        xor     si,rb
        and     si,bits
        xor     rb,si
        shl     si,cnt
        xor     ra,si
        endm

; Do 4x4 transpositions
        mov     di,cx                   ; we need cl for the shift count
        mov     cl,4
        xpose   bx,ax,cl,0f0fh
        xpose   dx,di,cl,0f0fh
; Swap B/E, D/G
        xchg    al,bh
        mov     cx,di
        xchg    cl,dh
; Do 2x2 transpositions
        mov     di,cx                   ; need cl again
        mov     cl,2
        xpose   di,ax,cl,3333h
        xpose   dx,bx,cl,3333h
        mov     cx,di                   ; done shifting >1
; Do 1x1 transpositions
        xpose   bx,ax,1,5555h
        xpose   dx,cx,1,5555h

; Store result
        mov     si,sp
        mov     di,ss:[si+flip_dist]
        lds     si,ss:[si+flip_outp]
        mov     [si],ah
; strow: step to the next output row and store a byte register there.
strow   macro   breg
        add     si,di
        mov     [si],breg
        endm
        strow   bh
        strow   ch
        strow   dh
        strow   al
        strow   bl
        strow   cl
        strow   dl

; All done
        pop     di
        pop     si
        pop     ds
        ret
_memflip8x8 ENDP

utilasm_TEXT    ENDS
        END