--- /dev/null
+tail_em.a
+aaru.s
+aar.s
+aar2.s
+and.s
+cii.s
+cms.s
+cmu.s
+cmu4.s
+csa.s
+csb.s
+dvi2.s
+dvi4.s
+dvu2.s
+dvu4.s
+exg.s
+gto.s
+hulp.s
+ior.s
+laru.s
+lar.s
+lar2.s
+los.s
+mli2.s
+mli4.s
+rck.s
+rmi2.s
+saru.s
+sar.s
+sar2.s
+sdf.s
+sdl.s
+set.s
+str.s
+sts.s
+unim.s
+trp.s
+inn.s
+xor.s
+nop.s
+outdec.s
+pstrng.s
--- /dev/null
+# $Header$
+install:
+ ../../install tail_em.a tail_em
+ ../../install tail.s end_em
+
+cmp:
+ -../../compare tail_em.a tail_em
+ -../../compare tail.s end_em
+
+clean :
+
+opr :
+ make pr | opr
+
+pr:
+ @arch pv tail_em.a | pr -h `pwd`/tail_em.a
+ @pr `pwd`/tail.s
--- /dev/null
+.define .aar
+! use .mli2
+
+! 2-byte descriptor elements
+! any size array elements
+! no range checking
+! parameters:
+! stack: pointer to descriptor
+! index
+! base address of array
+! stack: result (out)
+! uses .mli2 routine
+! side-effect: size of array elements in bc
+
+
+
+.aar:
+ pop hl ! return address
+ pop ix ! pointer to descr.
+ ex (sp),hl ! save ret. addr.
+ ! hl := index
+ ld c,(ix+0) ! bc := lower bound
+ ld b,(ix+1)
+ xor a
+ sbc hl,bc ! hl := index-lwb
+ ld c,(ix+4) ! bc := size
+ ld b,(ix+5)
+ ex de,hl ! de := index-lwb
+ call .mli2 ! hl := bc*de =
+ ! size*(index-lwb)
+ pop ix ! return address
+ pop de ! base
+ add hl,de ! addr. of element
+ push hl
+ jp (ix) ! return
--- /dev/null
+.define .aar2
+
+! special case aar: element size = 2 (statically known)
+! parameters:
+! on stack
+! execution time: 124 states
+
+
+
+.aar2:
+ pop ix ! save return address
+ pop hl ! pointer to descriptor
+ ld c,(hl) ! bc := lower bound
+ inc hl
+ ld b,(hl)
+ pop hl ! index
+ xor a
+ sbc hl,bc ! index - lwb
+ add hl,hl ! size*(index-lwb)
+ pop de ! base address of array
+ add hl,de
+ push hl
+ jp (ix)
--- /dev/null
+.define .aaru
+
+! AAR NOT DEFINED
+
+.aaru:
+ pop ix
+ pop hl
+ xor a
+ xor h
+ jp nz,1f
+ ld a,2
+ xor l
+ jp z,2f
+1:
+ ld hl,EARRAY
+ call .trp.z
+2:
+ push ix
+ jp .aar
--- /dev/null
+.define .and
+
+! auxiliary size 'and'
+! parameters:
+! de: size
+! stack: operands
+! stack: result (out)
+
+
+
+.and:
+ pop ix ! save return address
+ ld h,d
+ ld l,e
+ add hl,sp
+ ex de,hl
+ add hl,de ! now hl is the base of second
+ ld b,d ! operand. bc and de are base
+ ld c,e ! of the first operand
+1:
+ dec hl
+ dec de
+ ld a,(de)
+ and (hl)
+ ld (hl),a
+ xor a
+ sbc hl,bc
+ jr z,2f
+ add hl,bc
+ jr 1b
+2:
+ ld h,b
+ ld l,c
+ ld sp,hl
+ jp (ix)
--- /dev/null
+.define .cii
+
+! cii: convert integer to integer
+! parameters:
+! stack: destination size
+! source size
+! source
+! stack: result (out)
+! This code is also used by cuu.
+! The contents of the a-register determines
+! if we're doing a cii (a=0) or a cuu (a=1),
+! so be very careful with this register!
+
+
+
+.cii:
+ pop ix ! return address
+ pop hl ! destination size
+ pop de ! source size
+ ld b,h ! bc := destination size
+ ld c,l
+ xor a ! watch it, this is dirty!
+ ! Besides clearing the carry
+ ! this instruction sets a-reg.
+ ! to 0, to indicate this is
+ ! a cii and not a cuu.
+ sbc hl,de ! hl := destination size
+ ! - source size
+ jr z,1f ! equal, return
+ jp p,2f ! larger, expand
+ ! smaller, shrink
+ ! The most significant part of the source
+ ! is removed. As the least sign. part is
+ ! on top of the stack, we have to move an
+ ! entire data block.
+9:
+ add hl,sp ! note that hl < 0
+ ! (also come here via cuu)
+ add hl,de
+ dec hl ! now hl points to most
+ ! significant byte of what
+ ! will be left over of source
+ ex de,hl
+ add hl,sp
+ ex de,hl
+ dec de ! now de points to highest
+ ! byte of source
+ lddr ! move 'destination size'
+ ! bytes upwards (i.e. away
+ ! from top of stack)
+ inc de
+ ex de,hl
+ ld sp,hl ! adjust stackpointer
+1:
+ jp (ix) ! return
+
+2:
+ ! larger, expand
+ ! A number of bytes (containing the signbits
+ ! of the source) is inserted before the most
+ ! significant byte of the source.
+ ! As this byte is somewhere in the middle of
+ ! the stack, the entire source must first be
+ ! moved downwards (in the direction of the
+ ! top)
+8:
+ ld b,d ! bc := source size
+ ! (also come here via cuu)
+ ld c,e
+ ex de,hl ! de := difference (> 0)
+ ld hl,0
+ add hl,sp ! hl := sp
+ push hl
+ or a
+ sbc hl,de
+ ex de,hl ! de := sp - difference
+ pop hl ! hl := sp
+ ex de,hl ! adjust sp
+ ld sp,hl
+ ex de,hl
+ ldir ! move source upwards,
+ ! creating a 'hole'
+ ! inside the stack
+ ! now we will fill the hole with bytes
+ ! containing either 0 or -1, depending
+ ! on the signbit of the source.
+ or a
+ sbc hl,de
+ ex de,hl ! de := difference
+ dec hl ! now hl points to
+ ! most significant byte
+ ! of the source
+ or a ! see if we're doing
+ ! a 'cii' or a 'cuu'
+ jr nz,3f ! cuu, expand with zeroes
+ bit 7,(hl) ! test signbit
+ jr z,3f
+ dec b ! b := -1 (was 0 after ldir)
+3:
+ inc hl
+ ld (hl),b ! either 0 or -1
+ dec de
+ ld a,d
+ or e
+ jr nz,3b
+ jp (ix) ! return
+
+
+
+.define .cuu
+
+! cuu: convert unsigned to unsigned
+! parameters:
+! stack: destination size
+! source size
+! source
+! stack: result (out)
+! The only difference between a cuu and a cii is:
+! if the destination is larger than the source,
+! the former extends with zeroes and the latter
+! extends with sign bits
+! cuu uses the code of cii. In this case it puts
+! a '1' in the accumulator to indicate this is
+! a cuu.
+
+
+
+.cuu:
+ pop ix
+ pop hl
+ pop de
+ ld b,h
+ ld c,l
+ xor a ! clear carry
+ sbc hl,de
+ jr z,1b ! equal, return
+ jp m,9b ! smaller, shrink
+ inc a ! a := 1
+ jr 8b ! larger, expand
--- /dev/null
+.define .cms
+
+! any size sets
+! parameters:
+! hl: size
+! stack: second operand
+! first operand
+! stack: result (out)
+
+
+
+.cms:
+ pop ix
+ ld b,h
+ ld c,l
+ add hl,sp
+0:
+ dec sp
+ pop af
+ cpi
+ jr nz,1f
+ ld a,b
+ or c
+ jr nz,0b
+ ld de,0
+ jr 2f
+1:
+ add hl,bc
+ ld de,1
+2:
+ ld sp,hl
+ push de
+ jp (ix)
--- /dev/null
+.define .cmu
+
+! parameters:
+! hl : size (#bytes)
+! stack: second operand
+! first operand
+! stack: result (out)
+
+
+
+.cmu:
+ ! The two operands are compared byte by byte,
+ ! starting at the highest byte, until
+ ! they differ.
+ pop ix ! return address
+ pop hl ! #bytes
+ ld b,h ! bc := hl
+ ld c,l
+ add hl,sp
+ dec hl ! pointer to highest byte
+ ! of second operand
+ ld d,h ! de := hl
+ ld e,l
+ add hl,bc ! pointer to highest byte
+ ! of first operand
+ ld sp,hl ! points to where the
+ ! result will be stored
+ ex de,hl
+ ! now, de points to highest byte of 1st operand
+ ! sp ,, ,, ,,
+ ! hl ,, ,, 2nd ,,
+ ! bc contains #bytes
+
+0:
+ ! loop, compare the two operands
+ ! byte by byte.
+ ld a,(de)
+ xor (hl) ! Avoid overflow during
+ ! subtraction. If the
+ ! signbits differ, then
+ ! the operands differ.
+ jp m,2f ! signbits differ
+ ld a,(de) ! signbits are equal,
+ ! so we can savely
+ ! compare the bytes.
+ sub (hl)
+ jr nz,1f ! operands are different
+ dec de ! the two bytes are the
+ ! same, try next bytes,
+ ! if any.
+ dec hl ! bump pointers
+ dec bc
+ ld a,b ! bc = 0 ?
+ or c
+ jr nz,0b ! no, try next bytes
+ ! yes, then the two operands are equal.
+ ! Note that a=0 now.
+1:
+ ld h,a ! hl := result
+ ld l,a
+ jr 3f
+2:
+ ! the signbits differ
+ ld h,(hl) ! hl := positive if
+ ! signbit of current
+ ! byte of 2nd operand
+ ! is "0", else negative
+ ld l,1 ! just in case (hl)=0
+3:
+ ex (sp),hl ! sp was set above
+ jp (ix) ! return
--- /dev/null
+.define .cmu4
+
+! 4 byte cmu and cmi routine
+! parameters:
+! a: 0 for cmu, 1 for cmi
+! stack: operands
+! de: result (out)
+
+
+
+.cmu4:
+ pop ix
+ ld de,4
+ ld b,d
+ ld c,e
+ ld hl,0
+ add hl,sp
+ add hl,bc
+ dec hl
+ ld d,h
+ ld e,l
+ add hl,bc
+ ld (savesp),hl ! save new sp-1
+ or a
+ jr z,1f
+ ld a,(de)
+ cp (hl)
+ dec hl
+ dec de
+ dec bc
+ jr z,1f
+ jp p,4f
+ jr 6f
+1:
+ ld a,(de)
+ cp (hl)
+ dec de
+ dec hl
+ dec bc
+ jr nz,2f
+ ld a,b
+ or c
+ jr nz,1b
+ ld d,a
+ ld e,a
+ jr 3f
+2:
+ jr nc,4f
+6:
+ ld de,1
+ jr 3f
+4:
+ ld de,-1
+3:
+ ld hl,(savesp)
+ inc hl
+ ld sp,hl
+ jp (ix)
+.data
+savesp: .word 0
--- /dev/null
+.define .csa
+
+! this is not a subroutine, but just a
+! piece of code that computes the jump-
+! address and jumps to it.
+! traps if resulting address is zero
+
+
+
+.csa:
+ pop ix
+ pop hl
+ push bc
+ ld c,(ix)
+ ld b,(ix+1)
+ ld e,(ix+2)
+ ld d,(ix+3)
+ xor a
+ sbc hl,de
+ jp m,1f
+ ex de,hl
+ ld l,(ix+4)
+ ld h,(ix+5)
+ xor a
+ sbc hl,de
+ jp m,1f
+ ex de,hl
+ add hl,hl
+ ld de,6
+ add hl,de
+ ex de,hl
+ add ix,de
+ ld l,(ix)
+ ld h,(ix+1)
+ ld a,h
+ or l
+ jr nz,2f
+1: ld a,b
+ or c
+ jr z,.trp.z
+ ld l,c
+ ld h,b
+2: pop bc
+ jp (hl)
--- /dev/null
+.define .csb
+
+! this is not a subroutine, but just a
+! piece of code that computes the jump-
+! address and jumps to it.
+! traps if resulting address is zero
+
+
+
+.csb:
+ pop hl ! pointer to descriptor
+ pop de ! case index
+ ld c,(hl) ! bc := default offset
+ inc hl
+ ld b,(hl)
+ inc hl
+ push bc ! save def\ 1ault on stack
+ ld c,(hl) ! bc := #entries
+ inc hl
+ ld b,(hl)
+ inc hl
+1:
+ ! loop, try to find the case index
+ ! in the descriptor
+ ld a,b
+ or c
+ jr z,noteq ! done, index not found
+ ld a,(hl) ! is de=(hl) ?
+ inc hl
+ cp e
+ jr nz,2f ! no
+ ld a,(hl)
+ inc hl
+ cp d
+ jr nz,3f ! no
+ ld a,(hl) ! yes, get jump address
+ inc hl
+ ld h,(hl)
+ ld l,a
+ pop af ! remove default
+ jr 4f
+2:
+ inc hl ! skip high byte of index
+3:
+ inc hl ! skip jump address
+ inc hl
+ dec bc
+ jr 1b
+noteq:
+ pop hl ! take default exit
+4:
+ ld a,l ! jump address is zero?
+ or h
+ jr z,.trp.z ! yes, trap
+ jp (hl)
--- /dev/null
+.define .dvi2
+
+! 16-bit signed division
+! parameters:
+! bc: divisor
+! de: dividend
+! de: result (out)
+! no check on overflow
+
+
+
+.dvi2:
+ xor a
+ ld h,a
+ ld l,a
+ sbc hl,bc
+ jp m,1f
+ ld b,h
+ ld c,l
+ cpl
+1:
+ or a
+ ld hl,0
+ sbc hl,de
+ jp m,1f
+ ex de,hl
+ cpl
+1:
+ push af
+ ld hl,0
+ ld a,16
+0:
+ add hl,hl
+ ex de,hl
+ add hl,hl
+ ex de,hl
+ jr nc,1f
+ inc hl
+ or a
+1:
+ sbc hl,bc
+ inc de
+ jp p,2f
+ add hl,bc
+ dec de
+2:
+ dec a
+ jr nz,0b
+ pop af
+ or a
+ jr z,1f
+ ld hl,0
+ sbc hl,de
+ ex de,hl
+1:
+ ret
--- /dev/null
+.define .dvi4
+
+! 4-byte divide routine for z80
+! parameters:
+! stack: divisor
+! dividend
+! stack: quotient (out)
+! bc de: remainder (out) (high part in bc)
+
+
+
+.dvi4:
+ pop hl
+ ld (retaddr),hl
+ xor a
+ ld (.flag1),a
+ ld (.flag2),a
+ ld ix,0
+ add ix,sp
+ ld b,(ix+7) ! dividend
+ bit 7,b
+ jr z,1f
+ ld c,(ix+6)
+ ld d,(ix+5)
+ ld e,(ix+4)
+ call .negbd
+ ld (ix+7),b
+ ld (ix+6),c
+ ld (ix+5),d
+ ld (ix+4),e
+ ld a,1
+ ld (.flag1),a
+1:
+ ld b,(ix+3)
+ bit 7,b
+ jr z,2f
+ call .negst
+ ld a,1
+ ld (.flag2),a
+2:
+ call .dvu4
+ ld a,(.flag1)
+ or a
+ jr z,3f
+ call .negbd
+3:
+ ld (.savebc),bc
+ ld (.savede),de
+ ld a,(.flag2)
+ ld b,a
+ ld a,(.flag1)
+ xor b
+ jr z,4f
+ call .negst
+4:
+ ld bc,(.savebc)
+ ld de,(.savede)
+ ld hl,(retaddr)
+ jp (hl)
+.negbd:
+ xor a
+ ld h,a
+ ld l,a
+ sbc hl,de
+ ex de,hl
+ ld h,a
+ ld l,a
+ sbc hl,bc
+ ld b,h
+ ld c,l
+ ret
+.negst:
+ pop ix
+ pop de
+ pop bc
+ call .negbd
+ push bc
+ push de
+ jp (ix)
+.data
+ .flag1: .byte 0
+ .flag2: .byte 0
+ retaddr:.word 0
+ .savebc: .word 0
+ .savede: .word 0
--- /dev/null
+.define .dvu2
+
+! 16-bit divide
+! parameters:
+! bc: divisor
+! de: dividend
+! de: quotient (out)
+! hl: remainder (out)
+! no overflow detection
+
+
+
+.dvu2:
+ or a
+ ld h,d
+ ld l,e
+ sbc hl,bc
+ jp m,3f
+ jp c,3f ! bc > de?
+ ld hl,0
+ ld a,16
+0:
+ add hl,hl
+ ex de,hl
+ add hl,hl
+ ex de,hl
+ jr nc,1f
+ inc hl
+ or a
+1:
+ sbc hl,bc
+ inc de
+ jp p,2f
+ add hl,bc
+ dec de
+2:
+ dec a
+ jr nz,0b
+ ret
+3:
+ ld hl,0
+ ex de,hl
+ ret
--- /dev/null
+.define .dvu4
+
+! 4-byte divide routine for z80
+! parameters:
+! stack: divisor
+! dividend
+! stack: quotient (out)
+! bc de: remainder (out) (high part in bc)
+
+
+
+! a n-byte divide may be implemented
+! using 2 (virtual) registers:
+! - a n-byte register containing
+! the divisor
+! - a 2n-byte shiftregister (VSR)
+!
+! Initially, the VSR contains the dividend
+! in its low (right) n bytes and zeroes in its
+! high n bytes. The dividend is shifted
+! left into a "window" bit by bit. After
+! each shift, the contents of the window
+! is compared with the divisor. If it is
+! higher or equal, the divisor is subtracted from
+! it and a "1" bit is inserted in the
+! VSR from the right side! else a "0" bit
+! is inserted. These bits are shifted left
+! too during subsequent iterations.
+! At the end, the rightmost part of VSR
+! contains the quotient.
+! For n=4, we need 2*4+4 = 12 bytes of
+! registers. Unfortunately we only have
+! 5 2-byte registers on the z80
+! (bc,de,hl,ix and iy). Therefore we use
+! an overlay technique for the rightmost
+! 4 bytes of the VSR. The 32 iterations
+! are split up into two groups: during
+! the first 16 iterations we use the high
+! order 16 bits of the dividend! during
+! the last 16 iterations we use the
+! low order 16 bits.
+! register allocation:
+! VSR iy hl ix
+! divisor -de bc
+.dvu4:
+ ! initialization
+ pop hl ! save return address
+ ld (.retaddr),hl
+ pop bc ! low part (2 bytes)
+ ! of divisor in bc
+ xor a ! clear carry, a := 0
+ ld h,a ! hl := 0
+ ld l,a
+ ld (.flag),a ! first pass main loop
+ pop de ! high part divisor
+ sbc hl,de ! inverse of high part
+ ex de,hl ! of divisor in de
+ pop hl ! save low part of
+ ! dividend in memory
+ ld (.low),hl ! used during second
+ ! iteration over main loop
+ pop ix ! high part of dividend
+ push iy ! save LB
+ ld h,a ! hl := 0
+ ld l,a
+ ld iy,0 ! now the VSR is initialized
+
+ ! main loop, done twice
+1:
+ ld a,16
+ ! sub-loop, done 16 times
+2:
+ add iy,iy ! shift VSR left
+ add ix,ix
+ adc hl,hl
+ jp nc,3f
+ inc iy
+3:
+ or a ! subtract divisor from
+ ! window (iy hl)
+ ld (.iysave),iy
+ sbc hl,bc
+ jr nc,4f ! decrement iy if there
+ ! was no borrow
+ dec iy
+4:
+ add iy,de ! there is no "sbc iy,ss"
+ ! on the z80, so de was
+ ! inverted during init.
+ inc ix
+ ! see if the result is non-negative,
+ ! otherwise undo the subtract.
+ ! note that this uncooperating machine
+ ! does not set its S -or Z flag after
+ ! a 16-bit add.
+ ex (sp),iy ! does anyone see a better
+ ex (sp),hl ! solution ???
+ bit 7,h
+ ex (sp),hl
+ ex (sp),iy
+ jp z,5f
+ ! undo the subtract
+ add hl,bc
+ ld iy,(.iysave)
+ dec ix
+5:
+ dec a
+ jr nz,2b
+ ld a,(.flag) ! see if this was first or
+ ! second iteration of main loop
+ or a ! 0=first, 1=second
+ jr nz,6f
+ inc a ! a := 1
+ ld (.flag),a ! flag := 1
+ ld (.result),ix ! save high part of result
+ ld ix,(.low) ! initialize second
+ ! iteration, ix := low
+ ! part of dividend
+ jr 1b
+6:
+ ! clean up
+ push iy ! transfer remainder
+ pop bc ! from iy-hl to bc-de
+ ex de,hl
+ pop iy ! restore LB
+ ld hl,(.result) ! high part of result
+ push hl
+ push ix ! low part of result
+ ld hl,(.retaddr)
+ jp (hl) ! return
+
+.data
+.flag: .byte 0
+.low: .word 0
+.iysave: .word 0
+.retaddr: .word 0
+.result: .word 0
--- /dev/null
+.define endtext,enddata,endbss
+.define _end,_etext,_edata
+
+ .text
+endtext:
+_etext:
+ .align 2
+ .data
+enddata:
+_edata:
+ .align 2
+ .bss
+_end:
+endbss:
+ .align 2
--- /dev/null
+.define .exg
+.exg:
+ pop ix
+ pop de
+ ld hl,0
+ add hl,sp
+ ld b,h
+ ld c,l
+ add hl,de
+1:
+ ld a,(bc)
+ ex af,af2
+ ld a,(hl)
+ ld (bc),a
+ ex af,af2
+ ld (hl),a
+ inc bc
+ inc hl
+ dec de
+ ld a,d
+ or e
+ jr nz,1b
+ jp (ix)
+
+
--- /dev/null
+.define .gto
+
+.gto:
+ ld e,(hl)
+ inc hl
+ ld d,(hl)
+ push de
+ pop ix ! new pc
+ inc hl
+ ld e,(hl)
+ inc hl
+ ld d,(hl) ! new sp
+ inc hl
+ ld c,(hl)
+ inc hl
+ ld b,(hl) ! new lb
+ push bc
+ pop iy
+ push de
+ pop hl
+ ld sp,hl
+ jp (ix)
--- /dev/null
+loop = 100
+dvi4:
+ xor a
+ ld (.flag1),a
+ ld (.flag2),a
+ ld ix,0
+ add ix,sp
+ ld b,(ix+7) ! dividend
+ bit 7,b
+ jr z,1f
+ ld c,(ix+6)
+ ld d,(ix+5)
+ ld e,(ix+4)
+ call .negbd
+ ld (ix+7),d
+ ld (ix+6),e
+ ld (ix+5),h
+ ld (ix+4),l
+ ld a,1
+ ld (.flag1),a
+1:
+ ld b,(ix+3)
+ bit 7,b
+ jr z,2f
+ call .negst
+ ld a,1
+ ld (.flag2),a
+2:
+ call .dvu4
+ ld a,(.flag1)
+ jr z,3f
+ call .negbd
+3:
+ ld a,(.flag2)
+ ld b,a
+ ld a,(.flag1)
+ xor b
+ jr z,4f
+ call .negst
+4:
+ jr loop
+.negbd:
+ xor a
+ ld h,a
+ ld l,a
+ sbc hl,de
+ ex de,hl
+ ld h,a
+ ld l,a
+ sbc hl,bc
+ ret
+.negst:
+ pop iy
+ pop de
+ pop bc
+ call .negbd
+ push hl
+ push de
+ jp (iy)
+.data
+ .flag1: .byte 0
+ .flag2: .byte 0
--- /dev/null
+.define .inn
+! use .unimpld
+
+! any size sets
+! parameters:
+! hl: size
+! stack: bit number
+! stack: result (out)
+
+
+
+.inn:
+ pop ix
+ pop de
+ add hl,sp
+ ld b,h
+ ld c,l
+ ex de,hl
+ ld a,l
+ sra h
+ jp m,0f
+ rr l
+ sra h
+ rr l
+ sra h
+ rr l
+ add hl,sp
+ push hl
+ or a ! clear carry
+ sbc hl,de
+ pop hl
+ jp m,1f
+0: xor a
+ jr 4f
+1: ld e,(hl)
+ and 7
+ jr 2f
+3: rrc e
+ dec a
+2: jr nz,3b
+ ld a,e
+ and 1
+4:
+ ld e,a
+ ld d,0
+ ld h,b
+ ld l,c
+ ld sp,hl
+ push de
+ jp (ix)
--- /dev/null
+.define .ior
+
+! auxiliary size 'ior'
+! parameters:
+! de: size
+! stack: operands
+! stack: result (out)
+
+
+
+.ior:
+ pop ix
+ ld h,d
+ ld l,e
+ add hl,sp
+ ld b,h
+ ld c,l
+ ex de,hl
+ add hl,de
+1: dec hl
+ dec de
+ ld a,(de)
+ or (hl)
+ ld (hl),a
+ xor a
+ sbc hl,bc
+ jr z,2f
+ add hl,bc
+ jr 1b
+2: ld h,b
+ ld l,c
+ ld sp,hl
+ jp (ix)
--- /dev/null
+.define .lar
+! use .mli2
+
+! 2-byte descriptor elements
+! any size array elements
+! parameters:
+! on stack
+! uses .mli2
+! no range checking
+! adapted from .aar and .los
+
+
+
+.lar:
+ pop hl
+ pop ix
+ ex (sp),hl
+ ld c,(ix+0)
+ ld b,(ix+1)
+ xor a
+ sbc hl,bc
+ ld c,(ix+4)
+ ld b,(ix+5)
+ ex de,hl
+ call .mli2
+ pop ix
+ pop de
+ add hl,de ! address of array element
+ add hl,bc
+ dec hl ! pointer to highest byte of element
+ srl b
+ rr c
+ jr nc,1f
+ ld a,c ! skip check to save runtime
+ or b
+ jr nz,.trp.z ! size was odd but <> 1
+ ld c,(hl)
+ push bc
+ jp (ix)
+1: ld d,(hl)
+ dec hl
+ ld e,(hl)
+ dec hl
+ push de
+ dec bc
+ ld a,b
+ or c
+ jr nz,1b
+ jp (ix)
--- /dev/null
+.define .lar2
+
+! special case lar: element size = 2 (statically known)
+! parameters:
+! on stack
+! adapted from .aar2
+! execution time: 144 states
+
+
+
+.lar2:
+ pop ix
+ pop hl
+ ld c,(hl)
+ inc hl
+ ld b,(hl)
+ pop hl
+ xor a
+ sbc hl,bc
+ add hl,hl ! size*(index-lwb)
+ pop de
+ add hl,de ! + base
+ ld e,(hl)
+ inc hl
+ ld d,(hl)
+ push de
+ jp (ix)
--- /dev/null
+.define .laru
+
+! LAR NOT DEFINED
+
+.laru:
+ pop ix
+ pop hl
+ xor a
+ xor h
+ jp nz,1f
+ ld a,2
+ xor l
+ jp z,2f
+1:
+ ld hl,EARRAY
+ call .trp.z
+2:
+ push ix
+ jp .lar
--- /dev/null
+.define .los
+
+
+
+.los:
+ pop ix ! save return address
+ pop de ! number of bytes to transfer
+ pop hl ! address of lowest byte
+ add hl,de
+ dec hl ! address of highest byte
+ srl d ! divide de by 2
+ rr e
+ jr nc,1f ! see if de was odd
+ ld a,e ! yes, then it must be 1
+ or d
+ jr nz,.trp.z ! no, error
+ ld e,(hl) ! pack 1 byte into integer
+ push de
+ jp (ix) ! return
+1:
+ ld b,(hl) ! get 2 bytes
+ dec hl
+ ld c,(hl)
+ dec hl
+ push bc ! put them on stack, most
+ ! significant byte first
+ dec de
+ ld a,d
+ or e
+ jr nz,1b ! done ?
+ jp (ix) ! yes, return
--- /dev/null
+.define .mli2
+
+! 16 bit multiply
+! parameters:
+! bc: multiplicand
+! de: multiplier
+! hl: result (out)
+! multiplier (bc) is left unchanged
+! no detection of overflow
+
+
+
+.mli2:
+ ld hl,0
+ ld a,16
+0:
+ bit 7,d
+ jr z,1f
+ add hl,bc
+1:
+ dec a
+ jr z,2f
+ ex de,hl
+ add hl,hl
+ ex de,hl
+ add hl,hl
+ jr 0b
+2:
+ ret
--- /dev/null
+.define .mli4
+
+! 32-bit multiply routine for z80
+! parameters:
+! on stack
+
+
+
+! register utilization:
+! ix: least significant 2 bytes of result
+! hl: most significant 2 bytes of result
+! bc: least significant 2 bytes of multiplicand
+! de: most significant 2 bytes of multiplicand
+! iy: 2 bytes of multiplier (first most significant,
+! later least significant)
+! a: bit count
+.mli4:
+ !initialization
+ pop hl ! return address
+ pop de
+ ld (.mplier+2),de! least significant bytes of
+ ! multiplier
+ pop de
+ ld (.mplier),de ! most sign. bytes
+ pop de ! least significant bytes of
+ ! multiplicand
+ pop bc ! most sign. bytes
+ push hl ! return address
+ push iy ! LB
+ ld ix,0
+ xor a
+ ld h,a ! clear result
+ ld l,a
+ ld (.flag),a ! indicate that this is
+ ! first pass of main loop
+ ld iy,(.mplier)
+ ! main loop, done twice, once for each part (2 bytes)
+ ! of multiplier
+1:
+ ld a,16
+ ! sub-loop, done 16 times
+2:
+ add iy,iy ! shift left multiplier
+ jr nc,3f ! skip if most sign. bit is 0
+ add ix,de ! 32-bit add
+ adc hl,bc
+3:
+ dec a
+ jr z,4f ! done with this part of multiplier
+ add ix,ix ! 32-bit shift left
+ adc hl,hl
+ jr 2b
+4:
+ ! see if we have just processed the first part
+ ! of the multiplier (flag = 0) or the second
+ ! part (flag = 1)
+ ld a,(.flag)
+ or a
+ jr nz,5f
+ inc a ! a := 1
+ ld (.flag),a ! set flag
+ ld iy,(.mplier+2)! least significant 2 bytes now in iy
+ add ix,ix ! 32-bit shift left
+ adc hl,hl
+ jr 1b
+5:
+ ! clean up
+ pop iy ! restore LB
+ ex (sp),hl ! put most sign. 2 bytes of result
+ ! on stack! put return address in hl
+ push ix ! least sign. 2 bytes of result
+ jp (hl) ! return
+.data
+.flag: .byte 0
+.mplier: .space 4
--- /dev/null
+
+
+.define .nop
+
+! NOP
+! changed into output routine to print linenumber
+! in octal (6 digits)
+
+.nop:
+ push iy
+ ld iy,1f+5
+ ld hl,(hol0)
+ call outdec
+ ld iy,1f+18
+ ld hl,0
+ add hl,sp
+ call octnr
+ ld de,1f
+ call pstrng
+ pop iy
+ ret
+1: .asciz 'test xxxxx 0xxxxxx\r\n'
+
+octnr:
+ ld b,6
+1: ld a,7
+ and l
+ add a,'0'
+ dec iy
+ ld (iy+0),a
+ srl h
+ rr l
+ srl h
+ rr l
+ srl h
+ rr l
+ djnz 1b
+ ret
+
+
--- /dev/null
+.define outdec
+! output contents of HL as a sequence
+! of decimal digits
+outdec:
+ push hl
+ push de
+ push bc
+ push af
+ ld de,table
+ ld b,4
+1: call convert
+ or 0x30
+ ld (iy+0),a
+ inc iy
+ djnz 1b
+ ld a,l
+ or 0x30
+ ld (iy+0),a
+ pop af
+ pop bc
+ pop de
+ pop hl
+ ret
+! convert returns in a a count
+! hl is decremented count times by (de)
+! as a usefull side effect de is incremented
+! by 2
+convert:
+ push bc
+ ld b,h
+ ld c,l
+ ex de,hl
+ ld e,(hl)
+ inc hl
+ ld d,(hl)
+ inc hl
+ push hl ! save pointer to new value
+ ld h,b
+ ld l,c
+ xor a
+1: inc a
+ sbc hl,de
+ jr nc,1b
+ add hl,de
+ dec a
+ pop de
+ pop bc
+ ret
+table:
+ .short 10000
+ .short 1000
+ .short 100
+ .short 10
--- /dev/null
+.define pstrng
+! print a string of characters to the console
+! entry: DE points to string
+! string terminator is 0x00
+! exit: DE points to string terminator
+pstrng: push af
+1: ld a,(de)
+ or a
+ jr z,2f
+ call putchr
+ inc de
+ jr 1b
+2: pop af
+ ret
--- /dev/null
+.define .rck
+.rck:
+ pop bc
+ pop ix
+3: pop hl
+ push hl
+ ld e,(ix)
+ ld d,(ix+1)
+ ld a,h
+ xor d ! check sign bit to catch overflow with subtract
+ jp m,1f
+ sbc hl,de
+ jr 2f
+1: xor d ! now a equals (original) h again
+2: call m,e.rck
+ pop de
+ push de
+ ld l,(ix+2)
+ ld h,(ix+3)
+ ld a,h
+ xor d ! check sign bit to catch overflow with subtract
+ jp m,1f
+ sbc hl,de
+ jr 2f
+1: xor d ! now a equals (original) h again
+2: call m,e.rck
+ push bc
+ pop ix
+ jp (ix)
+
+
--- /dev/null
+.define .rmi2
+
+! 16-bit signed remainder
+! parameters:
+! bc: divisor
+! de: dividend
+! de: result (out)
+! no check on overflow
+
+
+
+.rmi2:
+ xor a
+ ld h,a
+ ld l,a
+ sbc hl,bc
+ jp m,1f
+ ld b,h
+ ld c,l
+1:
+ or a
+ ld hl,0
+ sbc hl,de
+ jp m,1f
+ ex de,hl
+ cpl
+1:
+ push af
+ ld hl,0
+ ld a,16
+0:
+ add hl,hl
+ ex de,hl
+ add hl,hl
+ ex de,hl
+ jr nc,1f
+ inc hl
+ or a
+1:
+ sbc hl,bc
+ inc de
+ jp p,2f
+ add hl,bc
+ dec de
+2:
+ dec a
+ jr nz,0b
+ ex de,hl
+ pop af
+ or a
+ jr z,1f
+ ld hl,0
+ sbc hl,de
+ ex de,hl
+1:
+ ret
--- /dev/null
+.define .sar
+! use .mli2
+! use .trp.z
+
+! 2-byte descriptors
+! any size array elements
+! parameters:
+! on stack
+! uses .mli2
+! adapted from .aar and .sts
+
+
+
+.sar:
+ pop hl
+ pop ix
+ ex (sp),hl
+ ld c,(ix+0)
+ ld b,(ix+1)
+ xor a
+ sbc hl,bc
+ ld c,(ix+4)
+ ld b,(ix+5)
+ ex de,hl
+ call .mli2
+ pop ix
+ pop de
+ add hl,de
+ srl b ! bc contains #bytes to transfer
+ rr c ! divide bc by 2
+ jr nc,1f
+ ld a,c
+ or b
+ jr nz,.trp.z
+ pop bc
+ ld (hl),c
+ jp (ix)
+1:
+ pop de
+ ld (hl),e
+ inc hl
+ ld (hl),d
+ inc hl
+ dec bc
+ ld a,b
+ or c
+ jr nz,1b
+ jp (ix)
--- /dev/null
+.define .sar2
+
+! special case sar: element size = 2 (statically known)
+! parameters:
+! on stack
+! adapted from .aar2
+! execution time: 143 states
+
+
+
+.sar2:
+ pop ix
+ pop hl
+ ld c,(hl)
+ inc hl
+ ld b,(hl)
+ pop hl
+ xor a
+ sbc hl,bc
+ add hl,hl
+ pop de
+ add hl,de
+ pop de
+ ld (hl),e
+ inc hl
+ ld (hl),d
+ jp (ix)
--- /dev/null
+.define .saru
+
+! SAR NOT DEFINED
+
+.saru:
+ pop ix
+ pop hl
+ xor a
+ xor h
+ jp nz,1f
+ ld a,2
+ xor l
+ jp z,2f
+1:
+ ld hl,EARRAY
+ call .trp.z
+2:
+ push ix
+ jp .sar
--- /dev/null
+.define .sdf
+
+! store double offsetted
+
+.sdf:
+ pop bc
+ push bc !test
+ pop ix ! return address
+ pop hl ! address
+ add hl,de
+ pop bc
+ ld (hl),c
+ inc hl
+ ld (hl),b
+ inc hl
+ pop bc
+ ld (hl),c
+ inc hl
+ ld (hl),b
+ jp (ix) ! return
--- /dev/null
+.define .sdl
+
+! store double local at any offset
+! parameters:
+! hl: offset
+! stack: operand (4 bytes)
+
+
+
+.sdl:
+ pop ix ! return address
+ push iy ! bc := LB
+ pop bc
+ add hl,bc ! pointer to lowest byte
+ ! of local
+ pop bc ! low 2 bytes of source
+ ld (hl),c
+ inc hl
+ ld (hl),b
+ inc hl
+ pop bc ! high 2 bytes of source
+ ld (hl),c
+ inc hl
+ ld (hl),b
+ jp (ix) ! return
+
--- /dev/null
+.define .set
+! use .unimpld
+
+! any size sets
+! parameters:
+! hl: size
+! stack: bitnumber
+! stack: result (out)
+
+
+
+.set:
+ pop ix ! return address
+ pop de ! bit number
+ ld b,h
+ ld c,l
+ xor a
+0: push af
+ inc sp
+ dec c
+ jr nz,0b
+ dec b
+ jp p,0b
+ ex de,hl
+ ld a,l
+ sra h
+ jp m,.unimpld
+ rr l
+ srl h
+ rr l
+ srl h
+ rr l
+ push hl
+ or a
+ sbc hl,de
+ pop hl
+ jp p,.unimpld
+ add hl,sp
+ ld (hl),1
+ and 7
+ jr 1f
+0: sla (hl)
+ dec a
+1: jr nz,0b
+ jp (ix)
--- /dev/null
+.define .strhp
+
+.strhp:
+ pop ix
+ pop hl
+ push hl
+ or a
+ sbc hl,sp
+ jp m,1f
+ pop hl
+ push hl
+ ld a,l
+ rra
+ jp c,1f
+ pop hl
+ ld (.reghp),hl
+ jp (ix)
+1:
+ pop hl
+ ld hl,EHEAP
+ call .trp.z
+ jp (ix)
--- /dev/null
+.define .sts
+! use trp.z
+
+! object size given by 2-byte integer on
+! top of stack.
+! parameters:
+! on stack
+! checks if #bytes is even or 1,
+! else traps
+
+
+
+.sts:
+ pop ix ! save return address
+ pop de ! # bytes to transfer
+ pop hl ! destination address
+ srl d ! divide de by 2
+ rr e
+ jr nc,1f ! see if it was odd
+ ld a,e ! yes, must be 1
+ or d
+ jr nz,.trp.z ! no, error
+ pop de ! transfer 1 byte,
+ ! padded with zeroes
+ ld (hl),e
+ jp (ix)
+1:
+ pop bc
+ ld (hl), c
+ inc hl
+ ld (hl),b
+ inc hl
+ dec de
+ ld a,e
+ or d
+ jr nz,1b
+ jp (ix)
--- /dev/null
+.define endtext,enddata,endbss
+.define _end,_etext,_edata
+
+ .text
+endtext:
+_etext:
+ .align 2
+ .data
+enddata:
+_edata:
+ .align 2
+ .bss
+_end:
+endbss:
+ .align 2
--- /dev/null
+.define .trp.z
+
+! changed into output routine to print errornumber
+
+.trp.z:
+! exx
+ pop bc
+ pop hl !error number
+ push hl
+ ld de,15
+ sbc hl,de
+ jp p,1f ! error no >= 16?
+ pop hl
+ push hl ! save error no on stack
+ push bc
+ push ix
+ push hl ! test bit "error no" of ignmask
+ ld hl,(ignmask)
+ ex (sp),hl
+ push hl
+ ld hl,2
+ call .inn
+ pop hl
+ pop ix
+ pop bc
+ ld a,h
+ or l
+ jr z,2f ! if bit <> 0 error
+1:
+ pop hl
+ push iy
+ push de
+ ld iy,1f+6
+ call outdec
+ ld de,1f
+ call pstrng
+ pop de
+ pop iy
+ jp 0x20
+2:
+ pop hl
+ push bc
+! exx
+ ret
+1: .asciz 'error xxxxx\r\n'
+
--- /dev/null
+.define unimpld, e.mon, e.rck, .trp.z, .unimpld
+
+.unimpld:
+unimpld: ! used in dispatch table to
+ ! catch unimplemented instructions
+ ld hl,EILLINS
+9: push hl
+ call .trp.z
+ jp 20
+
+e.mon:
+ ld hl,EMON
+ jr 9b
+e.rck:
+ push af
+ ld a,(ignmask)
+ bit 1,a
+ jr nz,8f
+ ld hl,ERANGE
+ jr 9b
+8:
+ pop af
+ ret
+
+
--- /dev/null
+.define .xor
+
+! auxiliary size 'xor'
+! parameters:
+! de: size
+! stack: operands
+! stack: result (out)
+
+
+
+.xor:
+ pop ix
+ ld h,d
+ ld l,e
+ add hl,sp
+ ld b,h
+ ld c,l
+ ex de,hl
+ add hl,de
+1: dec hl
+ dec de
+ ld a,(de)
+ xor (hl)
+ ld (hl),a
+ xor a
+ sbc hl,bc
+ jr z,2f
+ add hl,bc
+ jr 1b
+2: ld h,b
+ ld l,c
+ ld sp,hl
+ jp (ix)