--- /dev/null
+page0 = 1
+page1 = 2
+
+ .area SM (abs,ovr)
+
+; page 0 interpreter
+; stack and control transfer
+
+ .org page0 * 0x100
+
+p0_page1:
+ pop de
+ ld a,(bc)
+ inc bc
+ ld l,a
+ inc h ; page 1
+ jp (hl)
+
+p0_imm_call:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ inc bc
+ push bc
+ ld b,a
+ ld c,l
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_call:
+ pop hl
+ push bc
+ ld c,l
+ ld b,h
+ inc bc
+ ld l,(hl)
+ ld h,page0
+ jp (hl)
+
+p0_imm_jfalse:
+ jr nc,p0_imm_jmp
+imm_not_taken:
+ inc bc
+ inc bc
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_imm_jtrue:
+ jr nc,imm_not_taken
+p0_imm_jmp:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ ld b,a
+ ld c,l
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_jfalse:
+ jr nc,p0_jmp
+not_taken:
+ inc sp
+ inc sp
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_jtrue:
+ jr nc,not_taken
+p0_jmp:
+ pop bc
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+ .org page0 * 0x100 + 0x8f
+
+p0_imm_stkadj:
+imm_stkadj:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ inc bc
+ ld h,a
+ .db 0x3e ; ld a,
+p0_stkadj:
+ pop hl
+stkadj:
+ add hl,sp
+ ld sp,hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page0
+ jp (hl)
+
+p0_imm_stkptr:
+imm_stkptr:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ inc bc
+ ld h,a
+ .db 0x3e ; ld a,
+p0_stkptr:
+ pop hl
+stkptr:
+ add hl,sp
+ ex de,hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+p0_imm_stkld_w:
+imm_stkld_w:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ inc bc
+ ld h,a
+ .db 0x3e ; ld a,
+p0_stkld_w:
+ pop hl
+stkld_w:
+ add hl,sp
+ ld e,(hl)
+ inc hl
+ ld d,(hl)
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+p0_imm_stkst_w:
+ pop de
+imm_stkst_w:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ inc bc
+ ld h,a
+stkst_w:
+ add hl,sp
+ ld (hl),e
+ inc hl
+ ld (hl),d
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page0
+ jp (hl)
+
+p0_imm_w:
+ inc h ; page1
+imm_w:
+ ld a,(bc)
+ inc bc
+ ld e,a
+ ld a,(bc)
+ inc bc
+ ld d,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_ld_w:
+ pop de
+ inc h ; page1
+ld_w:
+ ld a,(de)
+ inc de
+ ld l,a
+ ld a,(de)
+ ld d,a
+ ld e,l
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_imm_st_w:
+ pop de
+imm_st_w:
+ ld a,(bc)
+ inc bc
+ ld (de),a
+ inc de
+ ld a,(bc)
+ inc bc
+ ld (de),a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p0_st_w:
+ pop de
+st_w:
+ pop hl
+ ld (hl),e
+ inc hl
+ ld (hl),d
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page0
+ jp (hl)
+
+; page 1 interpreter
+; word arithmetic operations
+; top stack word cached in de
+
+ .org page1 * 0x100
+
+p1_page0:
+ push de
+ ld a,(bc)
+ inc bc
+ ld l,a
+ dec h ; page 0
+ jp (hl)
+
+p1_imm_stkadj:
+ push de
+ jr imm_stkadj
+p1_stkadj:
+ ex de,hl
+ jr stkadj
+p1_imm_stkptr:
+ push de
+ jr imm_stkptr
+p1_stkptr:
+ ex de,hl
+ jr stkptr
+p1_imm_stkld_w:
+ push de
+ jr imm_stkld_w
+p1_stkld_w:
+ ex de,hl
+ jr stkld_w
+p1_imm_stkst_w:
+ jr imm_stkst_w
+p1_stkst_w:
+ push de
+ jr stkst_w
+p1_imm_w:
+ jr imm_w
+p1_ld_w:
+ jr ld_w
+p1_imm_st_w:
+ dec h ; page0
+ jr imm_st_w
+p1_st_w:
+ jr st_w
+
+p1_imm_and_w:
+ ld a,(bc)
+ inc bc
+ and e
+ ld e,a
+ ld a,(bc)
+ inc bc
+ and d
+ ld d,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p1_and_w:
+ pop hl
+ ld a,e
+ and l
+ ld e,a
+ ld a,d
+ and h
+ ld d,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+p1_imm_or_w:
+ ld a,(bc)
+ inc bc
+ or e
+ ld e,a
+ ld a,(bc)
+ inc bc
+ or d
+ ld d,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+p1_or_w:
+ pop hl
+ ld a,e
+ or l
+ ld e,a
+ ld a,d
+ or h
+ ld d,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+;p1_imm_xor_w:
+; ld a,(bc)
+; inc bc
+; xor e
+; ld e,a
+; ld a,(bc)
+; inc bc
+; xor d
+; ld d,a
+; ld a,(bc)
+; inc bc
+; ld l,a
+; jp (hl)
+
+p1_imm_xor_w: ; xor is less common than and/or, so save space for immediate
+ ld a,(bc)
+ ld l,a
+ inc bc
+ ld a,(bc)
+ ld h,a
+ inc bc
+ .db 0x3e ; ld a,
+p1_xor_w:
+ pop hl
+ ld a,e
+ xor l
+ ld e,a
+ ld a,d
+ xor h
+ ld d,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+p1_imm_add_w: ; use also for p1_imm_sub_w with negated argument
+ ld a,(bc)
+ ld l,a
+ inc bc
+ ld a,(bc)
+ ld h,a
+ inc bc
+ .db 0x3e ; ld a,
+p1_add_w:
+ pop hl
+ add hl,de
+ ex de,hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+p1_imm_subr_w: ; reversed, use with argument 0 for neg, -1 for cpl
+ ld a,(bc)
+ ld l,a
+ inc bc
+ ld a,(bc)
+ ld h,a
+ inc bc
+ .db 0x3e ; ld a,
+p1_sub_w:
+ pop hl
+ or a
+ sbc hl,de
+mul_done:
+ ex de,hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+p1_imm_eq_w:
+ ld a,(bc)
+ ld l,a
+ inc bc
+ ld a,(bc)
+ ld h,a
+ inc bc
+ .db 0x3e ; ld a,
+p1_eq_w:
+ pop hl
+ or a
+ sbc hl,de
+ ld a,l
+ or h
+ cp 1
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page0
+ jp (hl)
+
+p1_imm_gt_uw:
+ ld a,(bc)
+ ld l,a
+ inc bc
+ ld a,(bc)
+ ld h,a
+ inc bc
+ .db 0x3e ; ld a,
+p1_lt_uw:
+ pop hl
+ or a
+ sbc hl,de
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page0
+ jp (hl)
+
+p1_imm_gt_sw:
+ ld a,(bc)
+ ld l,a
+ inc bc
+ ld a,(bc)
+ ld h,a
+ inc bc
+ .db 0x3e ; ld a,
+p1_lt_sw:
+ pop hl
+ ld a,l
+ sub e
+ ld a,h
+ sbc a,d
+ rla
+ jp po,1$
+ ccf
+1$: ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page0
+ jp (hl)
+
+p1_imm_sl_w: ; nonzero unsigned byte argument
+ jr imm_sl_w
+
+p1_sl_w:
+ pop hl
+ inc e
+ jr sl_loope
+
+p1_imm_sr_uw: ; nonzero unsigned byte argument
+ jr imm_sr_uw
+
+p1_sr_uw:
+ ld l,e
+ pop de
+ ld a,e
+ inc l
+ jr srl_loope
+
+p1_imm_sr_sw: ; nonzero unsigned byte argument
+ jr imm_sr_sw
+
+p1_sr_sw:
+ ld l,e
+ pop de
+ ld a,e
+ inc l
+ jr sra_loope
+
+p1_imm_mul_w: ; big endian argument
+ ld l,mul_done
+ push hl
+ ld a,(bc)
+ inc bc
+ ld hl,0
+ call mul
+ ld a,(bc)
+ inc bc
+ jr mul
+
+p1_mul_w:
+ ld l,<mul_done
+ ex (sp),hl
+ jr mul_w
+
+p1_div_uw:
+ ld l,<div_done
+ ex (sp),hl
+ jr div_uw
+
+p1_div_sw:
+ pop hl
+ call div_sw
+div_done:
+ push de
+ ex de,hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+; math package
+
+imm_sl_w:
+ ex de,hl
+ ld a,(bc)
+ inc bc
+ ld e,a
+sl_loop:
+ add hl,hl
+sl_loope:
+ dec e
+ jr nz,sl_loop
+ ex de,hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page1
+ jp (hl)
+
+imm_sr_uw:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,e
+srl_loop:
+ srl d
+ rra
+srl_loope:
+ dec l
+ jr nz,srl_loop
+ ld e,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+imm_sr_sw:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,e
+sra_loop:
+ sra d
+ rra
+sra_loope:
+ dec l
+ jr nz,sra_loop
+ ld e,a
+ ld a,(bc)
+ inc bc
+ ld l,a
+ jp (hl)
+
+mul_w:
+ ld a,l
+ push af
+ ld a,h
+ ld hl,0
+ call mul
+ pop af
+mul: ; bit 0
+ add hl,hl
+ rla
+ jr nc,1$
+ add hl,de
+1$: ; bit 1
+ add hl,hl
+ rla
+ jr nc,2$
+ add hl,de
+2$: ; bit 2
+ add hl,hl
+ rla
+ jr nc,3$
+ add hl,de
+3$: ; bit 3
+ add hl,hl
+ rla
+ jr nc,4$
+ add hl,de
+4$: ; bit 4
+ add hl,hl
+ rla
+ jr nc,5$
+ add hl,de
+5$: ; bit 5
+ add hl,hl
+ rla
+ jr nc,6$
+ add hl,de
+6$: ; bit 6
+ add hl,hl
+ rla
+ jr nc,7$
+ add hl,de
+7$: ; bit 7
+ add hl,hl
+ rla
+ ret nc
+ add hl,de
+ ret
+
+div_uw:
+ push bc
+ ld a,h
+ ld c,l
+ ld hl,0
+
+divu: ; positive dividend, positive divisor
+ call div0
+ ld b,a
+ ld a,c
+ call div
+ jr nc,1$
+ add hl,de
+1$: ld d,b
+ ld e,a
+ pop bc
+ ret
+
+div_sw:
+ push bc
+ ld a,h
+ or a
+ ld a,d
+ rla
+ jp p,4$ ; positive dividend
+
+ ; negative dividend
+ dec hl ; reduces remainder by 1 (we inc later)
+ ld a,h
+ ld c,l
+ ld hl,-1
+ jr c,2$ ; negative dividend, negative divisor
+
+ ; negative dividend, positive divisor
+ call div1
+ ld b,a
+ ld a,c
+ call div
+ inc a
+ jr c,1$
+ sbc hl,de
+1$: inc hl ; get into range -divisor+1..0
+ ld d,b
+ ld e,a
+ pop bc
+ ret
+
+2$: ; negative dividend, negative divisor
+ call divn0
+ ld b,a
+ ld a,c
+ call divn
+ jr nc,3$
+ add hl,de
+3$: inc hl ; get into range divisor+1..0
+ ld d,b
+ ld e,a
+ pop bc
+ ret
+
+4$: ; positive dividend
+ ld a,h
+ ld c,l
+ ld hl,0
+ jr nc,divu ; positive dividend, positive divisor
+
+ ; positive dividend, negative divisor
+ call divn1
+ ld b,a
+ ld a,c
+ call divn
+ inc a
+ jr c,5$
+ sbc hl,de
+5$: ld d,b
+ ld e,a
+ pop bc
+ ret
+
+; non-restoring division routine
+
+; de = divisor, hl:a = dividend with hl = previous remainder, a = next byte
+; enter at div0 with positive remainder in hl, such that hl < de
+; enter at div1 with negative remainder in hl, such that hl >= -de
+
+; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
+; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
+
+; if entered at div0, there is a -80 and so quotient is in range -ff..-1
+; if entered at div1, there is a +80 and so quotient is in range 1..ff
+; falls out of loop after div01 with positive remainder, div11 with negative,
+; depending on this we should re-enter at div0 or div1, signalled by cf return
+
+; the successive quotient bytes can be concatenated into a full quotient,
+; but negative bytes require the next higher quotient byte to be decremented,
+; we know in advance if this will happen because the implied sign of the
+; quotient byte depends only on whether we entered at div0 or div1, hence,
+; before the div11 return we'll decrement to compensate for next negative byte
+
+; the decrement can also be seen as compensating for the extra add hl,de that
+; may be needed to make negative remainder positive before return to caller,
+; thus leaving quotient in a consistent state regardless of which exit taken,
+; remainder needs the add hl,de if cf=1 returned (equiv. return byte is even)
+
+; in the following code each sbc hl,de gets an inc a and each add hl,de gets
+; a dec a, guaranteeing the integrity of the division, the initial scf/rla is
+; needed to make the result 100 + -ff..ff or 1..1ff, so that the decrements
+; cannot borrow into the upcoming dividend bits also held in a, and there must
+; be another shift between the scf/rla and increment/decrement so that the scf
+; is implicitly in the 100s place, making the code awkward though it's correct
+
+; now optimized to only inc/dec a when doing zero-crossing, fix above analysis
+
+div: jr c,div1
+div0: ; bit 0, above
+ scf
+ rla
+ adc hl,hl
+ sbc hl,de
+ jr nc,div01
+ dec a
+div11: ; bit 1, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div12
+ inc a
+div02: ; bit 2, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div03
+ dec a
+div13: ; bit 3, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div14
+ inc a
+div04: ; bit 4, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div05
+ dec a
+div15: ; bit 5, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div16
+ inc a
+div06: ; bit 6, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div07
+ dec a
+div17: ; bit 7, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div18
+ inc a
+div08: ; done, above
+ add a,a
+ dec a
+ or a
+ ret
+
+div1: ; bit 0, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div11
+ inc a
+div01: ; bit 1, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div02
+ dec a
+div12: ; bit 2, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div13
+ inc a
+div03: ; bit 3, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div04
+ dec a
+div14: ; bit 4, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div15
+ inc a
+div05: ; bit 5, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div06
+ dec a
+div16: ; bit 6, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div17
+ inc a
+div07: ; bit 7, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div08
+ dec a
+div18: ; done, below
+ add a,a
+ ;inc a
+ ;dec a ; compensation
+ scf
+ ret
+
+; divn0/1 are the same as div0/1 but carry reversed after add/subtract divisor
+; this is for negative divisors where we expect carry (means no zero crossing)
+
+; when divisor negated, remainder also negated, so we expect to do subtraction
+; when remainder negative and vice versa, need to clear carry after add hl,hl
+
+divn: jr c,divn1
+divn0: ; bit 0, above
+ scf
+ rla
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn01
+ dec a
+divn11: ; bit 1, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn12
+ inc a
+divn02: ; bit 2, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn03
+ dec a
+divn13: ; bit 3, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn14
+ inc a
+divn04: ; bit 4, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn05
+ dec a
+divn15: ; bit 5, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn16
+ inc a
+divn06: ; bit 6, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn07
+ dec a
+divn17: ; bit 7, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn18
+ inc a
+divn08: ; done, above
+ add a,a
+ dec a
+ or a
+ ret
+
+divn1: ; bit 0, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn11
+ inc a
+divn01: ; bit 1, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn02
+ dec a
+divn12: ; bit 2, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn13
+ inc a
+divn03: ; bit 3, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn04
+ dec a
+divn14: ; bit 4, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn15
+ inc a
+divn05: ; bit 5, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn06
+ dec a
+divn16: ; bit 6, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,divn17
+ inc a
+divn07: ; bit 7, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,divn08
+ dec a
+divn18: ; done, below
+ add a,a
+ ;inc a
+ ;dec a ; compensation
+ scf
+ ret
+
+; debugging
+
+digits:
+ .ascii '0123456789abcdef'
+
+print_word:
+ push af
+ ld a,h
+ call print_byte
+ ld a,l
+ call print_byte
+ pop af
+ ret
+
+print_byte:
+ push af
+ push af
+ rrca
+ rrca
+ rrca
+ rrca
+ call print_digit
+ pop af
+ call print_digit
+ pop af
+ ret
+
+print_digit:
+ push de
+ push hl
+ and 0xf
+ ld e,a
+ ld d,0
+ ld hl,digits
+ add hl,de
+ ld a,(hl)
+ pop hl
+ pop de
+print_char:
+ push bc
+ push de
+ push hl
+ ld e,a
+ ld c,2
+ call 5
+ pop hl
+ pop de
+ pop bc
+ ret