; Z80 integer math routines (shifts, multiplies, divides), ASxxxx syntax,
; preceded by a self-test driver assembled at 0x100.
;
; Conventions used throughout:
;   word values live in hl / de / bc, long values in reg':reg where the
;   primed (exx) register holds the high word, e.g. hl':hl
;   a trailing 0 on an entry point name means "operands arrive in the
;   other order / are set up for you", falling through to the base entry

page0 = 4
page1 = 6
page2 = 8

	.area SM (abs,ovr)
	.org 0x100

; ---------------------------------------------------------------------------
; test driver
; each case multiplies hl by de and prints the double width product, then
; divides that product by bc (math_mul_*0 copies the original hl into bc)
; and prints remainder:quotient
; ---------------------------------------------------------------------------

	; unsigned word
	ld hl,0x1234
	ld de,0x56
	call math_mul_uw0
	call print_hlde
	call math_div_uw
	call print_hlde

	ld hl,0x6543
	ld de,0x21
	call math_mul_uw0
	call print_hlde
	call math_div_uw
	call print_hlde

	ld hl,0xb975
	ld de,0x31
	call math_mul_uw0
	call print_hlde
	call math_div_uw
	call print_hlde

	ld hl,0xdb97
	ld de,0x531
	call math_mul_uw0
	call print_hlde
	call math_div_uw
	call print_hlde

	; signed word, operands as written
	ld hl,0x1234
	ld de,0x56
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,0x6543
	ld de,0x21
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,0xb975
	ld de,0x31
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,0xdb97
	ld de,0x531
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	; signed word, hl negated
	ld hl,-0x1234
	ld de,0x56
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,-0x6543
	ld de,0x21
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,-0xb975
	ld de,0x31
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,-0xdb97
	ld de,0x531
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	; signed word, de negated
	ld hl,0x1234
	ld de,-0x56
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,0x6543
	ld de,-0x21
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,0xb975
	ld de,-0x31
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,0xdb97
	ld de,-0x531
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	; signed word, both negated
	ld hl,-0x1234
	ld de,-0x56
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,-0x6543
	ld de,-0x21
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,-0xb975
	ld de,-0x31
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	ld hl,-0xdb97
	ld de,-0x531
	call math_mul_sw0
	call print_hlde
	call math_div_sw
	call print_hlde

	; unsigned long
	ld hl,0xdb97
	ld de,0x7531
	exx
	ld hl,0x1357
	ld de,0xfdb9
	exx
	call math_mul_ul0
	call print_hlhldede
	call math_div_ul
	call print_hlhldede

	; signed long, operands as written
	ld hl,0xdb97
	ld de,0x7531
	exx
	ld hl,0x1357
	ld de,0xfdb9
	exx
	call math_mul_sl0
	call print_hlhldede
	call math_div_sl
	call print_hlhldede

	; signed long, hl':hl complemented
	ld hl,~0xdb97
	ld de,0x7531
	exx
	ld hl,~0x1357
	ld de,0xfdb9
	exx
	call math_mul_sl0
	call print_hlhldede
	call math_div_sl
	call print_hlhldede

	; signed long, de':de complemented
	ld hl,0xdb97
	ld de,~0x7531
	exx
	ld hl,0x1357
	ld de,~0xfdb9
	exx
	call math_mul_sl0
	call print_hlhldede
	call math_div_sl
	call print_hlhldede

	; signed long, both complemented
	ld hl,~0xdb97
	ld de,~0x7531
	exx
	ld hl,~0x1357
	ld de,~0xfdb9
	exx
	call math_mul_sl0
	call print_hlhldede
	call math_div_sl
	call print_hlhldede

	; copy the block restarts..restarts_end to address 0x28, then start
	; the code at sm_start through page0_dispatch0 with bc -> sm_start
	; (restarts, restarts_end and page0_dispatch0 are defined elsewhere)
	ld hl,restarts
	ld de,0x28
	ld bc,restarts_end - restarts
	ldir
	ld bc,sm_start
	jp page0_dispatch0

sm_start:
	; NOTE(review): this copy of the file is damaged at this point.  The
	; operands of the .db and everything up to the middle of the header
	; comment of the next routine appear to be missing (the fragment looks
	; like the tail of "hl = de >> (l & 0xf), logical").  The surviving
	; text is kept verbatim below; restore the span from a pristine copy
	; before assembling.
	.db > (l & 0xf), logical
	ex de,hl

; ---------------------------------------------------------------------------
; right shifts
; the count is biased by 7 and stored in e, then decoded bit by bit:
; bits 0..2 select left shifts by 1, 2 and 4 (skipped when the bit is set),
; bits 3 and up select right shifts by whole bytes, so the net effect is
; a right shift by (e - 7); a holds the extension byte that is shifted in
; (0 for logical, sign fill for arithmetic)
; ---------------------------------------------------------------------------

math_sr_uw: ; hl >>= e & 0xf, logical
	ld a,e
	; the mask must be 0xf: sr_w_entry only decodes biased counts in
	; [7, 0x17), anything larger would return a left shifted value
	and 0xf
	add 7
math_sr_uw1: ; hl >>= a - 7, immediate a in [7, 0x17), logical
	ld e,a
	sub a			; extension byte = 0
	jr sr_w_entry

math_sr_sw0: ; hl = de >> (l & 0xf), arithmetic
	ex de,hl
math_sr_sw: ; hl >>= e & 0xf, arithmetic
	ld a,e
	and 0xf
	add 7
math_sr_sw1: ; hl >>= a - 7, immediate a in [7, 0x17), arithmetic
	ld e,a
	ld a,h
	rla
	sbc a,a			; extension byte = sign fill of hl
sr_w_entry:
	; by -1
	bit 0,e
	jr nz,1$
	add hl,hl
	rla
1$:	; by -2
	bit 1,e
	jr nz,2$
	add hl,hl
	rla
	add hl,hl
	rla
2$:	; by -4
	bit 2,e
	jr nz,3$
	add hl,hl
	rla
	add hl,hl
	rla
	add hl,hl
	rla
	add hl,hl
	rla
3$:	; by 8
	bit 3,e
	jr z,4$
	ld l,h
	ld h,a
	ret
4$:	; by 16 (can't occur simultaneously with by 8)
	bit 4,e
	ret z
	ld l,a
	rla
	sbc a,a
	ld h,a
	ret

math_sr_ul0: ; hl':hl = de':hl >> (e & 0x1f), logical
	exx
	ex de,hl
	exx
math_sr_ul: ; hl':hl >>= e & 0x1f, logical
	ld a,e
	and 0x1f
	add 7
math_sr_ul1: ; hl':hl >>= a - 7, immediate a in [7, 0x27), logical
	ld e,a
	sub a			; extension byte = 0
	jr sr_l_entry

math_sr_sl0: ; hl':hl = de':hl >> (e & 0x1f), arithmetic
	exx
	ex de,hl
	exx
math_sr_sl: ; hl':hl >>= e & 0x1f, arithmetic
	ld a,e
	; the mask must be 0x1f so that counts 16..31 are reachable, as in
	; math_sr_ul and as the [7, 0x27) range of math_sr_sl1 implies
	and 0x1f
	add 7
math_sr_sl1: ; hl':hl >>= a - 7, immediate a in [7, 0x27), arithmetic
	ld e,a
	exx
	ld a,h
	exx
	rla
	sbc a,a			; extension byte = sign fill of hl':hl
sr_l_entry:
	; by -1
	bit 0,e
	jr nz,1$
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
1$:	; by -2
	bit 1,e
	jr nz,2$
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
2$:	; by -4
	bit 2,e
	jr nz,3$
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
3$:	; by 8 (uses d' as scratch)
	bit 3,e
	jr z,4$
	exx
	ld d,l
	ld l,h
	ld h,a
	rla
	ld a,d
	exx
	ld l,h
	ld h,a
	sbc a,a
4$:	; by 16
	bit 4,e
	jr z,5$
	exx
	push hl
	ld l,a
	rla
	sbc a,a
	ld h,a
	exx
	pop hl
	ret
5$:	; by 32 (can't occur simultaneously with by 16)
	bit 5,e
	ret z
	ld l,a
	rla
	sbc a,a
	ld h,a
	exx
	ld l,a
	ld h,a
	exx
	ret

; smul: short multiplication
;   16 * 16 to 16 bit product (word)
;   32 * 32 to 32 bit product (long)
; mul: long multiplication
;   16 + 16 * 16 to 32 bit product (word)
;   32 + 32 * 32 to 64 bit product (long)
; smul is implemented in a more optimal way that uses only left shifts,
; since left shifts are cheaper on the z80, this works for smul because
; there is no need to worry about propagating carries into high result
; mul has the ability to initialize the product with some nonzero value,
; which smul doesn't have because it only shifts zeros in from the left,
; using this ability the long multiplication reverses the long division
; (initialize product with a remainder, then add in quotient * divisor)

math_smul_w0: ; hl *= de
	ld c,l
	ld b,h
math_smul_w: ; hl = bc * de
	ld a,d
	; enter at smul_w, not smul_w0: the leading add hl,hl is what makes
	; 16 doublings in total, so whatever was in hl on entry (math_smul_w0
	; leaves the multiplicand there) is shifted out completely; with only
	; 15 doublings bit 0 of the old hl leaked into bit 15 of the product
	call smul_w
	ld a,e
smul_w:	; bit 0
	add hl,hl
smul_w0:
	rla
	jr nc,1$
	add hl,bc
1$:	; bit 1
	add hl,hl
	rla
	jr nc,2$
	add hl,bc
2$:	; bit 2
	add hl,hl
	rla
	jr nc,3$
	add hl,bc
3$:	; bit 3
	add hl,hl
	rla
	jr nc,4$
	add hl,bc
4$:	; bit 4
	add hl,hl
	rla
	jr nc,5$
	add hl,bc
5$:	; bit 5
	add hl,hl
	rla
	jr nc,6$
	add hl,bc
6$:	; bit 6
	add hl,hl
	rla
	jr nc,7$
	add hl,bc
7$:	; bit 7
	add hl,hl
	rla
	ret nc
	add hl,bc
	ret

math_smul_l0: ; hl':hl *= de':de
	ld c,l
	ld b,h
	exx
	ld c,l
	ld b,h
	exx
math_smul_l: ; hl':hl = de':de * bc':bc
	exx
	ld a,d
	exx
	; enter at smul_l, not smul_l0, for the same reason as in math_smul_w:
	; 32 doublings are needed to shift the incoming hl':hl out completely
	call smul_l
	exx
	ld a,e
	exx
	call smul_l
	ld a,d
	call smul_l
	ld a,e
smul_l:	; bit 0
	add hl,hl
	exx
	adc hl,hl
	exx
smul_l0:
	rla
	jr nc,1$
	add hl,bc
	exx
	adc hl,bc
	exx
1$:	; bit 1
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	jr nc,2$
	add hl,bc
	exx
	adc hl,bc
	exx
2$:	; bit 2
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	jr nc,3$
	add hl,bc
	exx
	adc hl,bc
	exx
3$:	; bit 3
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	jr nc,4$
	add hl,bc
	exx
	adc hl,bc
	exx
4$:	; bit 4
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	jr nc,5$
	add hl,bc
	exx
	adc hl,bc
	exx
5$:	; bit 5
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	jr nc,6$
	add hl,bc
	exx
	adc hl,bc
	exx
6$:	; bit 6
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	jr nc,7$
	add hl,bc
	exx
	adc hl,bc
	exx
7$:	; bit 7
	add hl,hl
	exx
	adc hl,hl
	exx
	rla
	ret nc
	add hl,bc
	exx
	adc hl,bc
	exx
	ret

math_mul_sw0: ; hl:de = hl * de, signed
	ld c,l
	ld b,h
	ld hl,0
math_mul_sw: ; hl:de = hl + bc * de, signed
	ld a,b
	rla ; cf will be preserved through to the last rra below
	ld a,e
	call mul_uw
	push af			; low product byte, parked until the end
	ld a,d
	call mul_uw1 ; do only 7 bits, get sign of d into cf
	jr nc,1$
	or a
	sbc hl,bc
1$:	rr h
	rr l
	rra
	jr nc,2$
	or a
	sbc hl,de
2$:	ld d,a
	pop af
	ld e,a
	ret

math_mul_uw0: ; hl:de = hl * de, unsigned
	ld c,l
	ld b,h
	ld hl,0
math_mul_uw: ; hl:de = hl + bc * de, unsigned
	ld a,e
	call mul_uw
	ld e,a
	ld a,d
	call mul_uw
	ld d,a
	ret

; multiply step for one multiplier byte in a, least significant bit first:
; for each bit, add bc into hl when the bit is set, then shift hl right by
; one with the bit that falls out of l going into the top of a, so a ends
; up holding the next (lower) byte of the product
; mul_uw1 is the same with the first bit skipped
mul_uw:
	rra ; bit 0
	jr nc,1$
	add hl,bc
1$:	rr h
	rr l
mul_uw1:
	rra ; bit 1
	jr nc,2$
	add hl,bc
2$:	rr h
	rr l
	rra ; bit 2
	jr nc,3$
	add hl,bc
3$:	rr h
	rr l
	rra ; bit 3
	jr nc,4$
	add hl,bc
4$:	rr h
	rr l
	rra ; bit 4
	jr nc,5$
	add hl,bc
5$:	rr h
	rr l
	rra ; bit 5
	jr nc,6$
	add hl,bc
6$:	rr h
	rr l
	rra ; bit 6
	jr nc,7$
	add hl,bc
7$:	rr h
	rr l
	rra ; bit 7
	jr nc,8$
	add hl,bc
8$:	rr h
	rr l
	rra
	ret

math_mul_sl0: ; hl':hl:de':de = hl':hl * de':de, signed
	ld c,l
	ld b,h
	sub a
	ld l,a
	ld h,a
	exx
	ld c,l
	ld b,h
	ld l,a
	ld h,a
	exx ; hard to optimize this
math_mul_sl: ; hl':hl:de':de = hl':hl + bc':bc * de':de, signed
	exx
	ld a,b
	rla ; cf will be preserved through to the last rra below
	exx
	ld a,e
	call mul_ul
	push af			; product byte 0
	ld a,d
	call mul_ul
	push af			; product byte 1
	exx
	ld a,e
	exx
	call mul_ul
	push af			; product byte 2
	exx
	ld a,d
	exx
	call mul_ul1 ; do only 7 bits, get sign of d into cf
	jr nc,1$
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx ; hard to optimize this
1$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra
	jr nc,2$
	or a
	sbc hl,de
	exx
	sbc hl,de
	exx ; hard to optimize this
2$:	exx
	ld d,a
	pop af
	ld e,a
	exx
	pop de			; d = product byte 1 (e gets the saved flags)
	pop af
	ld e,a			; e = product byte 0
	ret

math_mul_ul0: ; hl':hl:de':de = hl':hl * de':de, unsigned
	ld c,l
	ld b,h
	sub a
	ld l,a
	ld h,a
	exx
	ld c,l
	ld b,h
	ld l,a
	ld h,a
	exx
math_mul_ul: ; hl':hl:de':de = hl':hl + bc':bc * de':de, unsigned
	ld a,e
	call mul_ul
	ld e,a
	ld a,d
	call mul_ul
	ld d,a
	exx
	ld a,e
	exx
	call mul_ul
	exx
	ld e,a
	ld a,d
	exx
	call mul_ul
	exx
	ld d,a
	exx
	ret

; long version of mul_uw / mul_uw1: the accumulator is hl':hl and the
; multiplicand is bc':bc
mul_ul:
	rra ; bit 0
	jr nc,1$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
1$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
mul_ul1:
	rra ; bit 1
	jr nc,2$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
2$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra ; bit 2
	jr nc,3$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
3$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra ; bit 3
	jr nc,4$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
4$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra ; bit 4
	jr nc,5$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
5$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra ; bit 5
	jr nc,6$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
6$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra ; bit 6
	jr nc,7$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
7$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra ; bit 7
	jr nc,8$
	add hl,bc
	exx
	adc hl,bc
	exx ; optimize this
8$:	exx
	rr h
	rr l
	exx
	rr h
	rr l
	rra
	ret

; sdiv: short division
;   16 / 16 to 16 bit quotient, 16 bit remainder (word)
;   32 / 32 to 32 bit quotient, 32 bit remainder (long)
; div: long division
;   32 / 16 to 16 bit quotient, 16 bit remainder (word)
;   64 / 32 to 32 bit quotient, 32 bit remainder (long)
; sdiv is implemented as sign/zero extension then div

math_sdiv_sw0: ; hl, de = hl % de, hl / de, signed
	ex de,hl
math_sdiv_sw: ; hl, de = de % hl, de / hl, signed
	ld c,l
	ld b,h
	ld a,d
	rla
	sbc a,a			; sign extend the dividend into hl
	ld l,a
	ld h,a
math_div_sw: ; hl, de = hl:de % bc, hl:de / bc, signed
	ld a,h
	or a			; sign flag = sign of the dividend
	ld a,b
	rla			; cf = sign of the divisor (rla leaves the sign flag alone)
	jp m,div_w_n ; negative dividend
	; positive dividend
	ld a,d
	jr nc,div_w_pp ; positive dividend, positive divisor
	; positive dividend, negative divisor
	call div_w_n1
	ld d,a
	ld a,e
	call div_w_ncf
	inc a
	ld e,a
	ret c
	sbc hl,bc
	ret
; negative dividend half of math_div_sw: reached with hl:de = dividend,
; bc = divisor and cf = sign of the divisor
div_w_n: ; negative dividend
	dec de ; reduces remainder by 1 (we inc later)
	ld a,d
	jr c,div_w_nn ; negative dividend, negative divisor
	; negative dividend, positive divisor
	call div_w1
	ld d,a
	ld a,e
	call div_wcf
	inc a
	ld e,a
	inc hl ; get into range -divisor+1..0
	ret c
	sbc hl,bc
	ret

div_w_nn: ; negative dividend, negative divisor
	call div_w_n0
	ld d,a
	ld a,e
	call div_w_ncf
	ld e,a
	inc hl ; get into range divisor+1..0
	ret nc
	add hl,bc
	ret

math_sdiv_uw0: ; hl, de = hl % de, hl / de, unsigned
	ex de,hl
math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
	ld c,l
	ld b,h
	ld hl,0
math_div_uw: ; hl, de = hl:de % bc, hl:de / bc, unsigned
	ld a,d
div_w_pp: ; positive dividend, positive divisor
	call div_w0
	ld d,a
	ld a,e
	call div_wcf
	ld e,a
	ret nc
	add hl,bc
	ret

; non-restoring division routine
; de = divisor, hl:a = dividend with hl = previous remainder, a = next byte
; (note: the code below actually keeps the divisor in bc, and "div0" /
; "div1" in this text correspond to the div_w0 / div_w1 entry points)
; enter at div0 with positive remainder in hl, such that hl < de
; enter at div1 with negative remainder in hl, such that hl >= -de
; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
; if entered at div0, there is a -80 and so quotient is in range -ff..-1
; if entered at div1, there is a +80 and so quotient is in range 1..ff
; falls out of loop after div01 with positive remainder, div11 with negative,
; depending on this we should re-enter at div0 or div1, signalled by cf return
; the successive quotient bytes can be concatenated into a full quotient,
; but negative bytes require the next higher quotient byte to be decremented,
; we know in advance if this will happen because the implied sign of the
; quotient byte depends only on whether we entered at div0 or div1, hence,
; before the div11 return we'll decrement to compensate for next negative byte
; the decrement can also be seen as compensating for the extra add hl,de that
; may be needed to make negative remainder positive before return to caller,
; thus leaving quotient in a consistent state regardless of which exit taken,
; remainder needs the add hl,de if cf=1 returned (equiv. return byte is even)
; in the following code each sbc hl,de gets an inc a and each add hl,de gets
; a dec a, guaranteeing the integrity of the division, the initial scf/rla is
; needed to make the result 100 + -ff..ff or 1..1ff, so that the decrements
; cannot borrow into the upcoming dividend bits also held in a, and there must
; be another shift between the scf/rla and increment/decrement so that the scf
; is implicitly in the 100s place, making the code awkward though it's correct
; now optimized to only inc/dec a when doing zero-crossing, fix above analysis

; looped form: d counts the 8 bits and de is preserved with push/pop
; div_wcf / div_w_ncf pick the entry point from the cf left by the
; previous byte's call

.if 1 ; temporary
div_wcf:
	jr c,div_w1
div_w0: ; above
	push de
	ld d,8
	scf
	rla
div_w00:
	adc hl,hl
	jr c,div_w01
	sbc hl,bc
	jr nc,div_w02
	dec a
	;jr div_w12
	add a,a
	dec d
	jr nz,div_w10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret
div_w01:
	or a
	sbc hl,bc
div_w02:
	add a,a
	dec d
	jr nz,div_w00
	dec a
	or a
	pop de
	ret

div_w1: ; below
	push de
	ld d,8
	add a,a
div_w10:
	adc hl,hl
	jr nc,div_w11
	add hl,bc
	jr nc,div_w12
	inc a
	;jr div_w02
	add a,a
	dec d
	jr nz,div_w00
	dec a
	or a
	pop de
	ret
div_w11:
	add hl,bc
div_w12:
	add a,a
	dec d
	jr nz,div_w10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret

div_w_ncf:
	jr c,div_w_n1
div_w_n0: ; above
	push de
	ld d,8
	scf
	rla
div_w_n00:
	adc hl,hl
	jr nc,div_w_n01
	or a
	sbc hl,bc
	jr c,div_w_n02
	dec a
	;jr div_w_n12
	add a,a
	dec d
	jr nz,div_w_n10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret
div_w_n01:
	sbc hl,bc
div_w_n02:
	add a,a
	dec d
	jr nz,div_w_n00
	dec a
	or a
	pop de
	ret

div_w_n1: ; below
	push de
	ld d,8
	add a,a
div_w_n10:
	adc hl,hl
	jr c,div_w_n11
	add hl,bc
	jr c,div_w_n12
	inc a
	;jr div_w_n02
	add a,a
	dec d
	jr nz,div_w_n00
	dec a
	or a
	pop de
	ret
div_w_n11:
	add hl,bc
div_w_n12:
	add a,a
	dec d
	jr nz,div_w_n10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret
.else
div_wcf:
	jr c,div_w1
div_w0: ; bit 0, above
	scf
	rla
	adc hl,hl
	sbc hl,bc
	jr nc,div_w01
	dec a
div_w11: ; bit 1, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w12
	inc a
div_w02: ; bit 2, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w03
	dec a
div_w13: ; bit 3, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w14
	inc a
div_w04: ; bit 4, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w05
	dec a
div_w15: ; bit 5, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w16
	inc a
div_w06: ; bit 6, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w07
	dec a
div_w17: ; bit 7, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w18
	inc a
div_w08: ; done, above
	add a,a
	dec a
	or a
	ret

div_w1: ; bit 0, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w11
	inc a
div_w01: ; bit 1, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w02
	dec a
div_w12: ; bit 2, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w13
	inc a
div_w03: ; bit 3, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w04
	dec a
div_w14: ; bit 4, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w15
	inc a
div_w05: ; bit 5, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w06
	dec a
div_w16: ; bit 6, below
	add a,a
	adc hl,hl
	add hl,bc
	jr nc,div_w17
	inc a
div_w07: ; bit 7, above
	add a,a
	adc hl,hl
	sbc hl,bc
	jr nc,div_w08
	dec a
div_w18: ; done, below
	add a,a
	;inc a
	;dec a ; compensation
	scf
	ret

; divn0/1 are the same as div0/1 but carry reversed after add/subtract divisor
; this is for negative divisors where we expect carry (means no zero crossing)
; when divisor negated, remainder also negated, so we expect to do subtraction
; when remainder negative and vice versa, need to clear carry after add hl,hl

div_w_ncf:
	jr c,div_w_n1
div_w_n0: ; bit 0, above
	scf
	rla
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n01
	dec a
div_w_n11: ; bit 1, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n12
	inc a
div_w_n02: ; bit 2, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n03
	dec a
div_w_n13: ; bit 3, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n14
	inc a
div_w_n04: ; bit 4, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n05
	dec a
div_w_n15: ; bit 5, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n16
	inc a
div_w_n06: ; bit 6, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n07
	dec a
div_w_n17: ; bit 7, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n18
	inc a
div_w_n08: ; done, above
	add a,a
	dec a
	or a
	ret

div_w_n1: ; bit 0, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n11
	inc a
div_w_n01: ; bit 1, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n02
	dec a
div_w_n12: ; bit 2, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n13
	inc a
div_w_n03: ; bit 3, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n04
	dec a
div_w_n14: ; bit 4, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n15
	inc a
div_w_n05: ; bit 5, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n06
	dec a
div_w_n16: ; bit 6, below
	add a,a
	adc hl,hl
	add hl,bc
	jr c,div_w_n17
	inc a
div_w_n07: ; bit 7, above
	add a,a
	adc hl,hl
	or a
	sbc hl,bc
	jr c,div_w_n08
	dec a
div_w_n18: ; done, below
	add a,a
	;inc a
	;dec a ; compensation
	scf
	ret
.endif

math_sdiv_sl0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, signed
	exx
	ex de,hl
	exx
math_sdiv_sl: ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, signed
	exx
	ld c,l
	ld b,h
	ld a,d
	rla
	; sbc (not sub) so that a becomes 0x00 or 0xff from the sign bit that
	; rla moved into cf; this sign extends the dividend into hl':hl the
	; same way math_sdiv_sw does for words
	sbc a,a
	ld l,a
	ld h,a
	exx
	ld c,l
	ld b,h
	ld l,a
	ld h,a
math_div_sl: ; hl':hl, de':de =
	; hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, signed
	exx
	ld a,h
	or a
	jp m,div_l_n ; negative dividend
	; positive dividend
	ld a,b
	rla			; cf = sign of the divisor
	ld a,d
	exx
	jr nc,div_l_pp ; positive dividend, positive divisor
	; positive dividend, negative divisor
	call div_l_n1
	exx
	ld d,a
	ld a,e
	exx
	call div_l_ncf
	exx
	ld e,a
	exx
	ld a,d
	call div_l_ncf
	ld d,a
	ld a,e
	call div_l_ncf
	inc a
	ld e,a
	ret c
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	ret

div_l_n: ; negative dividend (entered with the primed registers selected)
	exx
	ld a,e
	or d			; zero flag = (de == 0), i.e. the dec below borrows
	dec de ; reduces remainder by 1 (we inc later)
	exx
	jr nz,1$
	dec de
1$:	ld a,b
	rla			; cf = sign of the divisor
	ld a,d
	exx
	jr c,div_l_nn ; negative dividend, negative divisor
	; negative dividend, positive divisor
	call div_l1
	exx
	ld d,a
	ld a,e
	exx
	call div_lcf
	exx
	ld e,a
	exx
	ld a,d
	call div_lcf
	ld d,a
	ld a,e
	call div_lcf
	inc a
	ld e,a
	jr c,2$
	sbc hl,bc
	exx
	sbc hl,bc
	exx
2$:	inc hl ; get into range divisor+1..0
	ld a,l
	or h
	ret nz
	exx
	inc hl
	exx
	ret

; NOTE(review): by analogy with math_sdiv_uw0 / math_sdiv_uw these two entry
; points look like they were meant to be called math_sdiv_ul0 / math_sdiv_ul;
; the names are left alone because callers may exist outside this file
math_div_ul0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, unsigned
	exx
	ex de,hl
	exx
math_div_ul1: ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, unsigned
	ld c,l
	ld b,h
	sub a
	ld l,a
	ld h,a
	exx
	ld c,l
	ld b,h
	ld l,a
	ld h,a
	exx
math_div_ul: ; hl':hl, de':de =
	; hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, unsigned
	exx
	ld a,d
	exx
div_l_pp: ; positive dividend, positive divisor
	call div_l0
	exx
	ld d,a
	ld a,e
	exx
	call div_lcf
	exx
	ld e,a
	exx
	ld a,d
	call div_lcf
	ld d,a
	ld a,e
	call div_lcf
	ld e,a
	ret nc
	add hl,bc
	exx
	adc hl,bc
	exx
	ret

div_l_nn: ; negative dividend, negative divisor
	call div_l_n0
	exx
	ld d,a
	ld a,e
	exx
	call div_l_ncf
	exx
	ld e,a
	exx
	ld a,d
	call div_l_ncf
	ld d,a
	ld a,e
	call div_l_ncf
	ld e,a
	jr nc,1$
	add hl,bc
	exx
	adc hl,bc
	exx
1$:	inc hl ; get into range divisor+1..0
	ld a,l
	or h
	ret nz
	exx
	inc hl
	exx
	ret

; non-restoring division routine
; see earlier comments for the word version, this extends the concept to long

.if 1 ; temporary
div_lcf:
	jr c,div_l1
div_l0: ; above
	push de
	ld d,8
	scf
	rla
div_l00:
	adc hl,hl
	exx
	adc hl,hl
	exx
	jr c,div_l01
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jr nc,div_l02
	dec a
	;jr div_l12
	add a,a
	dec d
	jr nz,div_l10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret
div_l01:
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
div_l02:
	add a,a
	dec d
	jr nz,div_l00
	dec a
	or a
	pop de
	ret

div_l1: ; below
	push de
	ld d,8
	add a,a
div_l10:
	adc hl,hl
	exx
	adc hl,hl
	exx
	jr nc,div_l11
	add hl,bc
	exx
	adc hl,bc
	exx
	jr nc,div_l12
	inc a
	;jr div_l02
	add a,a
	dec d
	jr nz,div_l00
	dec a
	or a
	pop de
	ret
div_l11:
	add hl,bc
	exx
	adc hl,bc
	exx
div_l12:
	add a,a
	dec d
	jr nz,div_l10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret

div_l_ncf:
	jr c,div_l_n1
div_l_n0: ; above
	push de
	ld d,8
	scf
	rla
div_l_n00:
	adc hl,hl
	exx
	adc hl,hl
	exx
	jr nc,div_l_n01
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jr c,div_l_n02
	dec a
	;jr div_l_n12
	add a,a
	dec d
	jr nz,div_l_n10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret
div_l_n01:
	sbc hl,bc
	exx
	sbc hl,bc
	exx
div_l_n02:
	add a,a
	dec d
	jr nz,div_l_n00
	dec a
	or a
	pop de
	ret

div_l_n1: ; below
	push de
	ld d,8
	add a,a
div_l_n10:
	adc hl,hl
	exx
	adc hl,hl
	exx
	jr c,div_l_n11
	add hl,bc
	exx
	adc hl,bc
	exx
	jr c,div_l_n12
	inc a
	;jr div_l_n02
	add a,a
	dec d
	jr nz,div_l_n00
	dec a
	or a
	pop de
	ret
div_l_n11:
	add hl,bc
	exx
	adc hl,bc
	exx
div_l_n12:
	add a,a
	dec d
	jr nz,div_l_n10
	;inc a
	;dec a ; compensation
	scf
	pop de
	ret
.else
; changed all jr to jp, revisit this
div_lcf:
	jp c,div_l1
div_l0: ; bit 0, above
	scf
	rla
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l01
	dec a
div_l11: ; bit 1, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l12
	inc a
div_l02: ; bit 2, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l03
	dec a
div_l13: ; bit 3, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l14
	inc a
div_l04: ; bit 4, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l05
	dec a
div_l15: ; bit 5, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l16
	inc a
div_l06: ; bit 6, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l07
	dec a
div_l17: ; bit 7, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l18
	inc a
div_l08: ; done, above
	add a,a
	dec a
	or a
	ret

div_l1: ; bit 0, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l11
	inc a
div_l01: ; bit 1, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l02
	dec a
div_l12: ; bit 2, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l13
	inc a
div_l03: ; bit 3, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l04
	dec a
div_l14: ; bit 4, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l15
	inc a
div_l05: ; bit 5, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l06
	dec a
div_l16: ; bit 6, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp nc,div_l17
	inc a
div_l07: ; bit 7, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp nc,div_l08
	dec a
div_l18: ; done, below
	add a,a
	;inc a
	;dec a ; compensation
	scf
	ret

; version for negative divisors
; see earlier comments for the word version, this extends the concept to long

div_l_ncf:
	jp c,div_l_n1
div_l_n0: ; bit 0, above
	scf
	rla
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n01
	dec a
div_l_n11: ; bit 1, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n12
	inc a
div_l_n02: ; bit 2, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n03
	dec a
div_l_n13: ; bit 3, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n14
	inc a
div_l_n04: ; bit 4, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n05
	dec a
div_l_n15: ; bit 5, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n16
	inc a
div_l_n06: ; bit 6, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n07
	dec a
div_l_n17: ; bit 7, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n18
	inc a
div_l_n08: ; done, above
	add a,a
	dec a
	or a
	ret

div_l_n1: ; bit 0, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n11
	inc a
div_l_n01: ; bit 1, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n02
	dec a
div_l_n12: ; bit 2, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n13
	inc a
div_l_n03: ; bit 3, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n04
	dec a
div_l_n14: ; bit 4, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n15
	inc a
div_l_n05: ; bit 5, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n06
	dec a
div_l_n16: ; bit 6, below
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	add hl,bc
	exx
	adc hl,bc
	exx
	jp c,div_l_n17
	inc a
div_l_n07: ; bit 7, above
	add a,a
	adc hl,hl
	exx
	adc hl,hl
	exx
	or a
	sbc hl,bc
	exx
	sbc hl,bc
	exx
	jp c,div_l_n08
	dec a
div_l_n18: ; done, below
	add a,a
	;inc a
	;dec a ; compensation
	scf
	ret
.endif

; debugging

; print hl, ':', de in hex followed by cr lf; a is overwritten
print_hlde:
	call print_word
	ld a,':
	call print_char
	ex de,hl
	call print_word
	ex de,hl
	ld a,0xd
	call print_char
	ld a,0xa
	jp print_char

; print hl', hl, ':', de', de in hex followed by cr lf; a is overwritten
print_hlhldede:
	exx
	call print_word
	exx
	call print_word
	ld a,':
	call print_char
	exx
	ex de,hl
	call print_word
	ex de,hl
	exx
	ex de,hl
	call print_word
	ex de,hl
	ld a,0xd
	call print_char
	ld a,0xa
	jp print_char

; does not return to a caller: after printing it fetches the byte at (bc),
; advances bc, puts the byte in l and jumps to (hl)
; NOTE(review): presumably this is the interpreter's opcode dispatch with h
; already holding the handler page -- confirm against page0_dispatch0
print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
	call print_trace2
	ld a,(bc)
	inc bc
	ld l,a
	jp (hl)

; prints one trace line; af and hl are restored before returning
print_trace2:
	push hl
	push af
	pop hl
	push hl
	call print_word		; af
	ld a,' 	; ' ' (space) separator
	call print_char
	ld l,c
	ld h,b
	call print_word		; bc
	ld a,' 	; ' ' (space) separator
	call print_char
	exx
	push hl
	exx
	pop hl
	call print_word		; hl'
	ld a,':
	call print_char
	ld l,e
	ld h,d
	call print_word		; de
	ld a,' 	; ' ' (space) separator
	call print_char
	exx
	push de
	exx
	pop hl
	call print_word		; de'
	ld a,':
	call print_char
	pop af
	pop hl
	push hl
	push af
	call print_word		; hl
	ld a,' 	; ' ' (space) separator
	call print_char
	; the stack now holds af, hl and the return address into print_trace,
	; so the sp seen on entry to print_trace is sp+6 from here
	ld hl,8
	add hl,sp
	ld a,(hl)
	inc hl
	ld h,(hl)
	ld l,a
	call print_word		; (sp+2)
	ld a,':
	call print_char
	ld hl,6
	add hl,sp
	ld a,(hl)
	inc hl
	ld h,(hl)
	ld l,a
	call print_word		; (sp)
	ld a,' 	; ' ' (space) separator
	call print_char
	ld hl,6
	add hl,sp
	call print_word		; sp
	ld a,0xd
	call print_char
	ld a,0xa
	call print_char
	pop af
	pop hl
	ret

; print hl as four hex digits; all registers preserved
print_word:
	push af
	ld a,h
	call print_byte
	ld a,l
	call print_byte
	pop af
	ret

; print a as two hex digits, high nibble first; a is preserved
print_byte:
	push af
	push af
	rrca
	rrca
	rrca
	rrca
	call print_digit
	pop af
	call print_digit
	pop af
	ret

; print the low nibble of a as one hex digit, falls through to print_char
print_digit:
	push de
	push hl
	and 0xf
	ld e,a
	ld d,0
	ld hl,digits
	add hl,de
	ld a,(hl)
	pop hl
	pop de
; print the character in a: calls address 5 with c = 2 and e = character
; (presumably the CP/M BDOS console output function); bc, de and hl are
; preserved, a is not saved
print_char:
	push bc
	push de
	push hl
	ld e,a
	ld c,2
	call 5
	pop hl
	pop de
	pop bc
	ret

digits:
	.ascii '0123456789abcdef'

; sm code

sm_main: ; create stack frame
	.db