111 ld bc,restarts_end - restarts
124 ; rst 0x28, immediate to hl
133 ; rst 0x30, pop hl:de'
142 ; rst 0x38, immediate to de':hl
163 ; stack and control transfer
165 .org page0 * 0x100 - 18
188 ; page 0 administrative
296 ; word arithmetic operations
297 ; top stack word cached in de
299 .org page1 * 0x100 - 13
317 ; page 1 administrative
443 ; use addition for page1_imm_sub_w
457 jr add_w_done ;mul_w_done
465 jr add_w_done ;mul_w_done
613 ; long arithmetic operations
614 ; top stack long cached in de:hl'
616 .org page2 * 0x100 - 13
634 ; page 2 administrative
744 add_l_entry: ; optimize this
761 sub_l_entry: ; optimize this
770 ; use addition for page2_imm_sub_l
935 ; lightweight routines
937 ; these can be duplicated without much cost, and will be called often, so the
938 ; calling convention is geared to what the interpreter needs (de:hl' and so on)
940 math_imm_w: ; immediate to de
954 math_imm_l: ; immediate to hl':de
978 math_stkld_w: ; sp(imm_w) to de
986 ; use inline code for math_ld_w
997 math_stkld_l: ; sp(imm_w) to de:hl'
1005 math_ld_l: ; (hl) to hl':de
1026 math_stkst_w: ; de to sp(imm_w)
1044 math_stkst_l: ; de:hl' to sp(imm_w)
1052 math_st_l: ; hl':de to (hl)
1090 math_and_imm_w: ; de &= imm_w
1106 math_and_imm_l: ; hl':de &= imm_l
1134 math_and_w: ; de &= hl
1153 math_and_l: ; hl':de &= de':hl
1175 math_or_imm_w: ; de |= imm_w
1191 math_or_imm_l: ; hl':de |= imm_l
1219 math_or_w: ; de |= hl
1238 math_or_l: ; hl':de |= de':hl
1260 math_xor_imm_w: ; de ^= imm_w
1276 math_xor_imm_l: ; hl':de ^= imm_l
1304 math_xor_w: ; de ^= hl
1323 math_xor_l: ; hl':de ^= de':hl
1345 math_cmp_sw: ; cf=1 de < hl, zf=1 de == hl, signed
1347 math_cmprev_sw: ; cf=1 hl < de, zf=1 hl == de, signed
1362 ; use inline code for math_cmp_uw, math_cmprev_uw
1369 math_cmp_sl: ; cf=1 hl':de < de':hl, zf=1 hl':de == de':hl, signed
1392 math_cmp_ul: ; cf=1 hl':de < de':hl, zf=1 hl':de == de':hl, unsigned
1398 jr nz,cmp_l_dispatch
1415 math_cmprev_sl: ; cf=1 de':hl < hl':de, zf=1 de':hl == hl':de, signed
1439 math_cmprev_ul: ; cf=1 de':hl < hl':de, zf=1 de':hl == hl':de, unsigned
1446 jr nz,cmprev_l_dispatch
1457 ; heavyweight routines
1459 ; these have unrolled loops and so there needs to be as much reuse as possible
1461 ; for the multiply and divide the unrolled loops are placed in subroutines, so
1462 ; there is some overhead to use them, but it allows the calling code to itself
1463 ; be cheaply unrolled, so the amount left to do is determined by context not
1464 ; by a counter; the tradeoff is do we count loops by push bc/pop bc/djnz or by
1465 ; call/call/call, since both need a stack push/pop it should be about the same
1467 ; for these routines the calling convention is geared to whatever the routine
1468 ; needs to work most efficiently, this makes the usage more cumbersome as you
1469 ; have to do something like ex de,hl/call/ex de,hl but it is done this way because
1470 ; otherwise some callsites need to exchange registers into place on one side
1471 ; of a call/ret boundary only for them to be immediately exchanged back again
1473 ; as an exception to this we may provide an earlier entry point(s), before the
1474 ; routine proper, which exchanges arguments into place (the caller must still
1475 ; deal with exchanges afterwards), we do this because of callsites that are in
1476 ; the interpreter pages and need to conserve code size, therefore the prefix
1477 ; chosen need not occur more than once, and we'll use the longest such prefix
1479 ; while some of these earlier entry points have a meaning, e.g. math_divrev_l
1480 ; before math_div_l, many of them do not, so we will just number them instead
1481 ; (this is because the prefix code can be a compromise between callers' needs)
1483 math_sl_w0: ; hl = de << (l & 0xf)
1485 math_sl_w: ; hl <<= e & 0xf
1509 math_sl_l0: ; de':hl <<= e & 0x1f
1513 math_sl_l: ; hl':hl <<= e & 0x1f
1571 math_sr_uw0: ; hl = de >> (l & 0xf), logical
1573 math_sr_uw: ; hl >>= e & 0xf, logical
1577 math_sr_uw1: ; hl >>= a - 7, immediate a in [7, 0x17), logical
1582 math_sr_sw0: ; hl = de >> (l & 0xf), arithmetic
1584 math_sr_sw: ; hl >>= e & 0xf, arithmetic
1588 math_sr_sw1: ; hl >>= a - 7, immediate a in [7, 0x17), arithmetic
1623 4$: ; by 16 (can't occur simultaneously with by 8)
1632 math_sr_ul0: ; de':hl >>= e & 0x1f, logical
1636 math_sr_ul: ; hl':hl >>= e & 0x1f, logical
1640 math_sr_ul1: ; hl':hl >>= a - 7, immediate a in [7, 0x27), logical
1645 math_sr_sl0: ; de':hl >>= e & 0x1f, arithmetic
1649 math_sr_sl: ; hl':hl >>= e & 0x1f, arithmetic
1653 math_sr_sl1: ; hl':hl >>= a - 7, immediate a in [7, 0x27), arithmetic
1730 5$: ; by 32 (can't occur simultaneously with by 16)
1743 ; smul: short multiplication
1744 ; 16 * 16 to 16 bit product (word)
1745 ; 32 * 32 to 32 bit product (long)
1746 ; mul: long multiplication
1747 ; 16 + 16 * 16 to 32 bit product (word)
1748 ; 32 + 32 * 32 to 64 bit product (long)
1750 ; smul is implemented in a more optimal way that uses only left shifts,
1751 ; since left shifts are cheaper on the z80, this works for smul because
1752 ; there is no need to worry about propagating carries into high result
1754 ; mul has the ability to initialize the product with some nonzero value,
1755 ; which smul doesn't have because it only shifts zeros in from the left,
1756 ; using this ability the long multiplication reverses the long division
1757 ; (initialize product with a remainder, then add in quotient * divisor)
1759 math_smul_w0: ; hl *= de
1762 math_smul_w: ; hl = bc * de
1808 math_smul_l0: ; hl':hl *= de':de
1815 math_smul_l: ; hl':hl = de':de * bc':bc
1917 math_mul_sw0: ; hl:de = hl * de, signed
1921 math_mul_sw: ; hl:de = hl + bc * de, signed
1923 rla ; cf will be preserved through to the last rra below
1928 call mul_uw1 ; do only 7 bits, get sign of d into cf
1943 math_mul_uw0: ; hl:de = hl * de, unsigned
1947 math_mul_uw: ; hl:de = hl + bc * de, unsigned
2007 ; sdiv: short division
2008 ; 16 / 16 to 16 bit quotient, 16 bit remainder (word)
2009 ; 32 / 32 to 32 bit quotient, 32 bit remainder (long)
2010 ; div: long division
2011 ; 32 / 16 to 16 bit quotient, 16 bit remainder (word)
2012 ; 64 / 32 to 32 bit quotient, 32 bit remainder (long)
2014 ; sdiv is implemented as sign/zero extension then div
2016 math_sdiv_sw0: ; hl, de = hl % de, hl / de, signed
2018 math_sdiv_sw: ; hl, de = de % hl, de / hl, signed
2026 math_div_sw: ; hl, de = hl:de % bc, hl:de / bc, signed
2031 jp m,div_w_n ; positive dividend
2035 jr nc,div_w_pp ; positive dividend, positive divisor
2037 ; positive dividend, negative divisor
2050 dec de ; reduces remainder by 1 (we inc later)
2052 jr c,div_w_nn ; negative dividend, negative divisor
2054 ; negative dividend, positive divisor
2061 inc hl ; get into range -divisor+1..0
2066 div_w_nn: ; negative dividend, negative divisor
2072 inc hl ; get into range divisor+1..0
2077 math_sdiv_uw0: ; hl, de = hl % de, hl / de, unsigned
2079 math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
2083 math_div_uw: ; hl, de = hl:de % bc, hl:de / bc, unsigned
2085 div_w_pp: ; positive dividend, positive divisor
2095 ; non-restoring division routine
2097 ; de = divisor, hl:a = dividend with hl = previous remainder, a = next byte
2098 ; enter at div0 with positive remainder in hl, such that hl < de
2099 ; enter at div1 with negative remainder in hl, such that hl >= -de
2101 ; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
2102 ; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
2104 ; if entered at div0, there is a -80 and so quotient is in range -ff..-1
2105 ; if entered at div1, there is a +80 and so quotient is in range 1..ff
2106 ; falls out of loop after div01 with positive remainder, div11 with negative,
2107 ; depending on this we should re-enter at div0 or div1, signalled by cf return
2109 ; the successive quotient bytes can be concatenated into a full quotient,
2110 ; but negative bytes require the next higher quotient byte to be decremented,
2111 ; we know in advance if this will happen because the implied sign of the
2112 ; quotient byte depends only on whether we entered at div0 or div1, hence,
2113 ; before the div11 return we'll decrement to compensate for next negative byte
2115 ; the decrement can also be seen as compensating for the extra add hl,de that
2116 ; may be needed to make negative remainder positive before return to caller,
2117 ; thus leaving quotient in a consistent state regardless of which exit taken,
2118 ; remainder needs the add hl,de if cf=1 returned (equiv. return byte is even)
2120 ; in the following code each sbc hl,de gets an inc a and each add hl,de gets
2121 ; a dec a, guaranteeing the integrity of the division, the initial scf/rla is
2122 ; needed to make the result 100 + -ff..ff or 1..1ff, so that the decrements
2123 ; cannot borrow into the upcoming dividend bits also held in a, and there must
2124 ; be another shift between the scf/rla and increment/decrement so that the scf
2125 ; is implicitly in the 100s place, making the code awkward though it's correct
2127 ; now optimized to only inc/dec a when doing zero-crossing, fix above analysis
2131 div_w0: ; bit 0, above
2138 div_w11: ; bit 1, below
2144 div_w02: ; bit 2, above
2150 div_w13: ; bit 3, below
2156 div_w04: ; bit 4, above
2162 div_w15: ; bit 5, below
2168 div_w06: ; bit 6, above
2174 div_w17: ; bit 7, below
2180 div_w08: ; done, above
2186 div_w1: ; bit 0, below
2192 div_w01: ; bit 1, above
2198 div_w12: ; bit 2, below
2204 div_w03: ; bit 3, above
2210 div_w14: ; bit 4, below
2216 div_w05: ; bit 5, above
2222 div_w16: ; bit 6, below
2228 div_w07: ; bit 7, above
2234 div_w18: ; done, below
2237 ;dec a ; compensation
2241 ; divn0/1 are the same as div0/1 but carry reversed after add/subtract divisor
2242 ; this is for negative divisors where we expect carry (means no zero crossing)
2244 ; when divisor negated, remainder also negated, so we expect to do subtraction
2245 ; when remainder negative and vice versa, need to clear carry after add hl,hl
2249 div_w_n0: ; bit 0, above
2257 div_w_n11: ; bit 1, below
2263 div_w_n02: ; bit 2, above
2270 div_w_n13: ; bit 3, below
2276 div_w_n04: ; bit 4, above
2283 div_w_n15: ; bit 5, below
2289 div_w_n06: ; bit 6, above
2296 div_w_n17: ; bit 7, below
2302 div_w_n08: ; done, above
2308 div_w_n1: ; bit 0, below
2314 div_w_n01: ; bit 1, above
2321 div_w_n12: ; bit 2, below
2327 div_w_n03: ; bit 3, above
2334 div_w_n14: ; bit 4, below
2340 div_w_n05: ; bit 5, above
2347 div_w_n16: ; bit 6, below
2353 div_w_n07: ; bit 7, above
2360 div_w_n18: ; done, below
2363 ;dec a ; compensation
2367 math_div_sl0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, signed
2371 math_div_sl1: ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, signed
2387 ; hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, signed
2394 jp m,div_l_n ; positive dividend
2400 jr nc,div_l_pp ; positive dividend, positive divisor
2402 ; positive dividend, negative divisor
2431 dec de ; reduces remainder by 1 (we inc later)
2437 jr c,div_l_nn ; negative dividend, negative divisor
2439 ; negative dividend, positive divisor
2461 2$: inc hl ; get into range -divisor+1..0
2470 math_div_ul0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, unsigned
2474 math_div_ul1: ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, unsigned
2488 ; hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, unsigned
2492 div_l_pp: ; positive dividend, positive divisor
2515 div_l_nn: ; negative dividend, negative divisor
2536 1$: inc hl ; get into range divisor+1..0
2545 ; non-restoring division routine
2546 ; see earlier comments for the word version, this extends the concept to long
2548 ; changed all jr to jp, revisit this
2552 div_l0: ; bit 0, above
2565 div_l11: ; bit 1, below
2577 div_l02: ; bit 2, above
2589 div_l13: ; bit 3, below
2601 div_l04: ; bit 4, above
2613 div_l15: ; bit 5, below
2625 div_l06: ; bit 6, above
2637 div_l17: ; bit 7, below
2649 div_l08: ; done, above
2655 div_l1: ; bit 0, below
2667 div_l01: ; bit 1, above
2679 div_l12: ; bit 2, below
2691 div_l03: ; bit 3, above
2703 div_l14: ; bit 4, below
2715 div_l05: ; bit 5, above
2727 div_l16: ; bit 6, below
2739 div_l07: ; bit 7, above
2751 div_l18: ; done, below
2754 ;dec a ; compensation
2758 ; version for negative divisors
2759 ; see earlier comments for the word version, this extends the concept to long
2763 div_l_n0: ; bit 0, above
2777 div_l_n11: ; bit 1, below
2789 div_l_n02: ; bit 2, above
2802 div_l_n13: ; bit 3, below
2814 div_l_n04: ; bit 4, above
2827 div_l_n15: ; bit 5, below
2839 div_l_n06: ; bit 6, above
2852 div_l_n17: ; bit 7, below
2864 div_l_n08: ; done, above
2870 div_l_n1: ; bit 0, below
2882 div_l_n01: ; bit 1, above
2895 div_l_n12: ; bit 2, below
2907 div_l_n03: ; bit 3, above
2920 div_l_n14: ; bit 4, below
2932 div_l_n05: ; bit 5, above
2945 div_l_n16: ; bit 6, below
2957 div_l_n07: ; bit 7, above
2970 div_l_n18: ; done, below
2973 ;dec a ; compensation
2991 print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
3111 .ascii '0123456789abcdef'
3116 ; create stack frame
3124 ; push result pointer
3129 ; call sm_factorial(argument)
3145 .db <page1_imm_cmp_sw
3160 .db <page1_imm_add_w
3164 ; divide by place value
3167 ; replace current value with remainder
3171 ; print quotient plus '0
3173 .db <page1_imm_add_w
3183 .db <page1_imm_add_w
3209 ; enlarge stack frame
3217 ; push result pointer
3222 ; call sm_factorial(argument)
3238 .db <page1_imm_cmp_sw
3253 .db <page1_imm_add_w
3257 ; divide by place value
3260 ; replace current value with remainder
3264 ; print quotient plus '0
3266 .db <page1_imm_add_w
3271 .dw 4 ; cheating -- kill hi word of long too
3276 .db <page1_imm_add_w
3302 ; destroy stack frame
3310 .dw 10000,1000,100,10,1
3312 .dw 0xca00,0x3b9a ; 1000000000
3313 .dw 0xe100,0x5f5 ; 100000000
3314 .dw 0x9680,0x98 ; 10000000
3315 .dw 0x4240,0xf ; 1000000
3316 .dw 0x86a0,1 ; 100000
3329 .db <page1_imm_cmp_sw
3334 ; no, set up for *result =
3344 .db <page1_imm_add_w
3347 ; push result pointer
3352 ; call sm_factorial(argument - 1)
3365 ; set *result = sm_factorial(argument - 1) * argument
3372 ; yes, set up for *result =
3391 .db <page2_imm_cmp_sl
3397 ; no, set up for *result =
3407 .db <page2_imm_add_l
3410 ; push result pointer
3415 ; call sm_factorial(argument - 1)
3428 ; set *result = sm_factorial(argument - 1) * argument
3435 ; yes, set up for *result =