From f7157ca24c92b9eddd1713bb3e4efaa893922b09 Mon Sep 17 00:00:00 2001 From: ceriel Date: Tue, 5 Dec 1995 15:38:55 +0000 Subject: [PATCH] Copied versions of dv[iu].s from ../../m68k2/libem. --- mach/m68k4/libem/dvi.s | 118 ++++++++++++++++++++++++++++++----------- mach/m68k4/libem/dvu.s | 93 +++++++++++++++++++++++--------- 2 files changed, 154 insertions(+), 57 deletions(-) diff --git a/mach/m68k4/libem/dvi.s b/mach/m68k4/libem/dvi.s index 6f600e542..82470c9e6 100644 --- a/mach/m68k4/libem/dvi.s +++ b/mach/m68k4/libem/dvi.s @@ -5,40 +5,96 @@ .sect .bss ! signed long divide + !----------------------------------------------------------------------------- + ! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed. + ! #1 01/12/90 initial revision. Minor reduce of shift operations. + ! #2 03/07/90 use 68000 divu instruction whereever possible. This change + ! makes #1 superflous. (derived from my GNU division routine) + !----------------------------------------------------------------------------- + ! Some common cases can be handled in a special, much faster way : + ! 1) divisor = 0 + ! => cause trap, then return to user. Result is undefined + ! 2) dividend < divisor + ! => quotient = 0, remainder = dividend + ! 3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide ) + ! => quotient and remainder can be calculated quite fast by repeated + ! application of 68000 divu operations (ca. 400 cycles) + ! 4) otherwise (due to #2, #3 dividend, divisor both wider then 16 bits) + ! => do slow division by shift and subtract + !----------------------------------------------------------------------------- + + + ! register usage: + ! : d0 divisor + ! d1 dividend + ! exit : d1 quotient + ! d2 remainder + .sect .text .dvi: - move.l 4(sp),d2 - move.l 8(sp),d1 - move.l d4,-(sp) + move.l (sp)+,a1 ! return address + move.l (sp)+,d0 ! divisor + move.l (sp)+,d2 ! dividend + move.l d3,a0 ! save d3 + move.l d4,-(sp) ! save result sign register clr.l d4 - tst.l d2 ! divisor - bpl 1f - neg.l d2 - not d4 -1: - tst.l d1 ! dividend - bpl 2f - neg.l d1 - not d4 - swap d4 - not d4 - swap d4 + tst.l d2 + bpl 0f ! dividend is negative ? + neg.l d2 ! yes - negate + not.l d4 ! and note negation in d4 +0: + tst.l d0 + bpl 0f ! divisor is negative ? + neg.l d0 ! yes - negate + not.w d4 ! note negation +0: + clr.l d1 ! prepare quotient +! === case 1: divisor = 0 + tst.l d0 ! divisor = 0 ? + beq 9f ! yes - divide by zero trap +! === case 2: dividend < divisor + cmp.l d0,d2 ! dividend < divisor ? + bcs 8f ! yes - division already finished +! === case 3: divisor <= 0x0ffff + cmp.l #0x0ffff,d0 ! is divisor only 16 bits wide ? + bhi 2f + move.w d2,d3 ! save dividend.l + clr.w d2 ! prepare dividend.h for divu operation + swap d2 + beq 0f ! dividend.h is all zero, no divu necessary + divu d0,d2 +0: move.w d2,d1 ! save quotient.h + swap d1 + move.w d3,d2 ! divide dividend.l + divu d0,d2 ! (d2.h = remainder of prev divu) + move.w d2,d1 ! save qoutient.l + clr.w d2 ! get remainder + swap d2 + bra 8f +! === case 4: divisor and dividend both > 0x0ffff 2: - move.l d1,-(sp) - move.l d2,-(sp) - jsr .dvu - tst d4 - beq 5f - neg.l d1 ! quotient + move #32-1,d3 ! loop count +4: + lsl.l #1,d2 ! shift dividend ... + roxl.l #1,d1 ! ... into d1 + cmp.l d0,d1 ! compare with divisor + bcs 5f + sub.l d0,d1 ! bigger, subtract divisor + add #1,d2 ! note subtraction in result 5: - tst.l d4 - bpl 6f - neg.l d0 ! remainder -6: - move.l (sp)+,d4 - move.l (sp)+,a0 - add.l #8,sp - move.l a0,-(sp) - rts + dbra d3,4b + exg d1,d2 ! get results in the correct registers +8: + tst.w d4 ! quotient < 0 ? + bpl 0f + neg.l d1 ! yes - negate +0: tst.l d4 ! remainder < 0 ? + bpl 0f + neg.l d2 +0: move.l (sp)+,d4 ! restore d4 + move.l a0,d3 ! restore d3 + jmp (a1) -.align 2 +EIDIVZ = 6 +9: move.w #EIDIVZ,-(sp) + jsr .trp diff --git a/mach/m68k4/libem/dvu.s b/mach/m68k4/libem/dvu.s index 902cd5ab7..005c351e6 100644 --- a/mach/m68k4/libem/dvu.s +++ b/mach/m68k4/libem/dvu.s @@ -5,36 +5,77 @@ .sect .bss ! unsigned long divide + !----------------------------------------------------------------------------- + ! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed. + ! #1 01/12/90 initial revision. Minor reduce of shift operations. + ! #2 03/07/90 use 68000 divu instruction whereever possible. This change + ! makes #1 superflous. (derived from my GNU division routine) + !----------------------------------------------------------------------------- + ! Some common cases can be handled in a special, much faster way : + ! 1) divisor = 0 + ! => cause trap, then return to user. Result is undefined + ! 2) dividend < divisor + ! => quotient = 0, remainder = dividend + ! 3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide ) + ! => quotient and remainder can be calculated quite fast by repeated + ! application of 68000 divu operations (ca. 400 cycles) + ! 4) otherwise (due to #2, #3 dividend, divisor both wider then 16 bits) + ! => do slow division by shift and subtract + !----------------------------------------------------------------------------- + + ! register usage: - ! : d2 divisor + ! : d0 divisor ! d1 dividend ! exit : d1 quotient - ! d0 remainder + ! d2 remainder + .sect .text .dvu: - move.l 4(sp),d2 - move.l 8(sp),d1 - move.l d3,-(sp) - tst.l d2 - bne 0f - move.l #EIDIVZ,-(sp) - jsr .trp -0: - clr.l d0 - move.l #31,d3 -3: - lsl.l #1,d1 - roxl.l #1,d0 - cmp.l d2,d0 - blt 4f - sub.l d2,d0 - add #1,d1 + move.l d3,a0 ! save d3 + move.l (sp)+,a1 ! return address + move.l (sp)+,d0 ! divisor + move.l (sp)+,d2 ! dividend + clr.l d1 ! prepare quotient +! === case 1: divisor = 0 + tst.l d0 ! divisor = 0 ? + beq 9f ! yes - divide by zero trap +! === case 2: dividend < divisor + cmp.l d0,d2 ! dividend < divisor ? + bcs 8f ! yes - division already finished +! === case 3: divisor <= 0x0ffff + cmp.l #0x0ffff,d0 ! is divisor only 16 bits wide ? + bhi 2f + move.w d2,d3 ! save dividend.l + clr.w d2 ! prepare dividend.h for divu operation + swap d2 + beq 0f ! dividend.h is all zero, no divu necessary + divu d0,d2 +0: move.w d2,d1 ! save quotient.h + swap d1 + move.w d3,d2 ! divide dividend.l + divu d0,d2 ! (d2.h = remainder of prev divu) + move.w d2,d1 ! save qoutient.l + clr.w d2 ! get remainder + swap d2 + bra 8f +! === case 4: divisor and dividend both > 0x0ffff +2: + move #32-1,d3 ! loop count 4: - dbf d3,3b - move.l (sp)+,d3 - move.l (sp)+,a0 - add.l #8,sp - move.l a0,-(sp) - rts + lsl.l #1,d2 ! shift dividend ... + roxl.l #1,d1 ! ... into d1 + cmp.l d0,d1 ! compare with divisor + bcs 5f + sub.l d0,d1 ! bigger, subtract divisor + add #1,d2 ! note subtraction in result +5: + dbra d3,4b + exg d1,d2 ! get results in the correct registers +8: + move.l a0,d3 ! restore d3 + jmp (a1) -.align 2 +EIDIVZ = 6 +9: move.w #EIDIVZ,-(sp) + jsr .trp -- 2.34.1