From f7157ca24c92b9eddd1713bb3e4efaa893922b09 Mon Sep 17 00:00:00 2001
From: ceriel <none@none>
Date: Tue, 5 Dec 1995 15:38:55 +0000
Subject: [PATCH] Copied versions of dv[iu].s from ../../m68k2/libem.

---
 mach/m68k4/libem/dvi.s | 118 ++++++++++++++++++++++++++++++-----------
 mach/m68k4/libem/dvu.s |  93 +++++++++++++++++++++++---------
 2 files changed, 154 insertions(+), 57 deletions(-)

diff --git a/mach/m68k4/libem/dvi.s b/mach/m68k4/libem/dvi.s
index 6f600e542..82470c9e6 100644
--- a/mach/m68k4/libem/dvi.s
+++ b/mach/m68k4/libem/dvi.s
@@ -5,40 +5,96 @@
 .sect .bss
 
  ! signed long divide
+ !-----------------------------------------------------------------------------
+ ! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
+ !   #1  01/12/90  initial revision. Minor reduction of shift operations.
+ !   #2  03/07/90  use 68000 divu instruction wherever possible. This change
+ !		   makes #1 superfluous. (derived from my GNU division routine)
+ !-----------------------------------------------------------------------------
+ ! Some common cases can be handled in a special, much faster way :
+ !      1) divisor = 0
+ !          => cause trap, then return to user. Result is undefined
+ !      2) dividend < divisor
+ !          => quotient = 0, remainder = dividend
+ !      3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide )
+ !          => quotient and remainder can be calculated quite fast by repeated
+ !             application of 68000 divu operations (ca. 400 cycles)
+ !      4) otherwise (due to #2, #3 dividend, divisor both wider than 16 bits)
+ !          => do slow division by shift and subtract
+ !-----------------------------------------------------------------------------
+
+
+ ! register usage:
+ ! entry : d0 divisor   (popped from the stack)
+ !         d2 dividend  (popped from the stack)
+ ! exit  : d1 quotient
+ !         d2 remainder
+
 	.sect .text
 .dvi:
-	move.l	4(sp),d2
-	move.l	8(sp),d1
-	move.l	d4,-(sp)
+	move.l	(sp)+,a1	! pop return address (exit is via jmp (a1))
+	move.l	(sp)+,d0	! pop divisor
+	move.l	(sp)+,d2	! pop dividend
+	move.l	d3,a0		! keep caller's d3 in a0
+	move.l	d4,-(sp)	! save d4, used below for the result signs
 	clr.l	d4
-	tst.l	d2	! divisor
-	bpl	1f
-	neg.l	d2
-	not	d4
-1:
-	tst.l	d1	! dividend
-	bpl	2f
-	neg.l	d1
-	not	d4
-	swap	d4
-	not	d4
-	swap	d4
+	tst.l	d2
+	bpl	0f		! dividend is negative ?
+	neg.l	d2		! yes - negate
+	not.l	d4		! d4 = all ones: bit 31 => remainder < 0
+0:
+	tst.l	d0
+	bpl	0f		! divisor is negative ?
+	neg.l	d0		! yes - negate
+	not.w	d4		! flip low word: bit 15 => quotient < 0
+0:
+	clr.l	d1		! prepare quotient
+! === case 1: divisor = 0
+	tst.l	d0		! divisor = 0 ?
+	beq	9f		! yes - divide by zero trap
+! === case 2: dividend < divisor
+	cmp.l	d0,d2		! dividend < divisor (unsigned) ?
+	bcs	8f		! yes - quotient 0, remainder = dividend
+! === case 3: divisor <= 0x0ffff
+	cmp.l	#0x0ffff,d0	! is divisor only 16 bits wide ?
+	bhi	2f
+	move.w	d2,d3		! save dividend.l
+	clr.w	d2		! prepare dividend.h for divu operation
+	swap	d2
+	beq	0f		! dividend.h is all zero, no divu necessary
+	divu	d0,d2
+0:	move.w	d2,d1		! save quotient.h
+	swap	d1
+	move.w	d3,d2		! divide dividend.l
+	divu	d0,d2		! (d2.h = remainder of prev divu)
+	move.w	d2,d1		! save quotient.l
+	clr.w	d2		! get remainder
+	swap	d2
+	bra	8f
+! === case 4: divisor and dividend both > 0x0ffff
 2:
-	move.l	d1,-(sp)
-	move.l	d2,-(sp)
-	jsr	.dvu
-	tst	d4
-	beq	5f
-	neg.l	d1	! quotient
+	move	#32-1,d3	! loop count (dbra runs the body 32 times)
+4:
+	lsl.l	#1,d2		! shift dividend ...
+	roxl.l	#1,d1		!  ... into d1
+	cmp.l	d0,d1		! compare with divisor
+	bcs	5f
+	sub.l	d0,d1		! not smaller, subtract divisor
+	add	#1,d2		! note subtraction in result (bit 0 is free)
 5:
-	tst.l	d4
-	bpl	6f
-	neg.l	d0	! remainder
-6:
-	move.l	(sp)+,d4
-	move.l	(sp)+,a0
-	add.l	#8,sp
-	move.l	a0,-(sp)
-	rts
+	dbra	d3,4b
+	exg	d1,d2		! d1 = quotient, d2 = remainder
+8:
+	tst.w	d4		! quotient < 0 ?
+	bpl	0f
+	neg.l	d1		! yes - negate
+0:	tst.l	d4		! remainder < 0 ?
+	bpl	0f
+	neg.l	d2
+0:	move.l	(sp)+,d4	! restore d4
+	move.l	a0,d3		! restore d3
+	jmp	(a1)
 
-.align 2
+EIDIVZ	= 6
+9:	move.l	#EIDIVZ,-(sp)	! trap number as a long, as the old m68k4 .dvu pushed it
+	jsr	.trp		! NOTE(review): saved d4 still stacked, no code follows; confirm .trp never returns
diff --git a/mach/m68k4/libem/dvu.s b/mach/m68k4/libem/dvu.s
index 902cd5ab7..005c351e6 100644
--- a/mach/m68k4/libem/dvu.s
+++ b/mach/m68k4/libem/dvu.s
@@ -5,36 +5,77 @@
 .sect .bss
 
  ! unsigned long divide
+ !-----------------------------------------------------------------------------
+ ! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
+ !   #1  01/12/90  initial revision. Minor reduction of shift operations.
+ !   #2  03/07/90  use 68000 divu instruction wherever possible. This change
+ !		   makes #1 superfluous. (derived from my GNU division routine)
+ !-----------------------------------------------------------------------------
+ ! Some common cases can be handled in a special, much faster way :
+ !      1) divisor = 0
+ !          => cause trap, then return to user. Result is undefined
+ !      2) dividend < divisor
+ !          => quotient = 0, remainder = dividend
+ !      3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide )
+ !          => quotient and remainder can be calculated quite fast by repeated
+ !             application of 68000 divu operations (ca. 400 cycles)
+ !      4) otherwise (due to #2, #3 dividend, divisor both wider than 16 bits)
+ !          => do slow division by shift and subtract
+ !-----------------------------------------------------------------------------
+
+
  ! register usage:
- ! 	 : d2 divisor
+ ! 	 : d0 divisor
  !         d1 dividend
  ! exit  : d1 quotient
- !         d0 remainder
+ !         d2 remainder
+
 	.sect .text
 .dvu:
-	move.l	4(sp),d2
-	move.l	8(sp),d1
-	move.l	d3,-(sp)
-	tst.l	d2
-	bne	0f
-	move.l	#EIDIVZ,-(sp)
-	jsr	.trp
-0:
-	clr.l	d0
-	move.l	#31,d3
-3:
-	lsl.l	#1,d1
-	roxl.l	#1,d0
-	cmp.l	d2,d0
-	blt	4f
-	sub.l	d2,d0
-	add	#1,d1
+	move.l	d3,a0		! keep caller's d3 in a0
+	move.l	(sp)+,a1	! pop return address (exit is via jmp (a1))
+	move.l	(sp)+,d0	! pop divisor
+	move.l	(sp)+,d2	! pop dividend
+	clr.l	d1		! prepare quotient
+! === case 1: divisor = 0
+	tst.l	d0		! divisor = 0 ?
+	beq	9f		! yes - divide by zero trap
+! === case 2: dividend < divisor
+	cmp.l	d0,d2		! dividend < divisor (unsigned) ?
+	bcs	8f		! yes - quotient 0, remainder = dividend
+! === case 3: divisor <= 0x0ffff
+	cmp.l	#0x0ffff,d0	! is divisor only 16 bits wide ?
+	bhi	2f
+	move.w	d2,d3		! save dividend.l
+	clr.w	d2		! prepare dividend.h for divu operation
+	swap	d2
+	beq	0f		! dividend.h is all zero, no divu necessary
+	divu	d0,d2
+0:	move.w	d2,d1		! save quotient.h
+	swap	d1
+	move.w	d3,d2		! divide dividend.l
+	divu	d0,d2		! (d2.h = remainder of prev divu)
+	move.w	d2,d1		! save quotient.l
+	clr.w	d2		! get remainder
+	swap	d2
+	bra	8f
+! === case 4: divisor and dividend both > 0x0ffff
+2:
+	move	#32-1,d3	! loop count (dbra runs the body 32 times)
 4:
-	dbf	d3,3b
-	move.l	(sp)+,d3
-	move.l	(sp)+,a0
-	add.l	#8,sp
-	move.l	a0,-(sp)
-	rts
+	lsl.l	#1,d2		! shift dividend ...
+	roxl.l	#1,d1		!  ... into d1
+	cmp.l	d0,d1		! compare with divisor
+	bcs	5f
+	sub.l	d0,d1		! not smaller, subtract divisor
+	add	#1,d2		! note subtraction in result (bit 0 is free)
+5:
+	dbra	d3,4b
+	exg	d1,d2		! d1 = quotient, d2 = remainder
+8:
+	move.l	a0,d3		! restore d3
+	jmp	(a1)
 
-.align 2
+EIDIVZ	= 6
+9:	move.l	#EIDIVZ,-(sp)	! trap number as a long, as the code removed above pushed it
+	jsr	.trp		! NOTE(review): no code follows; confirm .trp never returns here
-- 
2.34.1