From 0e2861d8b08411dbf2239b672db19eadf56a1d30 Mon Sep 17 00:00:00 2001
From: ceriel <none@none>
Date: Wed, 29 Nov 1989 09:49:35 +0000
Subject: [PATCH] Added 80[23]87 support

---
 mach/i386/as/mach1.c     |  28 +--
 mach/i386/as/mach2.c     |  15 +-
 mach/i386/as/mach3.c     | 118 ++++++++++-
 mach/i386/as/mach4.c     |  45 ++++-
 mach/i386/libem/LIST     |   2 +-
 mach/i386/libem/fp8087.s | 414 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 599 insertions(+), 23 deletions(-)
 create mode 100644 mach/i386/libem/fp8087.s

diff --git a/mach/i386/as/mach1.c b/mach/i386/as/mach1.c
index 961c0dde5..910a3b93d 100644
--- a/mach/i386/as/mach1.c
+++ b/mach/i386/as/mach1.c
@@ -8,12 +8,16 @@
  * INTEL 80386 C declarations
  */
 
-#define	low6(z)		(z & 077)
-#define	fit6(z)		(low6(z) == z)
+#define low6(z)		((z) & 077)		/* low six bits of z */
+#define fit6(z)		(low6(z) == (z))	/* nonzero when z fits in six bits; z is evaluated twice */
+#define low3(z)		((z) & 07)		/* low three bits of z */
+#define fit3(z)		(low3(z) == (z))	/* nonzero when z fits in three bits; used for the st(i) index */
+
+#define FESC	0xD8		/* first 80[23]87 escape opcode; the keyword table adds 0..7 to it */
 
 #define ufitb(z)	((unsigned)(z) <= 255)
 
-#define	IS_R8		0x100
+#define IS_R8		0x100
 #define IS_R32		0x200
 #define IS_EXPR		0x400
 #define IS_RSEG		0x800
@@ -33,15 +37,15 @@ struct operand {
 
 extern struct operand	op_1, op_2;
 
-#define	mod_1	op_1.mod
-#define	mod_2	op_2.mod
-#define	rm_1	op_1.rm
-#define	rm_2	op_2.rm
-#define	reg_1	op_1.reg
-#define	reg_2	op_2.reg
-#define	sib_1	op_1.sib
-#define	sib_2	op_2.sib
-#define	exp_1	op_1.exp
+#define mod_1	op_1.mod	/* shorthands for the fields of the two operand descriptors op_1 and op_2 */
+#define mod_2	op_2.mod
+#define rm_1	op_1.rm
+#define rm_2	op_2.rm
+#define reg_1	op_1.reg
+#define reg_2	op_2.reg
+#define sib_1	op_1.sib
+#define sib_2	op_2.sib
+#define exp_1	op_1.exp
 #define exp_2	op_2.exp
 
 #ifdef RELOCATION
diff --git a/mach/i386/as/mach2.c b/mach/i386/as/mach2.c
index 303e648c2..ad099afd8 100644
--- a/mach/i386/as/mach2.c
+++ b/mach/i386/as/mach2.c
@@ -17,8 +17,8 @@
 %token <y_word> RSYSTR
 %token <y_word> PREFIX
 %token <y_word> ADDOP
-%token <y_word>	BITTEST
-%token <y_word>	BOUND
+%token <y_word> BITTEST
+%token <y_word> BOUND
 %token <y_word> CALFOP
 %token <y_word> CALLOP
 %token <y_word> ENTER
@@ -45,3 +45,14 @@
 %token <y_word> SETCC
 %token <y_word> TEST
 %token <y_word> XCHG
+
+/* Intel 80[23]87 coprocessor tokens */
+%token <y_word> FNOOP		/* instruction without operands */
+%token <y_word> FMEM		/* instruction with one memory operand */
+%token <y_word> FMEM_AX		/* fstsw; the grammar accepts the accumulator register */
+%token <y_word> FST_I		/* one stack operand: st or st(i) */
+%token <y_word> FST_ST		/* add/mul family with two stack operands */
+%token <y_word> FST_ST2		/* sub/div family: the st(i),st form flips bit 3 of the second byte */
+%token <y_word> ST		/* the keyword "st" */
+
+%type <y_valu> st_i
diff --git a/mach/i386/as/mach3.c b/mach/i386/as/mach3.c
index d6d89b602..70a61e1e9 100644
--- a/mach/i386/as/mach3.c
+++ b/mach/i386/as/mach3.c
@@ -161,7 +161,7 @@
 0,	NOOP_1,		0157,		"outs",
 0,	NOOP_1,		0220,		"nop",
 0,	NOOP_1,		0230,		"cbw",
-0,	NOOP_1,		0230,		"cwde",	/* same opcode as cbw! */
+0,	NOOP_1,		0230,		"cwde", /* same opcode as cbw! */
 0,	NOOP_1,		0231,		"cdq",	/* same opcode as cwd! */
 0,	NOOP_1,		0231,		"cwd",
 0,	NOOP_1,		0233,		"wait",
@@ -279,3 +279,119 @@
 0,	TEST,		1,		"test",
 0,	XCHG,		0,		"xchgb",
 0,	XCHG,		1,		"xchg",
+
+/* Intel 80[23]87 coprocessor keywords; each entry is: 0, token, opcode value, mnemonic */
+
+0,	ST,		0,			"st",	/* FP stack register keyword; its value is not referenced by the visible grammar rules */
+
+0,	FNOOP,		FESC+1+(0xF0<<8),	"f2xm1",	/* FNOOP: low byte = first opcode byte, bits 8-15 = second byte; both are emitted as is */
+0,	FNOOP,		FESC+1+(0xE1<<8),	"fabs",
+0,	FNOOP,		FESC+1+(0xE0<<8),	"fchs",
+0,	FNOOP,		FESC+3+(0xE2<<8),	"fclex",
+0,	FNOOP,		FESC+6+(0xD9<<8),	"fcompp",
+0,	FNOOP,		FESC+1+(0xF6<<8),	"fdecstp",
+0,	FNOOP,		FESC+3+(0xE1<<8),	"fdisi",
+0,	FNOOP,		FESC+3+(0xE0<<8),	"feni",
+0,	FNOOP,		FESC+1+(0xF7<<8),	"fincstp",
+0,	FNOOP,		FESC+3+(0xE3<<8),	"finit",
+0,	FNOOP,		FESC+1+(0xE8<<8),	"fld1",
+0,	FNOOP,		FESC+1+(0xEA<<8),	"fldl2e",
+0,	FNOOP,		FESC+1+(0xE9<<8),	"fldl2t",
+0,	FNOOP,		FESC+1+(0xEC<<8),	"fldlg2",
+0,	FNOOP,		FESC+1+(0xED<<8),	"fldln2",
+0,	FNOOP,		FESC+1+(0xEB<<8),	"fldpi",
+0,	FNOOP,		FESC+1+(0xEE<<8),	"fldz",
+0,	FNOOP,		FESC+1+(0xD0<<8),	"fnop",
+0,	FNOOP,		FESC+1+(0xF3<<8),	"fpatan",
+0,	FNOOP,		FESC+1+(0xFF<<8),	"fcos",
+0,	FNOOP,		FESC+1+(0xFE<<8),	"fsin",
+0,	FNOOP,		FESC+1+(0xFB<<8),	"fsincos",
+0,	FNOOP,		FESC+1+(0xF8<<8),	"fprem",
+0,	FNOOP,		FESC+1+(0xF2<<8),	"fptan",
+0,	FNOOP,		FESC+1+(0xFC<<8),	"frndint",
+0,	FNOOP,		FESC+1+(0xFD<<8),	"fscale",
+0,	FNOOP,		FESC+1+(0xFA<<8),	"fsqrt",
+0,	FNOOP,		FESC+1+(0xE4<<8),	"ftst",
+0,	FNOOP,		FESC+1+(0xE5<<8),	"fxam",
+0,	FNOOP,		FESC+1+(0xF4<<8),	"fxtract",
+0,	FNOOP,		FESC+1+(0xF1<<8),	"fyl2x",
+0,	FNOOP,		FESC+1+(0xF9<<8),	"fyl2pi",	/* NOTE(review): D9 F9 is Intel's FYL2XP1; this spelling looks like a typo but is what the assembler accepts */
+
+0,	FMEM,		FESC+6+(0<<11),		"fiadds",	/* FMEM: low byte = opcode byte, bits 11-13 = /digit handed to ea_2.  DE /0: 16-bit integer operand */
+0,	FMEM,		FESC+2+(0<<11),		"fiaddl",	/* DA /0: 32-bit integer operand */
+0,	FMEM,		FESC+0+(0<<11),		"fadds",	/* D8 /0: 32-bit real operand */
+0,	FMEM,		FESC+4+(0<<11),		"faddd",	/* DC /0: 64-bit real operand */
+0,	FMEM,		FESC+7+(4<<11),		"fbld",	/* DF /4: packed BCD operand */
+0,	FMEM,		FESC+7+(6<<11),		"fbstp",
+0,	FMEM,		FESC+6+(2<<11),		"ficoms",
+0,	FMEM,		FESC+2+(2<<11),		"ficoml",
+0,	FMEM,		FESC+0+(2<<11),		"fcoms",
+0,	FMEM,		FESC+4+(2<<11),		"fcomd",
+0,	FMEM,		FESC+6+(3<<11),		"ficomps",
+0,	FMEM,		FESC+2+(3<<11),		"ficompl",
+0,	FMEM,		FESC+0+(3<<11),		"fcomps",
+0,	FMEM,		FESC+4+(3<<11),		"fcompd",
+0,	FMEM,		FESC+6+(6<<11),		"fidivs",
+0,	FMEM,		FESC+2+(6<<11),		"fidivl",
+0,	FMEM,		FESC+0+(6<<11),		"fdivs",
+0,	FMEM,		FESC+4+(6<<11),		"fdivd",
+0,	FMEM,		FESC+6+(7<<11),		"fidivrs",
+0,	FMEM,		FESC+2+(7<<11),		"fidivrl",
+0,	FMEM,		FESC+0+(7<<11),		"fdivrs",
+0,	FMEM,		FESC+4+(7<<11),		"fdivrd",
+0,	FMEM,		FESC+7+(5<<11),		"fildq",	/* DF /5: 64-bit integer operand */
+0,	FMEM,		FESC+7+(0<<11),		"filds",
+0,	FMEM,		FESC+3+(0<<11),		"fildl",
+0,	FMEM,		FESC+1+(0<<11),		"flds",
+0,	FMEM,		FESC+5+(0<<11),		"fldd",
+0,	FMEM,		FESC+3+(5<<11),		"fldx",	/* DB /5: 80-bit real operand */
+0,	FMEM,		FESC+1+(5<<11),		"fldcw",
+0,	FMEM,		FESC+1+(4<<11),		"fldenv",
+0,	FMEM,		FESC+6+(1<<11),		"fimuls",
+0,	FMEM,		FESC+2+(1<<11),		"fimull",
+0,	FMEM,		FESC+0+(1<<11),		"fmuls",
+0,	FMEM,		FESC+4+(1<<11),		"fmuld",
+0,	FMEM,		FESC+5+(4<<11),		"frstor",
+0,	FMEM,		FESC+5+(6<<11),		"fsave",
+0,	FMEM,		FESC+7+(2<<11),		"fists",
+0,	FMEM,		FESC+3+(2<<11),		"fistl",
+0,	FMEM,		FESC+1+(2<<11),		"fsts",
+0,	FMEM,		FESC+5+(2<<11),		"fstd",
+0,	FMEM,		FESC+7+(7<<11),		"fistpq",
+0,	FMEM,		FESC+7+(3<<11),		"fistps",
+0,	FMEM,		FESC+3+(3<<11),		"fistpl",
+0,	FMEM,		FESC+1+(3<<11),		"fstps",
+0,	FMEM,		FESC+5+(3<<11),		"fstpd",
+0,	FMEM,		FESC+3+(7<<11),		"fstpx",
+0,	FMEM,		FESC+1+(7<<11),		"fstcw",
+0,	FMEM,		FESC+1+(6<<11),		"fstenv",
+0,	FMEM_AX,	FESC+5+(7<<11),		"fstsw",	/* value is the memory form DD /7; the grammar encodes "fstsw ax" itself as DF E0 */
+0,	FMEM,		FESC+6+(4<<11),		"fisubs",
+0,	FMEM,		FESC+2+(4<<11),		"fisubl",
+0,	FMEM,		FESC+0+(4<<11),		"fsubs",
+0,	FMEM,		FESC+4+(4<<11),		"fsubd",
+0,	FMEM,		FESC+6+(5<<11),		"fisubrs",
+0,	FMEM,		FESC+2+(5<<11),		"fisubrl",
+0,	FMEM,		FESC+0+(5<<11),		"fsubrs",
+0,	FMEM,		FESC+4+(5<<11),		"fsubrd",
+
+0,	FST_I,		FESC+1+(0xC0<<8),	"fld",	/* FST_I: one stack operand; the grammar ORs the index i into the second byte.  D9 C0+i */
+0,	FST_I,		FESC+5+(0xD0<<8),	"fst",	/* DD D0+i */
+0,	FST_I,		FESC+5+(0xD8<<8),	"fstp",	/* DD D8+i (was DD C8+i, which is not the store-and-pop encoding) */
+0,	FST_I,		FESC+1+(0xC8<<8),	"fxch",	/* D9 C8+i */
+0,	FST_I,		FESC+0+(0xD0<<8),	"fcom",	/* D8 D0+i */
+0,	FST_I,		FESC+0+(0xD8<<8),	"fcomp",	/* D8 D8+i */
+0,	FST_I,		FESC+5+(0xC0<<8),	"ffree",	/* DD C0+i */
+
+0,	FST_ST,		FESC+0+(0xC0<<8),	"fadd",	/* two stack operands; value is the st,st(i) form, the grammar ORs 4 into the first byte for st(i),st */
+0,	FST_ST,		FESC+2+(0xC0<<8),	"faddp",	/* FESC+2 becomes DE once the grammar ORs in 4 for st(i),st */
+0,	FST_ST2,	FESC+0+(0xF0<<8),	"fdiv",	/* FST_ST2: for st(i),st the grammar also flips bit 3 (010) of the second byte */
+0,	FST_ST2,	FESC+2+(0xF0<<8),	"fdivp",
+0,	FST_ST2,	FESC+0+(0xF8<<8),	"fdivr",
+0,	FST_ST2,	FESC+2+(0xF8<<8),	"fdivrp",
+0,	FST_ST,		FESC+0+(0xC8<<8),	"fmul",
+0,	FST_ST,		FESC+2+(0xC8<<8),	"fmulp",
+0,	FST_ST2,	FESC+0+(0xE0<<8),	"fsub",
+0,	FST_ST2,	FESC+2+(0xE0<<8),	"fsubp",
+0,	FST_ST2,	FESC+0+(0xE8<<8),	"fsubr",
+0,	FST_ST2,	FESC+2+(0xE8<<8),	"fsubrp",
diff --git a/mach/i386/as/mach4.c b/mach/i386/as/mach4.c
index a6cebdc4b..7dda649ef 100644
--- a/mach/i386/as/mach4.c
+++ b/mach/i386/as/mach4.c
@@ -8,7 +8,7 @@
 
 operation
 	:
-		prefix oper		
+		prefix oper
 			{	address_long = 1; operand_long = 1; }
 	|	prefix1		/* to allow for only prefixes on a line */
 	;
@@ -121,32 +121,63 @@ oper	:	NOOP_1
 			but this gives a bad yacc conflict
 		*/
 		MOV ea_1 ',' RSYSCR
-			{	
+			{
 				if ($1 != 1 || !(reg_1 & IS_R32))
 					serror("syntax error");
 				emit1(0xF); emit1(042); emit1(0200|($4<<3)|(reg_1&07));}
 	|	MOV ea_1 ',' RSYSDR
-			{	
+			{
 				if ($1 != 1 || !(reg_1 & IS_R32))
 					serror("syntax error");
 				emit1(0xF); emit1(043); emit1(0200|($4<<3)|(reg_1&07));}
 	|	MOV ea_1 ',' RSYSTR
-			{	
+			{
 				if ($1 != 1 || !(reg_1 & IS_R32))
 					serror("syntax error");
 				emit1(0xF); emit1(046); emit1(0200|($4<<3)|(reg_1&07));}
 	|	MOV RSYSCR ',' R32
-			{	
+			{
 				if ($1 != 1) serror("syntax error");
 				emit1(0xF); emit1(040); emit1(0200|($4<<3)|$2);}
 	|	MOV RSYSDR ',' R32
-			{	
+			{
 				if ($1 != 1) serror("syntax error");
 				emit1(0xF); emit1(041); emit1(0200|($4<<3)|$2);}
 	|	MOV RSYSTR ',' R32
-			{	
+			{
 				if ($1 != 1) serror("syntax error");
 				emit1(0xF); emit1(044); emit1(0200|($4<<3)|$2);}
+/* Intel 80[23]87 coprocessor instructions; $1 is the token's value (presumably the keyword table's third column) */
+	|	FNOOP				/* no operands */
+			{	emit1($1); emit1($1>>8);}	/* low byte first, then bits 8-15 */
+	|	FMEM mem			/* one memory operand */
+			{	emit1($1); ea_2(($1>>8)&070);}	/* opcode byte, then ea_2 with bits 11-13 of $1; presumably ea_2 emits the operand bytes */
+	|	FMEM_AX mem		/* fstsw to memory: table value (DD /7), encoded like FMEM */
+			{	emit1($1); ea_2(($1>>8)&070);}
+	|	FMEM_AX R32		/* fstsw ax: fixed two-byte form DF E0; only register number 0 is accepted */
+			{	if ($2 != 0) serror("illegal register");
+				emit1(FESC|7); emit1(7<<5);
+			}
+	|	FST_I st_i			/* op st(i) */
+			{	emit1($1); emit1(($1>>8)|$2); }	/* stack index ORed into the second byte */
+	|	FST_I ST			/* op st: second byte emitted unchanged, i.e. index 0 */
+			{	emit1($1); emit1($1>>8); }
+	|	FST_ST ST ',' st_i	/* op st,st(i): D8 /r.  The pop mnemonics (low byte FESC+2) have no st,st(i) encoding, so reject them */
+			{	if ($1 & 2) serror("illegal operands"); emit1($1); emit1(($1>>8)|$4); }
+	|	FST_ST st_i ',' ST	/* op st(i),st: ORing 4 into the first byte gives DC, or DE for the pop mnemonics */
+			{	emit1($1|4); emit1((($1>>8)|$2)); }
+	|	FST_ST2 ST ',' st_i	{	if ($1 & 2) serror("illegal operands"); emit1($1); emit1(($1>>8)|$4); }	/* sub/div st,st(i): table value as is */
+	|	FST_ST2 st_i ',' ST	{	emit1($1|4); emit1((($1>>8)|$2)^010); }	/* sub/div st(i),st: plain and reversed second bytes are swapped */
+	;
+
+st_i	:	ST '(' absexp ')'		/* st(i): FP stack register with an absolute index */
+			{	if (!fit3($3)) {	/* index must fit in three bits */
+					serror("illegal index in FP stack");
+				}
+				$$ = $3;		/* value is the index; the oper rules OR it into the second opcode byte */
+			}
+	;
+
 	;
 mem	:	'(' expr ')'
 			{	rm_2 = 05; exp_2 = $2; reg_2 = 05; mod_2 = 0;
diff --git a/mach/i386/libem/LIST b/mach/i386/libem/LIST
index 29dba7ef7..e9de9fc1a 100644
--- a/mach/i386/libem/LIST
+++ b/mach/i386/libem/LIST
@@ -13,7 +13,7 @@ dvi.s
 dvu.s
 error.s
 exg.s
-fakfp.s
+fp8087.s
 fat.s
 gto.s
 iaar.s
diff --git a/mach/i386/libem/fp8087.s b/mach/i386/libem/fp8087.s
new file mode 100644
index 000000000..39ee20e3e
--- /dev/null
+++ b/mach/i386/libem/fp8087.s
@@ -0,0 +1,414 @@
+.define .adf4, .adf8, .sbf4, .sbf8, .mlf4, .mlf8, .dvf4, .dvf8
+.define .ngf4, .ngf8, .fif4, .fif8, .fef4, .fef8
+.define .cif4, .cif8, .cuf4, .cuf8, .cfi, .cfu, .cff4, .cff8
+.define .cmf4, .cmf8
+.sect .text; .sect .rom; .sect .data; .sect .bss
+
+!	$Header$
+
+!	Implement interface to floating point package for Intel 8087
+
+	.sect .rom		! constants used as coprocessor memory operands
+one:				! 2-byte integer 1: operand of fisubs/fiadds in .fif4 and .fif8
+	.data2	1
+two:				! 2-byte integer 2; nothing in this file refers to it
+	.data2	2
+bigmin:				! 4-byte -2^31: .cuf4/.cuf8 subtract it twice, which adds 2^32
+	.data4 	-2147483648
+
+	.sect .text
+.adf4:				! add the 4-byte floats at sp+4 and sp+8; the sum replaces the one at sp+8
+	mov	bx,sp		! bx = sp at entry; everything below is addressed relative to it
+	flds	4(bx)		! push the float at sp+4
+	fadds	8(bx)		! st = st + float at sp+8
+	fstps	8(bx)		! store the sum at sp+8 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+.adf8:				! add the 8-byte floats at sp+4 and sp+12; the sum replaces the one at sp+12
+	mov	bx,sp		! bx = sp at entry
+	fldd	4(bx)		! push the double at sp+4
+	faddd	12(bx)		! st = st + double at sp+12
+	fstpd	12(bx)		! store the sum at sp+12 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.sbf4:				! 4-byte floats: (value at sp+8) - (value at sp+4), result stored at sp+8
+	mov	bx,sp		! bx = sp at entry
+	flds	8(bx)		! push the minuend from sp+8
+	fsubs	4(bx)		! st = st - float at sp+4
+	fstps	8(bx)		! store the difference at sp+8 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.sbf8:				! 8-byte floats: (value at sp+12) - (value at sp+4), result stored at sp+12
+	mov	bx,sp		! bx = sp at entry
+	fldd	12(bx)		! push the minuend from sp+12
+	fsubd	4(bx)		! st = st - double at sp+4
+	fstpd	12(bx)		! store the difference at sp+12 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.mlf4:				! multiply the 4-byte floats at sp+4 and sp+8; the product replaces the one at sp+8
+	mov	bx,sp		! bx = sp at entry
+	flds	4(bx)		! push the float at sp+4
+	fmuls	8(bx)		! st = st * float at sp+8
+	fstps	8(bx)		! store the product at sp+8 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+.mlf8:				! multiply the 8-byte floats at sp+4 and sp+12; the product replaces the one at sp+12
+	mov	bx,sp		! bx = sp at entry
+	fldd	4(bx)		! push the double at sp+4
+	fmuld	12(bx)		! st = st * double at sp+12
+	fstpd	12(bx)		! store the product at sp+12 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.dvf4:				! 4-byte floats: (value at sp+8) / (value at sp+4), result stored at sp+8
+	mov	bx,sp		! bx = sp at entry
+	flds	8(bx)		! push the dividend from sp+8
+	fdivs	4(bx)		! st = st / float at sp+4
+	fstps	8(bx)		! store the quotient at sp+8 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.dvf8:				! 8-byte floats: (value at sp+12) / (value at sp+4), result stored at sp+12
+	mov	bx,sp		! bx = sp at entry
+	fldd	12(bx)		! push the dividend from sp+12
+	fdivd	4(bx)		! st = st / double at sp+4
+	fstpd	12(bx)		! store the quotient at sp+12 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.ngf4:				! negate the 4-byte float at sp+4 in place
+	mov	bx,sp		! bx = sp at entry
+	flds	4(bx)		! push the float
+	fchs			! flip its sign
+	fstps	4(bx)		! store it back and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.ngf8:				! negate the 8-byte float at sp+4 in place
+	mov	bx,sp		! bx = sp at entry
+	fldd	4(bx)		! push the double
+	fchs			! flip its sign
+	fstpd	4(bx)		! store it back and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.fif4:				! multiply the 4-byte floats at sp+8 and sp+12, split the product; results go through the pointer at sp+4
+	mov	bx,sp		! bx = sp at entry
+	flds	8(bx)
+	fmuls	12(bx)		! multiply
+	fld	st		! copy result: st = copy to be rounded, st(1) = product
+	ftst			! test sign; handle negative separately
+	fstsw	ax		! status word to ax so sahf can move the condition bits to the flags
+	wait
+	sahf			! result of test in condition codes
+	jb	1f		! carry set: product is negative
+	frndint			! round to an integer in the current rounding mode
+	fcom	st(1)		! compare with original; if <=, then OK
+	fstsw	ax
+	wait
+	sahf
+	jbe	2f
+	fisubs	(one)		! else subtract 1 (rounding went up; we want truncation)
+	jmp	2f
+1:				! here, negative case
+	frndint			! round to an integer in the current rounding mode
+	fcom	st(1)		! compare with original; if >=, then OK
+	fstsw	ax
+	wait
+	sahf
+	jae	2f
+	fiadds	(one)		! else add 1 (rounding went down; we want truncation)
+2:
+	fsub	st(1),st	! subtract integer part: st(1) = product - integer part
+	mov	bx,4(bx)	! bx = result pointer taken from sp+4
+	fstps	(bx)		! integer part at offset 0, pop
+	fstps	4(bx)		! fraction at offset 4, pop
+	wait
+	ret
+
+.fif8:				! multiply the 8-byte floats at sp+8 and sp+16, split the product; results go through the pointer at sp+4
+	mov	bx,sp		! bx = sp at entry
+	fldd	8(bx)
+	fmuld	16(bx)		! multiply
+	fld	st		! and copy result: st = copy to be rounded, st(1) = product
+	ftst			! test sign; handle negative separately
+	fstsw	ax		! status word to ax so sahf can move the condition bits to the flags
+	wait
+	sahf			! result of test in condition codes
+	jb	1f		! carry set: product is negative
+	frndint			! round to an integer in the current rounding mode
+	fcom	st(1)		! compare with original; if <=, then OK
+	fstsw	ax
+	wait
+	sahf
+	jbe	2f
+	fisubs	(one)		! else subtract 1 (rounding went up; we want truncation)
+	jmp	2f
+1:				! here, negative case
+	frndint			! round to an integer in the current rounding mode
+	fcom	st(1)		! compare with original; if >=, then OK
+	fstsw	ax
+	wait
+	sahf
+	jae	2f
+	fiadds	(one)		! else add 1 (rounding went down; we want truncation)
+2:
+	fsub	st(1),st	! subtract integer part: st(1) = product - integer part
+	mov	bx,4(bx)	! bx = result pointer taken from sp+4
+	fstpd	(bx)		! integer part at offset 0, pop
+	fstpd	8(bx)		! fraction at offset 8, pop
+	wait
+	ret
+
+.fef4:				! split the 4-byte float at sp+8 into exponent and fraction, stored through the pointer at sp+4
+				! this could be simpler, if only the
+				! fxtract instruction was emulated properly
+	mov	bx,sp		! bx = sp at entry
+	mov	ax,8(bx)	! raw bits of the float
+	and	ax,0x7f800000	! isolate the biased exponent field
+	je	1f		! zero exponent
+	shr	ax,23
+	sub	ax,126		! unbias so that the fraction ends up in [0.5,1)
+	mov	cx,ax		! exponent in cx
+	mov	ax,8(bx)
+	and	ax,0x807fffff	! keep sign and mantissa bits
+	or	ax,0x3f000000	! load -1 exponent
+	mov	bx,4(bx)	! bx = result pointer
+	mov	4(bx),ax	! fraction at offset 4
+	mov	(bx),cx		! exponent at offset 0
+	ret
+1:				! we get here on zero exp
+	mov	ax,8(bx)
+	and	ax,0x007fffff	! mantissa bits only
+	jne	1f		! nonzero mantissa: unnormalized number; fall through for a zero result
+	mov	bx,4(bx)
+	mov	(bx),ax		! ax is 0 here: exponent 0
+	mov	4(bx),ax	! and fraction 0
+	ret
+1:				! otherwise unnormalized number
+	mov	cx,8(bx)
+	and	cx,0x807fffff
+	mov	dx,cx		! dx = sign + mantissa, shifted left below
+	and	cx,0x80000000	! cx = sign bit only
+	mov	ax,-125		! starting exponent, decremented once per shift
+2:
+	test	dx,0x800000	! stop once a one has reached bit 23
+	jne	1f
+	dec	ax
+	shl	dx,1
+	or	dx,cx		! put the sign bit back after the shift
+	jmp	2b
+1:
+	mov	bx,4(bx)	! bx = result pointer
+	mov	(bx),ax		! exponent at offset 0
+	and	dx,0x807fffff	! drop bit 23
+	or	dx,0x3f000000	! load -1 exponent
+	mov	4(bx),dx	! fraction at offset 4
+	ret
+
+.fef8:				! split the 8-byte float at sp+8 into exponent and fraction, stored through the pointer at sp+4
+				! this could be simpler, if only the
+				! fxtract instruction was emulated properly
+	mov	bx,sp		! bx = sp at entry
+	mov	ax,12(bx)	! high word: sign, exponent, top of mantissa
+	and	ax,0x7ff00000	! isolate the biased exponent field
+	je	1f		! zero exponent
+	shr	ax,20
+	sub	ax,1022		! unbias so that the fraction ends up in [0.5,1)
+	mov	cx,ax		! exponent in cx
+	mov	ax,12(bx)
+	and	ax,0x800fffff	! keep sign and mantissa bits
+	or	ax,0x3fe00000	! load -1 exponent
+	mov	dx,8(bx)	! low word of the mantissa, copied unchanged
+	mov	bx,4(bx)	! bx = result pointer
+	mov	4(bx),dx	! fraction low word at offset 4
+	mov	8(bx),ax	! fraction high word at offset 8
+	mov	(bx),cx		! exponent at offset 0
+	ret
+1:				! we get here on zero exp
+	mov	ax,12(bx)
+	and	ax,0xfffff
+	or	ax,8(bx)	! any mantissa bit set in either word?
+	jne	1f		! nonzero mantissa: unnormalized number; fall through for a zero result
+	mov	bx,4(bx)
+	mov	(bx),ax		! ax is 0 here: exponent 0
+	mov	4(bx),ax	! and fraction 0
+	mov	8(bx),ax
+	ret
+1:				! otherwise unnormalized number
+	mov	cx,12(bx)
+	and	cx,0x800fffff
+	mov	dx,cx		! dx = sign + high mantissa bits, shifted left below
+	and	cx,0x80000000	! cx = sign bit only
+	mov	ax,-1021	! starting exponent, decremented once per shift
+2:
+	test	dx,0x100000	! stop once a one has reached bit 20
+	jne	1f
+	dec	ax
+	shl	8(bx),1		! shift the low word in place on the stack; its top bit goes to carry
+	rcl	dx,1		! and from carry into the high word
+	or	dx,cx		! put the sign bit back after the shift
+	jmp	2b
+1:
+	and	dx,0x800fffff	! drop bit 20
+	or	dx,0x3fe00000	! load -1 exponent
+	mov	cx,8(bx)	! shifted low word
+	mov	bx,4(bx)	! bx = result pointer
+	mov	(bx),ax		! exponent at offset 0
+	mov	8(bx),dx	! fraction high word at offset 8
+	mov	4(bx),cx	! fraction low word at offset 4
+	ret
+
+.cif4:				! convert the 4-byte signed integer at sp+8 to a 4-byte float, in place
+	mov	bx,sp		! bx = sp at entry
+	fildl	8(bx)		! push the integer as a real
+	fstps	8(bx)		! store it as a 4-byte float at sp+8 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.cif8:				! convert the 4-byte signed integer at sp+8 to an 8-byte float stored at sp+4
+	mov	bx,sp		! bx = sp at entry
+	fildl	8(bx)		! push the integer as a real
+	fstpd	4(bx)		! store 8 bytes at sp+4..sp+11 (covers the slot at sp+4 and the integer) and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.cuf4:				! convert the 4-byte unsigned integer at sp+8 to a 4-byte float, in place
+	mov	bx,sp		! bx = sp at entry
+	fildl	8(bx)		! fildl reads the value as signed
+	cmp	8(bx),0
+	jge	1f		! top bit clear: the signed reading is already right
+	fisubl	(bigmin)	! subtracting -2^31 twice
+	fisubl	(bigmin)	! adds 2^32 to correct the signed reading
+1:
+	fstps	8(bx)		! store as a 4-byte float at sp+8 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.cuf8:				! convert the 4-byte unsigned integer at sp+8 to an 8-byte float stored at sp+4
+	mov	bx,sp		! bx = sp at entry
+	fildl	8(bx)		! fildl reads the value as signed
+	cmp	8(bx),0
+	jge	1f		! top bit clear: the signed reading is already right
+	fisubl	(bigmin)	! subtracting -2^31 twice
+	fisubl	(bigmin)	! adds 2^32 to correct the signed reading
+1:
+	fstpd	4(bx)		! store 8 bytes at sp+4..sp+11 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.cfi:				! convert the float at sp+12 (size in bytes at sp+8) to a 4-byte integer, truncating
+	mov	bx,sp		! bx = sp at entry
+	fstcw	4(bx)		! save the control word in the slot at sp+4 (its old contents are overwritten)
+	wait
+	mov	dx,4(bx)	! keep the original control word in dx
+	or	4(bx),0xc00	! truncating mode
+	wait
+	fldcw	4(bx)
+	cmp	8(bx),4		! is the source a 4-byte float?
+	jne	2f
+				! loc 4 loc ? cfi
+	flds	12(bx)
+	fistpl	12(bx)		! integer replaces the float at sp+12
+1:
+	mov	4(bx),dx	! put the original control word back
+	wait
+	fldcw	4(bx)		! and reload it
+	ret
+2:
+				! loc 8 loc ? cfi
+	fldd	12(bx)
+	fistpl	16(bx)		! integer goes into the upper half of the 8-byte slot
+	jmp	1b
+
+.cfu:				! like .cfi, but takes the absolute value of the float before the integer store
+	mov	bx,sp		! bx = sp at entry
+	fstcw	4(bx)		! save the control word in the slot at sp+4 (its old contents are overwritten)
+	wait
+	mov	dx,4(bx)	! keep the original control word in dx
+	or	4(bx),0xc00	! truncating mode
+	wait
+	fldcw	4(bx)
+	cmp	8(bx),4		! is the source a 4-byte float?
+	jne	2f
+				! loc 4 loc ? cfu
+	flds	12(bx)
+	fabs			! ??? NOTE(review): fistpl is a signed store, so values >= 2^31 presumably do not convert correctly
+	fistpl	12(bx)		! integer replaces the float at sp+12
+	wait
+1:
+	mov	4(bx),dx	! put the original control word back
+	wait
+	fldcw	4(bx)		! and reload it
+	ret
+2:
+				! loc 8 loc ? cfu
+	fldd	12(bx)
+	fabs			! ??? (same caveat as in the 4-byte case)
+	fistpl	16(bx)		! integer goes into the upper half of the 8-byte slot
+	jmp	1b
+
+.cff4:				! convert the 8-byte float at sp+4 to a 4-byte float stored at sp+8
+	mov	bx,sp		! bx = sp at entry
+	fldd	4(bx)		! load the double first; its stack slot is reused as scratch below
+	fstcw	4(bx)		! save the control word at sp+4
+	wait
+	mov	dx,4(bx)	! keep the original control word in dx
+	and	4(bx),0xf3ff	! set to rounding mode (clears bits 10-11 of the control word)
+	wait
+	fldcw	4(bx)
+	fstps	8(bx)		! store as a 4-byte float at sp+8 and pop
+	mov	4(bx),dx	! put the original control word back
+	wait
+	fldcw	4(bx)		! and reload it
+	wait
+	ret
+
+.cff8:				! convert the 4-byte float at sp+4 to an 8-byte float stored at sp+4
+	mov	bx,sp		! bx = sp at entry
+	flds	4(bx)		! push the 4-byte float
+	fstpd	4(bx)		! store 8 bytes at sp+4..sp+11 and pop
+	wait			! wait for the coprocessor to finish the store
+	ret
+
+.cmf4:				! compare the 4-byte floats at sp+4 and sp+8; result in ax: 0, 1 or -1
+	mov	bx,sp		! bx = sp at entry
+	xor	cx,cx		! cx = 0, the result for "equal"
+	flds	8(bx)		! ends up as st(1)
+	flds	4(bx)		! st
+	fcompp			! compare and pop operands
+	fstsw	ax		! status word to ax so sahf can move the condition bits to the flags
+	wait
+	sahf
+	je	1f		! zero flag set: return 0
+	jb	2f		! carry set: value at sp+4 is below value at sp+8
+	dec	cx		! otherwise return -1
+	jmp	1f
+2:
+	inc	cx		! return 1
+1:
+	mov	ax,cx
+	ret
+
+
+.cmf8:				! compare the 8-byte floats at sp+4 and sp+12; result in ax: 0, 1 or -1
+	mov	bx,sp		! bx = sp at entry
+	xor	cx,cx		! cx = 0, the result for "equal"
+	fldd	12(bx)		! ends up as st(1)
+	fldd	4(bx)		! st
+	fcompp			! compare and pop operands
+	fstsw	ax		! status word to ax so sahf can move the condition bits to the flags
+	wait
+	sahf
+	je	1f		! zero flag set: return 0
+	jb	2f		! carry set: value at sp+4 is below value at sp+12
+	dec	cx		! otherwise return -1
+	jmp	1f
+2:
+	inc	cx		! return 1
+1:
+	mov	ax,cx
+	ret
-- 
2.34.1