From 33dc73d4c47e0458823e10f30f2f6cf559255639 Mon Sep 17 00:00:00 2001
From: ceriel <none@none>
Date: Mon, 12 Nov 1990 15:29:14 +0000
Subject: [PATCH] Latest version from Albert Koelmans

---
 mach/arm/as/mach0.c |   9 +-
 mach/arm/as/mach1.c |   2 +
 mach/arm/as/mach2.c |   4 +-
 mach/arm/as/mach3.c |  13 +-
 mach/arm/as/mach4.c |  29 ++-
 mach/arm/as/mach5.c | 463 +++++++++++++++++++++++++++++++++-----------
 6 files changed, 391 insertions(+), 129 deletions(-)

diff --git a/mach/arm/as/mach0.c b/mach/arm/as/mach0.c
index a4f688209..abe7ec450 100644
--- a/mach/arm/as/mach0.c
+++ b/mach/arm/as/mach0.c
@@ -1,12 +1,13 @@
-/* $Header; mach0.c, v1.1 20-Oct-88 AJM */
+/* $Header; mach0.c, v1.2 06-Mar-89 AJM */
 
 #define LISTING
 #define ASLD
 #define THREE_PASS
-
+#define DEBUG 0
+/*
 #define WORDS_REVERSED
 #define BYTES_REVERSED
-
+*/
 #undef valu_t
 #define valu_t 	long
 
@@ -44,4 +45,6 @@
 #define	CMP	0x01500000
 #define	TEQ	0x01300000
 #define	TST	0x01100000
+#define LDR	0x04100000
+#define STR	0x04000000
 
diff --git a/mach/arm/as/mach1.c b/mach/arm/as/mach1.c
index dd97686f2..69ad511e2 100644
--- a/mach/arm/as/mach1.c
+++ b/mach/arm/as/mach1.c
@@ -1 +1,3 @@
+/* $Header; mach1.c, v1.1 06-Mar-89 AJM */
 extern word_t opcode;
+extern int success;	/* LDR/STR address failure flag */
diff --git a/mach/arm/as/mach2.c b/mach/arm/as/mach2.c
index 09404529d..e61c7b7c4 100644
--- a/mach/arm/as/mach2.c
+++ b/mach/arm/as/mach2.c
@@ -1,4 +1,4 @@
-/* $Header: mach2.c, V1.3 2-Nov-88 AJM */
+/* $Header: mach2.c, V1.4 15-Mar-89 AJM */
 %token <y_word> COND
 %token <y_word> LINK
 %token <y_word> BRANCH
@@ -20,5 +20,5 @@
 %token <y_word> MLA
 
 %type <y_word> optlink optcond opts optt optp optb optexc reglist rlist
-%type <y_word> optsign optpsr optshift shftcnt address offset
+%type <y_word> optsign optpsr optshift shftcnt address offset aoptshift
 %type <y_expr> operand
diff --git a/mach/arm/as/mach3.c b/mach/arm/as/mach3.c
index 24acf0a96..27a3324da 100644
--- a/mach/arm/as/mach3.c
+++ b/mach/arm/as/mach3.c
@@ -1,4 +1,4 @@
-/* $Header: mach3.c, v1.5 2-Nov-88 AJM */
+/* $Header: mach3.c, v2.0 23-Feb-89 AJM */
 
 0,		COND,		0x00000000,	".EQ",
 0,		COND,		0x10000000,	".NE",
@@ -77,6 +77,7 @@
 0,		REG,		13,		"R13",
 0,		REG,		14,		"R14",
 0,		REG,		15,		"R15",
+0,		REG,		15,		"PC",
 
 0,		SHIFT,		0x00000000,	"LSL",
 0,		SHIFT,		0x00000000,	"ASL",
@@ -97,10 +98,10 @@
 0,		BDT,		0x08100000,	"LDMDA",
 0,		BDT,		0x09900000,	"LDMIB",
 0,		BDT,		0x08900000,	"LDMIA",
-0,		BDT,		0x08100000,	"LDMFD",
-0,		BDT,		0x08900000,	"LDMFA",
-0,		BDT,		0x09100000,	"LDMED",
-0,		BDT,		0x09900000,	"LDMEA",
+0,		BDT,		0x08900000,	"LDMFD",
+0,		BDT,		0x08100000,	"LDMFA",
+0,		BDT,		0x09900000,	"LDMED",
+0,		BDT,		0x09100000,	"LDMEA",
 0,		BDT,		0x09000000,	"STMDB",
 0,		BDT,		0x08000000,	"STMDA",
 0,		BDT,		0x09800000,	"STMIB",
@@ -112,7 +113,7 @@
 
 0,		SWI,		0x0F000000,	"SWI",
 
-0,		ADR,		0,		"ADR",
+0,		ADR,		0x00000000,	"ADR",
 
 0,		MUL,		0x00000090,	"MUL",
 0,		MLA,		0x00200090,	"MLA",
diff --git a/mach/arm/as/mach4.c b/mach/arm/as/mach4.c
index 42ea000f1..846eaffea 100644
--- a/mach/arm/as/mach4.c
+++ b/mach/arm/as/mach4.c
@@ -1,4 +1,4 @@
-/* $Header: mach4.c, v1.5 2-Nov-88 AJM */
+/* $Header: mach4.c, v1.9 15-Mar-89 AJM */
 
 operation	: BRANCH optlink expr
 			{branch($1, $2, $3.val);}
@@ -9,13 +9,13 @@ operation	: BRANCH optlink expr
 		| DATA3 optcond opts optp REG ',' operand
 			{data($1,$2|$3|$4|$5<<16,$7.val,$7.typ);}
 		| SDT optcond optb optt REG ',' address
-			{emit4($1|$2|$3|$4|$5<<12|$7);}
+			{strldr($1,$2|$3|$4|$5<<12,$7);}
 		| BDT optcond REG optexc ',' reglist optpsr
 			{emit4($1|$2|$3<<16|$4|$6|$7);}
 		| SWI optcond expr
 			{emit4($1|$2|$3.val);}
 		| ADR optcond REG ',' expr
-			{calcadr($2, $3, $5.val, $5.typ);}
+			{calcadr($2,$3,$5.val,$5.typ);}
 		| MUL optcond REG ',' REG ',' REG
 			{emit4($1|$2|$3<<16|$5|$7<<8);}
 		| MLA optcond REG ',' REG ',' REG ',' REG
@@ -76,6 +76,14 @@ optshift	: ',' SHIFT shftcnt
 			{$$ = 0;}
 		;
 
+aoptshift	: ',' SHIFT '#' expr	/* constant shift; styp for calcshft is the SHIFT token itself */
+			{$$ = $2|calcshft($4.val, $4.typ, $2);}
+		| ',' RRX		/* RRX takes no shift count */
+			{$$ = $2;}
+		|			/* no shift given */
+			{$$ = 0;}
+		;
+
 shftcnt		: '#' expr
 			{$$ = calcshft($2.val, $2.typ, $<y_word>0);}
 		| REG
@@ -83,18 +91,18 @@ shftcnt		: '#' expr
 		;
 
 address		: expr
-			{$$ = 0x01000000|calcaddress($1.val,$1.typ,$<y_word>-1);}
+			{success = 0; $$ = $1.val;}
 		| '[' REG ']'
-			{$$ = 0x01000000|$2<<16;}
+			{success = 1; $$ = 0x01000000|$2<<16;}
 		| '[' REG ',' offset ']' optexc
-			{$$ = $2<<16|$4|$6|0x01000000;}
+			{success = 1; $$ = $2<<16|$4|$6|0x01000000;}
 		| '[' REG ']' ',' offset
-			{$$ = $2<<16|$5;}
+			{success = 1; $$ = $2<<16|$5;}
 		;
 
 offset		: '#' expr
 			{$$ = calcoffset($2.val);}
-		| optsign REG optshift
+		| optsign REG aoptshift
 			{$$ = 0x02000000|$1|$2|$3;}
 		;
 	
@@ -113,4 +121,9 @@ rlist		: REG
 			{$$ = 1<<$1;}
 		| rlist ',' REG
 			{$$ = $1|1<<$3;}
+		| REG '-' REG
+			{
+			       for ($$ = 0; $1 <= $3; $1++)
+                                       $$ |= (1<<$1);
+                        }
 		;
diff --git a/mach/arm/as/mach5.c b/mach/arm/as/mach5.c
index 899e32638..b0085f8d8 100644
--- a/mach/arm/as/mach5.c
+++ b/mach/arm/as/mach5.c
@@ -1,4 +1,4 @@
-/* $Header: mach5.c, v1.6 3-Nov-88 AJM */
+/* $Header: mach5.c, v3.3 25-Apr-89 AJM */
 
 branch(brtyp, link, val)
 word_t brtyp;
@@ -7,7 +7,7 @@ valu_t val;
 {
 	valu_t offset;
 
-	offset = val - DOTVAL - 8;
+	offset = val - DOTVAL - 8;		/* Allow for pipeline */
 	if ((offset & 0xFC000000) != 0 && (offset & 0xFC000000) != 0xFC000000){
 		serror("offset out of range");
 	}
@@ -22,16 +22,31 @@ valu_t val;
 short typ;
 {
 	valu_t tmpval;
+	int adrflag = 0;
 
-	if (typ == S_REG){
+	if (typ == S_REG){	/* The argument is a register */
 		emit4(opc|ins|val);
 		return;
 	}
 
-	ins |= 0x02000000;
+/* Do a bit of optimisation here, since the backend might produce instructions
+   of the type    ADD R0, R0, #0.   We can ignore these. */
 
+	if (((opc == ADD) || (opc == SUB)) && (val == 0)){	/* ADD or SUB 0 ? */
+		if ((ins & 0x000F0000) == ((ins & 0x0000F000) << 4)) /* Same reg ? */
+			return;		/* Don't emit anything */
+	}
+
+/* No optimisation, so carry on ... */
+
+	ins |= 0x02000000;	/* The argument is an immediate value */
 	tmpval = val;
-	if (typ == S_ABS){
+	if (opc == 0xff){		/* This is an ADR */
+		adrflag = 1;
+		opc = MOV;
+	}
+
+	if (typ == S_ABS){	/* An absolute value */
 		if (calcimm(&opc, &tmpval, typ)){
 			emit4(opc|ins|tmpval);
 			return;
@@ -39,50 +54,44 @@ short typ;
 	}
 
 	tmpval = val;
-	if (small(calcimm(&opc, &tmpval, typ),12)){
-		emit4(opc|ins|tmpval);
-		return;
-	}	
-
-	if (opc == MOV && typ != S_ABS)
-	{
-		if (small((val & 0xF0000000) == 0xF0000000, 8)){
-			emit4(0xE51F0004 | (ins & 0xF000));
-			emit4(val);
+	if (!adrflag){		/* Don't do this for ADRs */
+		if (oursmall(calcimm(&opc, &tmpval, typ), 12)){
+			emit4(opc|ins|tmpval);
 			return;
+		}	
+	}
+
+	if (opc == MOV || opc == MVN || opc == ADD || opc == SUB){
+		if (!bflag && pass == PASS_3){		/* Debugging info */
+			/* warning("MOV/ADD extension"); */
+			/* if (dflag)
+				printf("value: %lx\n", val);*/
 		}
-		if (small(1,4)){
-			emit4(0xE51F0000 | (ins & 0xF000));
-			emit4(0xEA000000);
-			emit4(val);
+		if (oursmall((val & 0xFFFF0000) == 0, 8)){
+			putaddr(opc, ins, val, 2);
 			return;
 		}
-		DOTVAL += 16;
-		return;
-	}
-	if (opc == ADD && typ != S_ABS)
-	{
-		if (small((val & 0xF0000000) == 0xF0000000, 4)){
-			emit4(0xE51F0004 | (ins & 0xF000));
-			emit4(val);
-			emit4(0xE2800000 | (ins&0xFF000) | (ins&0xF000)>>12);
+		if (oursmall((val & 0xFF000000) == 0, 4)){
+			putaddr(opc, ins, val, 3);
 			return;
 		}
-		emit4(0xE51F0000 | (ins & 0xF000));
-		emit4(0xEA000000);
-		emit4(val);
-		emit4(0xE2800000 | (ins&0xFF000) | (ins&0xF000)>>12);
+		putaddr(opc, ins, val, 4);
 		return;
 	}
-	/* default: */
-		if (pass == PASS_1)
-			DOTVAL += 16;
-		else
-			serror("immediate value out of range");
-		return;
-
+	
+	if (pass == PASS_1)
+		DOTVAL += 16;	/* Worst case we can emit */
+	else
+		serror("immediate value out of range");
+	return;
 }
 
+
+/* Calculate an immediate value.  This is not as easy as it sounds, because
+   the ARM uses an 8-bit value and 4-bit shift to encode the value into a 
+   12-bit field.  Unfortunately this means that some numbers may not fit at
+   all. */
+
 calcimm(opc,val,typ)
 word_t *opc;
 valu_t *val;
@@ -90,78 +99,83 @@ short typ;
 {
 	int i = 0;
 
-	if (typ == S_UND) return 0;
+	if (typ == S_UND)
+		return(0);	/* Can't do anything with an undefined label */
 
-	if ((*val & 0xFFFFFF00) == 0) return 1;
+	if ((*val & 0xFFFFFF00) == 0)	/* Value is positive, but < 256, */
+		return(1); 		/* so doesn't need a shift */
 
-	if ((~*val & 0xFFFFFF00) == 0){
-		if (*opc == AND)
-			{
+	if ((~*val & 0xFFFFFF00) == 0){	/* Value is negative, but < 256, */
+		if (*opc == AND)	/* so no shift required, only */
+			{		/* inversion */
 			*val = ~*val;
 			*opc = BIC;
-			return 1;
+			return(1);
 			}
 		if (*opc == MOV)
 			{
 			*val = ~*val;
 			*opc = MVN;
-			return 1;
+			return(1);
 			}
 		if (*opc == ADC)
 			{
 			*val = ~*val;
 			*opc = SBC;
-			return 1;
+			return(1);
 			}
 
 	}	
-	if ((-1**val & 0xFFFFFF00) == 0 ){
+	if ((-1**val & 0xFFFFFF00) == 0){ /* Same idea ... */
 		if (*opc == ADD)
 			{
 			*val *= -1;
 			*opc = SUB;
-			return 1;
+			return(1);
 			}
 		if (*opc == CMP)
 			{
 			*val *= -1;
 			*opc = CMN;
-			return 1;
+			return(1);
 			}
 	}
 
-	do{
-		rotateleft2(&*val);
+	do{					/* Now we need to shift */
+		rotateleft2(&*val);		/* Rotate left by two bits */
 		i++;
-		if((*val & 0xFFFFFF00) == 0){
-			*val = *val|i<<8;
-			return 1;
+		if((*val & 0xFFFFFF00) == 0){	/* Got a value < 256 */
+			*val = *val|i<<8;	/* OR in the shift */
+			return(1);
 		}
-		if ((~*val & 0xFFFFFF00) == 0){
-			if (*opc == AND)
+		if ((~*val & 0xFFFFFF00) == 0){ /* If negative, carry out */
+			if (*opc == AND)	/* inversion as before */
 				{
 				*val = ~*val|i<<8;
 				*opc = BIC;
-				return 1;
+				return(1);
 				}
 			if (*opc == MOV)
 				{
 				*val = ~*val|i<<8;
 				*opc = MVN;
-				return 1;
+				return(1);
 				}
 			if (*opc == ADC)
 				{
 				*val = ~*val|i<<8;
 				*opc = SBC;
-				return 1;
+				return(1);
 				}
 		}	
 	}while(i<15);
 
-	return 0;
+	return(0);	/* Failed if can't encode it after 16 rotates */
 }
 
+
+/* Calculate an offset in an address */
+
 word_t
 calcoffset(val)
 valu_t val;
@@ -175,68 +189,150 @@ valu_t val;
 	return(0);
 }
 
-word_t
-calcaddress (val,typ,reg)
+
+/* This routine deals with STR and LDR instructions */
+
+strldr(opc, ins, val)
+long opc, ins;
 valu_t val;
-short typ;
-word_t reg;
 {
-	int tmpval;
 
-	if (typ == S_UND){
-		DOTVAL += 8;
-		return 0;
+	long reg, reg2;	/* The registers we are using */
+	long tmpval;
+
+/* If the expression was a register, then just output it and save 24
+   bytes */
+
+	if (success){ 
+		emit4(opc|ins|val);
+		return;
 	}
-	tmpval = val - DOTVAL - 8;
-	if(small((tmpval & 0xFFFFF000) == 0, 8))
-		return(val|0x008F0000);
-	tmpval *= -1;
-	if(small((tmpval & 0xFFFFF000) == 0, 8))
-		return(val|0x000F0000);
-	emit4(0xE51F0004 | reg << 12);
-	emit4(val | 0xF0000000);
-	return(reg << 16);
+
+	reg = ins & 0x0000F000;		/* Extract register from instruction */
+
+	if (opc == LDR){
+
+		tmpval = val - DOTVAL - 8;
+		if (oursmall((tmpval & 0xFFFFF000) == 0, 16)){	/* If it's +ve */
+			emit4(opc|ins|tmpval|0x018F0000);	/* PC rel, up bit */
+			return;
+		}
+
+		tmpval *= -1;
+		if (oursmall((tmpval & 0xFFFFF000) == 0, 16)){	/* If it's -ve */
+			emit4(opc|ins|tmpval|0x010F0000);	/* PC rel, no up bit */
+			return;
+		}
+
+		if (!bflag && pass == PASS_3){	/* Debugging info */
+			/* warning("LDR address extension"); */
+			if (dflag)
+				printf("value: %lx\n", val);
+		}
+
+		opc = 0x03A00000;	/* Set opc for putaddr */
+
+		if (oursmall((val & 0xFFFF0000) == 0, 8)){
+			putaddr(opc, ins & 0xFFBFFFFF, val, 2);
+			emit4(0x05100000|ins|reg<<4);
+			return;
+		}
+		if (oursmall((val & 0xFF000000) == 0, 4)){
+			putaddr(opc, ins & 0xFFBFFFFF, val, 3);
+			emit4(0x05100000|ins|reg<<4);
+			return;
+		}
+		putaddr(opc, ins & 0xFFBFFFFF, val, 4);
+		emit4(0x05100000|ins|reg<<4);
+		return;
+	}
+
+/* If the failure was an STR instruction, things are a bit more complicated as
+   we can't overwrite the register before we store its value.  We therefore
+   need to use another register as well, which must be saved and restored. 
+   This register is saved on a stack pointed to by R12.  Apart from this
+   complication, the scheme is similar to the LDR above.  */
+
+	if (opc == STR){
+		reg2 = reg >> 12;	    /* Use R6 as the second register, */
+		reg2 = (reg2 == 6 ? 0 : 6); /* or R0 if we can't */
+
+		tmpval = val - DOTVAL - 8;
+		if (oursmall((tmpval & 0xFFFFF000) == 0, 24)){	/* If it's +ve */
+			emit4(opc|ins|tmpval|0x018F0000);	/* PC rel, up bit */
+			return;
+		}
+
+		tmpval *= -1;
+		if (oursmall((tmpval & 0xFFFFF000) == 0, 24)){	/* If it's -ve */
+			emit4(opc|ins|tmpval|0x010F0000);	/* PC rel, no up bit */
+			return;
+		}
+
+		if (!bflag && pass == PASS_3){	/* Debugging info */
+			/* warning("STR address extension"); */
+			if (dflag)
+				printf("value: %lx\n", val);
+		}
+
+		opc = 0x03A00000;	/* Set opc for putaddr */
+
+		if (oursmall((val & 0xFFFF0000) == 0, 8)){
+			emit4(0xE92C0000|1<<reg2);
+			putaddr(opc, (ins & 0xFFBF0FFF)|reg2<<12, val, 2);
+			emit4(0x05000000|ins|reg2<<16);
+			emit4(0xE8BC0000|1<<reg2);
+			return;
+		}
+		if (oursmall((val & 0xFF000000) == 0, 4)){
+			emit4(0xE92C0000|1<<reg2);
+			putaddr(opc, (ins & 0xFFBF0FFF)|reg2<<12, val, 3);
+			emit4(0x05000000|ins|reg2<<16);
+			emit4(0xE8BC0000|1<<reg2);
+			return;
+		}
+		emit4(0xE92C0000|1<<reg2);
+		putaddr(opc, (ins & 0xFFBF0FFF)|reg2<<12, val, 4);
+		emit4(0x05000000|ins|reg2<<16);
+		emit4(0xE8BC0000|1<<reg2);	
+		return;
+	}
+
 }
 
-word_t
+
+/* This routine deals with ADR instructions.  The ARM does not have a
+   'calculate effective address' instruction, so we use ADD, SUB, MOV or
+   MVN instead.  ADR is not a genuine instruction, but is provided to make
+   life easier.  At present these are all calculated by using a MOV and
+   successive ADDs.  Even if the address will fit into a single MOV, we 
+   still use two instructions; the second is a no-op.  This is to cure the
+   optimisation problem with mobile addresses ! */
+
+
 calcadr(ins, reg, val, typ)
 word_t ins, reg;
 valu_t val;
 short typ;
 {
 	valu_t tmpval = val;
-	word_t opc;
-	int i = 0;
+	word_t opc = 0xff;	/* Dummy opc used as a flag for data() */
 
-	if (typ != S_ABS){
-		tmpval = val-DOTVAL-8;
-		if (tmpval > 0) {
-			if (small((tmpval & 0xFFFFFF00) == 0),12){
-				emit4(ins|ADD|0x020F0000|reg<<12|tmpval);
-				return 0;
-			}
-		}
-	
-		tmpval *= -1;
-		if (small((tmpval & 0xFFFFFF00) == 0), 12){
-			emit4(ins|SUB|0x020F0000|reg<<12|tmpval);
-			return 0;
-		}
-	}
+/* First check that the address is in range */
 
-	tmpval = val;
-	opc = MOV;
-	if (calcimm(&opc, &tmpval, typ)){
-		emit4(ins|opc|0x020F0000|reg<<12|tmpval);
-		return 0;
+	if (val < 0) 
+		tmpval = ~tmpval; /* Invert negative addresses for check */
+
+	if ((tmpval & 0xFC000000) && (typ != S_UND)){
+		serror("adr address out of range");
+		return;
 	}
 
-/* Failed */
-	if (pass == PASS_1)
-		DOTVAL += 16;
-	else
-		serror("illegal ADR argument");
-	return ;
+/* Can't do it PC relative, so use an absolute MOV instead */
+
+	data (opc, ins|reg<<12, val, typ);
+	return; 
+
 }
 
 
@@ -246,9 +342,15 @@ valu_t val;
 short typ;
 word_t styp;
 {
-	if (typ=S_UND) return 0;
-	if (val & 0xFFFFFFE0) serror("shiftcount out of range");
-	if (styp && !val) warning("shiftcount 0");
+	if (typ == S_UND) 
+		return(0);
+
+	if (val & 0xFFFFFFE0) 
+		serror("shiftcount out of range");
+
+	if (styp && !val) 
+		warning("shiftcount 0");
+
 	return((val & 0x1F)<<7);
 }
 
@@ -265,3 +367,144 @@ long *x;
 	}
 	return;
 }
+
+
+/*
+   putaddr(opc, ins, val, count) overcomes the 12-bit encoding problem by
+   outputting a number a byte at a time.  The first instruction is opc with
+   the low byte of val; the follow-ups (ADD after a MOV or ADD, SUB after an
+   MVN or SUB) apply the higher bytes to the destination register itself.
+   Exactly count words are emitted.  A follow-up whose byte is not needed is
+   replaced with a no-op rather than dropped: if pass 3 emitted fewer words
+   than pass 2 allowed for, that would upset other addresses.
+   ins carries the other instruction bits, with Rd in bits 12-15.
+*/
+
+putaddr(opc, ins, val, count)
+long opc, ins, val;
+int count;
+{
+	long tmpval;
+	long reg = ins & 0x0000F000;	/* Rd, still in bits 12-15 */
+	long ins2;			/* ins for the follow-up instructions */
+
+	emit4(opc|ins|(val & 0x000000FF));
+
+/* The follow-ups must use Rd as their source as well (bits 16-19).  Clear
+   whatever source register the caller put there first: ORing Rd on top of
+   a different one, as for ADD R1, R2, #big, names the wrong register. */
+	ins2 = (ins & 0xFFF0FFFF)|reg<<4;
+
+/* Decide what to use for the additional instructions */
+	if (opc == 0x03a00000)		/* This one is for strldr */
+		opc = 0x02800000;
+	if (opc == MOV)
+		opc = ADD;
+	if (opc == MVN)
+		opc = SUB;
+
+	tmpval = (val & 0x0000FF00) >> 8 | 0x00000C00;	/* byte | shift field */
+	if ((tmpval & 0x000000FF) != 0)
+		emit4(opc|ins2|tmpval);
+	else
+		emit4(0xF0000000);	/* No-op if a zero argument */
+
+	if (count == 3 || count == 4){	/* Must use three or more instructions */
+		if ((val & 0xFFFF0000) != 0){
+			tmpval = (val & 0x00FF0000) >> 16 | 0x00000800;
+			emit4(opc|ins2|tmpval);
+		}
+		else
+			emit4(0xF0000000);		/* No-op */
+	}
+
+	if (count == 4){	/* Must use four instructions */
+		if ((val & 0xFF000000) != 0){
+			tmpval = (val & 0xFF000000) >> 24 | 0x00000400;
+			emit4(opc|ins2|tmpval);
+		}
+		else
+			emit4(0xF0000000);		/* No-op */
+	}
+}
+
+
+/* The following piece of code is stolen from comm7.c; it needs some minor 
+   fixes for the ARM, so it is included here rather than altering the existing
+   code.  It maintains a bit table to say whether or not an optimisation is
+   possible.  The original had some problems:
+   (a).	  It assumed that the memory returned by malloc() was cleared to zero.
+          This is true on a Sun, but not under Minix; small() should really
+	  use calloc() instead.
+   (b).   It assumed that if an optimisation was possible in pass 2, it must
+          also be possible in pass 3, and produced an assertion error if it
+	  wasn't.   This is OK for optimising things like long or short branch
+	  instructions on a 68000, but not for ADRs on the ARM.  A previous
+	  optimisation may place an address out of 12-bit encoding range on
+	  pass 3, when it was in range on pass 2.  However we have to be 
+	  careful here .....
+ */
+
+#define PBITTABSZ	128
+static char *pbittab[PBITTABSZ];
+
+/* Record (pass 2) or replay (pass 3) one optimisation decision; non-zero
+   means the short form is used and, in pass 2, gain is added to DOTGAIN. */
+oursmall(fitsmall, gain)
+{
+	register bit;
+	register char *p;
+
+	if (DOTSCT == NULL)
+		nosect();
+	if (bflag)
+		return(0);
+	if (nbits == BITCHUNK) {
+		bitindex++;
+		nbits = 0;
+		if (bitindex == PBITTABSZ) {
+			static int w_given;
+			if (pass == PASS_1 && ! w_given) {
+				w_given = 1;
+				warning("bit table overflow");
+			}
+			return(0);
+		}
+		if (pbittab[bitindex] == 0 && pass == PASS_1) {
+			if ((pbittab[bitindex] = malloc(MEMINCR)) == 0) {
+				static int w2_given;
+				if (!w2_given) {
+					w2_given = 1;
+					warning("out of space for bit table");
+				}
+			}
+		}
+	}
+	/* Test on every call: after an overflow or a failed malloc nbits is 0, so
+	   later calls skip the block above and must not touch a missing chunk. */
+	if (bitindex >= PBITTABSZ || pbittab[bitindex] == 0)
+		return (0);
+	bit = 1 << (nbits&7);
+	p = pbittab[bitindex]+(nbits>>3);
+	nbits++;
+
+	switch (pass) {
+	case PASS_1:
+		*p = 0;
+		return(0);
+	case PASS_2:
+		if (fitsmall) {
+			DOTGAIN += gain;
+			*p |= bit;
+		}
+		return(fitsmall);
+	case PASS_3:
+		if (!fitsmall && (*p & bit) != 0){	/* chosen in pass 2, no longer fits */
+			printf("line: %ld - small failed\n", lineno);
+			printf("fitsmall: %d bit: %d\n", fitsmall, (*p & bit));
+			serror("This one is fatal!");
+		}
+		return(*p & bit);
+	}
+	/*NOTREACHED*/
+}
-- 
2.34.1