Add outraw outrab and use them so that we can generate relocations
-Change the output format to some kind of binary folow
+- Need to add .code .equ and all the other .byte type stuff that is nice to have
+- Check error cases (seg +seg , unknown + seg etc)
+- Automatic symbols __code __data __bss __endcode __enddata __endbss
-Header
-Code
-Data
-BSS
-Symbols
-DebugSymbols (optional copy of local only symbols)
-Code/Data is packed as follows (BSS is 0 anyway but you can have a sym in
-BSS)
+What are the right rules for segment v absolute
-0 END
-1-223 copy this many bytes
+ seg - seg = absolute if same seg
+ seg - seg = error if not
-224-239 reserved
-241 relocate next byte low
-242 relocate next byte high
-243 relocate next word (native order)
- followed by symnum.w
- followed by data byte/word
+ seg + abs = seg
+ seg - abs = seg
-Special symnums for versus our own base (for simple relocs) or a different
-byte code (eg 240-247 / 248-255 where latter don't give sym ?)
+ abs any abs = abs
-Future
+ multiply/divide of segs -> disallow ? and require things like
+ x / 2 - y / 2 is done as (x - y) / 2 ?
-244 relocate ZP ref (6502)
-245 24bit
-246 32bit
+Proper error messages needed
-etc
+later:
+ need a syntax/operator for high/low byte of seg
+ output is then segbyteh or segbytel and follows same rules as segment
+ (what about mixing them ?)
-Symbol Table
-type.b seg.b {addr} sym\0
+- Proper output logic
+ - write an invalid header
+ - for each segment
+ - write segment (counting relocs)
+ - write reloc tab (or fold it in ?)
+ - write symbol table
+ - write debug table
+ - rewind and fix header
-type
+Relocation types
-bits 0-2 -> size of addr 1-4
-bits 3-6 -> free
-bit 7
- 0x00 import
- 0x80 export
+1. versus our segment base for this object module (usual)
+2. versus other segment base for this module
+3. versus unresolved symbol
+Need to support relocations for 16bit and also 8bit ZP. Do we need to extend
+the assembler to support relocations for high 8 / low 8 ? Probably not for Z80
+and means more reloc info in syms/addrs but would do for other processors like
+6502. Ugly because if we do then we need not just a segment for an address
+but also to know if its address high/address low/address/absolute - add to
+a_type somehow ?
-Write a matching nlist(3) and nm to test the basics
+link
+ read each module header that we include
+ load its symbols
+ compute base of each versus segment
+ compute base of segments (split I/D differs a bit etc)
+
+ resolve all symbols we can
+
+ write out segments relocating and resolving
+
+ Unresolved symbols allowed for -r
+
+ scan each module header again
+ copy all the debug symbols (relocating as we go)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include <stdint.h>
#include <ctype.h>
#include <setjmp.h>
#define NCODE 128 /* # of characters in code buffer */
#define NINPUT 128 /* # of characters in input line */
#define NLPP 60 /* # of lines on a page */
+#define NSEGMENT 4 /* # of segments */
#define XXXX 0 /* Unused value */
/*
* symbol flags hide in the register
* field of the address.
*/
-#define TMREG 0x00FF /* Register code */
+#define TMREG 0x007F /* Register code */
#define TMMDF 0x0001 /* Multidef */
#define TMASG 0x0002 /* Defined by "=" */
#define TMMODE 0xFF00 /* Mode */
#define TMINDIR 0x8000 /* Indirect flag in mode */
+#define TPUBLIC 0x0080 /* Exported symbol */
#define TNEW 0x0000 /* Virgin */
#define TUSER 0x0100 /* User name */
#define TLD 0x1C00 /* ld */
#define TCC 0x1D00 /* condition code */
#define TSUB 0x1E00 /* sub et al */
+#define TSEGMENT 0x1F00 /* segments by number */
+#define TEXPORT 0x2000 /* symbol export */
/*
* Registers.
#define CP 6
#define CM 7
-typedef unsigned int VALUE; /* For symbol values */
+/*
+ * Segments
+ */
+#define UNKNOWN -1
+#define ABSOLUTE 0
+#define CODE 1
+#define DATA 2
+#define BSS 3
+
+typedef uint16_t VALUE; /* For symbol values */
/*
* Address description.
typedef struct ADDR {
int a_type; /* Type */
VALUE a_value; /* Index offset, etc */
+ int a_segment; /* Segment relative to */
} ADDR;
/*
char s_id[NCPS]; /* Name */
int s_type; /* Type */
VALUE s_value; /* Value */
+ int s_segment; /* Segment this symbol is relative to */
} SYM;
/*
extern SYM *uhash[];
extern int lflag;
extern jmp_buf env;
-extern VALUE dot;
+extern VALUE dot[NSEGMENT];
+extern int segment;
extern void asmline(void);
extern void asmld(void);
extern void expr2(ADDR *);
extern void expr3(ADDR *, int);
extern void isokaors(ADDR *, int);
-extern void outaw(int);
-extern void outab(int);
+extern void outpass(void);
+extern void outabsolute(int);
+extern void outsegment(int);
+extern void outaw(uint16_t);
+extern void outab(uint8_t);
+extern void outraw(ADDR *);
+extern void outrab(ADDR *);
extern void outeof(void);
-extern void outbyte(int);
+extern void outbyte(uint8_t);
extern void outflush(void);
-extern void outhex(int);
extern void list(void);
extern void list1(char *, int, int);
extern void syminit(void);
int lflag;
VALUE laddr;
int lmode;
-VALUE dot;
+VALUE dot[NSEGMENT];
+int segment = CODE;
SYM *phash[NHASH];
SYM *uhash[NHASH];
int pass;
}
syminit();
for (pass=0; pass<2; ++pass) {
+ outpass();
line = 0;
- dot = 0;
+ memset(dot, 0, sizeof(dot));
fseek(ifp, 0L, 0);
while (fgets(ib, NINPUT, ifp) != NULL) {
++line;
ADDR a1;
ADDR a2;
- laddr = dot;
+ laddr = dot[segment];
lmode = SLIST;
loop:
if ((c=getnb())=='\n' || c==';')
return;
- if (isalpha(c) == 0)
+ if (isalpha(c) == 0 && c != '_' && c != '.')
qerr();
getid(id, c);
if ((c=getnb()) == ':') {
sp->s_type |= TMMDF;
sp->s_type &= ~TMMODE;
sp->s_type |= TUSER;
- sp->s_value = dot;
+ sp->s_value = dot[segment];
+ sp->s_segment = segment;
} else {
if ((sp->s_type&TMMDF) != 0)
err('m');
- if (sp->s_value != dot)
+ if (sp->s_value != dot[segment])
err('p');
}
lmode = ALIST;
sp->s_type &= ~TMMODE;
sp->s_type |= TUSER|TMASG;
sp->s_value = a1.a_value;
+ sp->s_segment = a1.a_segment;
laddr = a1.a_value;
lmode = ALIST;
goto loop;
case TORG:
getaddr(&a1);
istuser(&a1);
+ if (a1.a_segment != ABSOLUTE)
+ qerr();
lmode = ALIST;
- laddr = dot = a1.a_value;
+ segment = 0;
+ laddr = dot[segment] = a1.a_value;
+ /* Tell the binary generator we've got a new absolute
+ segment. */
+ outabsolute(laddr);
+ break;
+
+ case TEXPORT:
+ getid(id, getnb());
+ sp = lookup(id, phash, 1);
+ sp->s_type |= TPUBLIC;
+ break;
+ /* .code etc */
+ case TSEGMENT:
+ segment = sp->s_value;
+ /* Tell the binary generator about a segment switch to a non
+ absolute segment */
+ outsegment(segment);
break;
case TDEFB:
do {
getaddr(&a1);
istuser(&a1);
- outab(a1.a_value);
+ outrab(&a1);
} while ((c=getnb()) == ',');
unget(c);
break;
do {
getaddr(&a1);
istuser(&a1);
- outaw(a1.a_value);
+ outraw(&a1);
} while ((c=getnb()) == ',');
unget(c);
break;
break;
case TDEFS:
- laddr = dot;
+ laddr = dot[segment];
lmode = ALIST;
getaddr(&a1);
istuser(&a1);
- dot += a1.a_value;
+ /* Write out the bytes. The BSS will deal with the rest */
+ for (value = 0 ; value < a1.a_value; value++)
+ outab(0);
+ dot[segment] += a1.a_value;
break;
case TNOP:
getaddr(&a1);
}
istuser(&a1);
- disp = a1.a_value-dot-2;
+ disp = a1.a_value-dot[segment]-2;
if (disp<-128 || disp>127)
aerr();
outab(opcode);
}
istuser(&a1);
outab(opcode);
- outaw(a1.a_value);
+ outraw(&a1);
break;
case TPUSH:
return;
}
if (msrc == (TMINDIR|TUSER)) {
- outab(0x3A); /* lda */
- outaw(src.a_value);
+ outab(0x3A); /* ld a,(addr) */
+ outraw(&src);
return;
}
if (msrc == (TMINDIR|TWR)) {
if (rsrc==BC || rsrc==DE) {
- outab(0x0A|(rsrc<<4)); /* ldax */
+ outab(0x0A|(rsrc<<4)); /* ld a,(r16) */
return;
}
}
return;
}
if (mdst == (TMINDIR|TUSER)) {
- outab(0x32); /* sta */
- outaw(dst.a_value);
+ outab(0x32); /* ld (addr),a */
+ outraw(&dst);
return;
}
if (mdst == (TMINDIR|TWR)) {
if (rdst==BC || rdst==DE) {
- outab(0x02|(rdst<<4)); /* stax */
+ outab(0x02|(rdst<<4)); /* ld (r16),a */
return;
}
}
}
if (dst.a_type==(TWR|SP) && msrc==TWR) {
if (rsrc == HL) {
- outab(0xF9); /* sphl */
+ outab(0xF9); /* ld sp,hl */
return;
}
}
if (msrc == TUSER) {
if (mdst == TBR) {
- outab(0x06|(rdst<<3)); /* mvi */
+ outab(0x06|(rdst<<3)); /* ld r8,#n */
if (indexap != NULL)
outab(indexap->a_value);
- outab(src.a_value);
+ outrab(&src);
return;
}
if (mdst == TWR) {
- outab(0x01|(rdst<<4)); /* lxi */
- outaw(src.a_value);
+ outab(0x01|(rdst<<4)); /* ld r16,#n */
+ outraw(&src);
return;
}
}
if (mdst==TWR && msrc==(TMINDIR|TUSER)) {
if (rdst == HL)
- outab(0x2A); /* lhld */
+ outab(0x2A); /* ld hl,(xxxx) */
else
outaw(0x4BED|(rdst<<12)); /* ld rp,(ppqq) */
- outaw(src.a_value);
+ outraw(&src);
return;
}
if (mdst==(TMINDIR|TUSER) && msrc==TWR) {
if (rsrc == HL)
- outab(0x22); /* shld */
+ outab(0x22); /* ld (xxxx),hl */
else
outaw(0x43ED|(rsrc<<12)); /* ld (ppqq),rp */
- outaw(dst.a_value);
+ outraw(&dst);
return;
}
if (mdst==TBR && msrc==TBR && (rdst!=M || rsrc!=M)) {
outab(0x40|(rdst<<3)|rsrc);
if (indexap != NULL)
- outab(indexap->a_value);
+ outrab(indexap);
return;
}
aerr();
if ((op&0xFF00) != 0) {
outab(op>>8);
if (needisp != 0) {
- outab(ap->a_value);
+ outrab(ap);
needisp = 0;
}
}
outab(op);
if (needisp != 0)
- outab(ap->a_value);
+ outrab(ap);
}
/*
if (c < 0) {
c = getnb();
- if (isalpha(c) == 0)
+ if (isalpha(c) == 0 && c != '_' && c != '.')
qerr();
}
p = &id[0];
}
if ((c = *ip) != '\n')
++ip;
- } while (c=='\'' || isalnum(c)!=0);
+ } while (c=='\'' || isalnum(c)!=0 || c == '_');
if (c != '\n')
--ip;
while (p < &id[NCPS])
htable[hash] = sp;
sp->s_type = TNEW;
sp->s_value = 0;
+ sp->s_segment = UNKNOWN;
symcopy(sp->s_id, id);
}
return (sp);
}
}
+/*
+ * Used after the unary '-' and '~' operators: a user symbol operand
+ * must be absolute, otherwise aerr() is called. Operands that are not
+ * user symbols are not checked.
+ */
+static void chkabsolute(ADDR *a)
+{
+	int is_user = (a->a_type & TMMODE) == TUSER;
+
+	if (is_user && a->a_segment != ABSOLUTE)
+		aerr();
+}
+
+/*
+ * Check the segments of the two operands of the binary operator 'op'
+ * and leave the segment of the result in left->a_segment. Operands are
+ * only checked when both have mode TUSER. Any combination not listed
+ * below ends in aerr().
+ *
+ *	abs op  abs = abs
+ *	seg +   abs = seg	(either way around)
+ *	seg -   abs = seg
+ *	seg -   seg = abs	(same segment only)
+ */
+static void chksegment(ADDR *left, ADDR *right, int op)
+{
+	/* Not symbols, doesn't matter */
+	if ((left->a_type & TMMODE) != TUSER || (right->a_type & TMMODE) != TUSER)
+		return;
+
+	/* Anything goes with absolutes */
+	if (left->a_segment == ABSOLUTE && right->a_segment == ABSOLUTE)
+		return;
+
+	/* This relies on ABSOLUTE being 0, an addition of segment offset and
+	   absolute either way around produces a segment offset */
+	if ((left->a_segment == ABSOLUTE || right->a_segment == ABSOLUTE) &&
+	    op == '+') {
+		left->a_segment += right->a_segment;
+		return;
+	}
+	/* Taking an absolute away from a segment offset leaves an offset in
+	   the same segment, so the left segment stands. The reverse
+	   (abs - seg) is not accepted and falls through to the error */
+	if (right->a_segment == ABSOLUTE && op == '-')
+		return;
+	/* Subtraction within segment produces an absolute */
+	if (left->a_segment == right->a_segment && op == '-') {
+		left->a_segment = ABSOLUTE;
+		return;
+	}
+	aerr();
+}
+
/*
* Expression reader,
* real work, part I. Read
else
ap->a_type = right.a_type;
isokaors(ap, paren);
+ chksegment(ap, &right, '+');
ap->a_value += right.a_value;
break;
case '-':
istuser(&right);
isokaors(ap, paren);
+ chksegment(ap, &right, '-');
ap->a_value -= right.a_value;
break;
case '*':
istuser(ap);
istuser(&right);
+ chksegment(ap, &right, '*');
ap->a_value *= right.a_value;
break;
case '/':
istuser(ap);
istuser(&right);
- ap->a_value /= right.a_value;
+ chksegment(ap, &right, '/');
+ if (right.a_value == 0)
+ err('z');
+ else
+ ap->a_value /= right.a_value;
}
}
unget(c);
if (c == '-') {
expr1(ap, HIPRI, 0);
istuser(ap);
+ chkabsolute(ap);
ap->a_value = -ap->a_value;
return;
}
if (c == '~') {
expr1(ap, HIPRI, 0);
istuser(ap);
+ chkabsolute(ap);
ap->a_value = ~ap->a_value;
return;
}
if (c == '\'') {
ap->a_type = TUSER;
ap->a_value = get();
+ ap->a_segment = ABSOLUTE;
while ((c=get()) != '\'') {
if (c == '\n')
qerr();
}
return;
}
- if (c>='0' && c<='9') {
+ if (c>='0' && c<='9' || c == '$') {
expr3(ap, c);
return;
}
uerr(id);
ap->a_type = TUSER;
ap->a_value = sp->s_value;
+ ap->a_segment = sp->s_segment;
return;
}
qerr();
}
np2 = &num[0];
value = 0;
+ /* No trailing tag, so look for 0octal, 0xhex and $xxxx */
+ if (radix == 10) {
+ if (*np2 == '0') {
+ radix = 8;
+ np2++;
+ if (*np2 == 'x') {
+ radix = 16;
+ np2++;
+ }
+ } else if (*np2 =='$') {
+ radix = 16;
+ np2++;
+ }
+ }
while (np2 < np1) {
if ((c = *np2++)>='0' && c<='9')
c -= '0';
}
ap->a_type = TUSER;
ap->a_value = value;
+ ap->a_segment = ABSOLUTE;
}
/*
* Output Intel compatable
* hex files.
*/
+
#include "as.h"
+#include "obj.h"
+
+#define NHEX 8 /* Nice format size */
+
+static uint16_t segsize[NSEGMENT];
+static uint16_t truesize[NSEGMENT];
+static off_t segbase[NSEGMENT];
+
+struct objhdr obh;
+
+static void outc(char c);
+
+/*
+ * Called at the start of each pass. Nothing is done on pass 0. At the
+ * start of pass 1 the sizes counted so far are used to lay the object
+ * file out: each segment other than BSS is given a file offset after
+ * the header, the sizes are recorded in the header, and the symbol
+ * table is placed after the last segment. The magic is left as 0 here.
+ */
+void outpass(void)
+{
+	off_t base = sizeof(obh);
+	int i;
+
+	if (pass != 1)
+		return;
+
+	/* Lay the file out */
+	for (i = 1; i < NSEGMENT; i++) {
+		if (i != BSS) {
+			obh.o_segbase[i] = base;
+			segbase[i] = base;
+			/* off_t need not be int sized: cast to match the format */
+			printf("BASE %d %ld\n", i, (long)base);
+			base += segsize[i];
+			printf("SIZE %d %d\n", i, truesize[i]);
+		}
+		obh.o_size[i] = truesize[i];
+		/* outab() counts every byte again during pass 1; start from
+		   zero so the totals are not doubled and the wrap check in
+		   outab() does not fire early */
+		truesize[i] = 0;
+	}
+	obh.o_magic = 0;
+	obh.o_symbase = base;
+	obh.o_dbgbase = 0; /* for now */
+}
-#define NHEX 32 /* Longest record */
+/*
+ * Absolute address change
+ */
+
+void outabsolute(int addr)
+{
+ /* Stub: addr is ignored and nothing is emitted yet */
+}
-VALUE hexla;
-VALUE hexpc;
-char hexb[NHEX];
-char *hexp = &hexb[0];
+/*
+ * Segment change
+ */
+
+void outsegment(int seg)
+{
+	/* Nothing to position unless this is pass 1 */
+	if (pass != 1)
+		return;
+
+	/* Move the output file to where this segment is currently
+	   being written */
+	fseek(ofp, segbase[seg], SEEK_SET);
+}
/*
* Output a word. Use the
* standard Z-80 ordering (low
* byte then high byte).
*/
-void outaw(int w)
+void outaw(uint16_t w)
{
+ /* outab() takes a uint8_t, so this emits the low 8 bits of w */
outab(w);
+ /* followed by the high 8 bits */
outab(w >> 8);
}
-/*
- * Output an absolute
- * byte to the code and listing
- * streams.
- */
-void outab(int b)
+/*
+ * Output a word that may need relocating. When the address is not
+ * absolute, two marker bytes are written with outbyte() ahead of the
+ * word itself: REL_ESC, then a descriptor combining 2 << 4, REL_SIMPLE
+ * and the segment number. The word is then output with outaw().
+ */
+void outraw(ADDR *a)
{
- if (pass != 0) {
- if (cp < &cb[NCODE])
- *cp++ = b;
- outbyte(b);
+ if (a->a_segment != ABSOLUTE) {
+ /* FIXME: handle symbols */
+ /* A non-zero relocatable value in the BSS is reported as error 'b' */
+ if (segment == BSS && a->a_value)
+ err('b');
+ outbyte(REL_ESC);
+ /* NOTE(review): the 2 is presumably the relocation size in bytes - confirm against obj.h */
+ outbyte((2 << 4) | REL_SIMPLE | a->a_segment);
}
- ++dot;
+ outaw(a->a_value);
}
/*
- * Put out the end of file
- * hex item at the very end of
- * the object file.
+ * Output an absolute
+ * byte to the code and listing
+ * streams.
*/
-void outeof(void)
+void outab(uint8_t b)
{
- outflush();
- fprintf(ofp, ":00000001FF\n");
+ /* Not allowed to put data in the BSS except zero */
+ if (segment == BSS && b)
+ err('b');
+ /* Output while the current segment is ABSOLUTE is reported as error 'A' */
+ if (segment == ABSOLUTE)
+ err('A');
+ outbyte(b);
+ /* NOTE(review): 0xDA is presumably the value of REL_ESC from obj.h - confirm.
+ The extra 0x00 goes through outbyte() only, so it is not counted in
+ dot or truesize */
+ if (b == 0xDA) /* Quote relocation markers */
+ outbyte(0x00);
+ ++dot[segment];
+ ++truesize[segment];
+ /* Both counters are 16 bit, so reaching 0 after the increment means
+ the segment has wrapped */
+ if (truesize[segment] == 0 || dot[segment] == 0)
+ err('o');
}
-/*
- * Output a hex byte. Flush
- * the buffer if no room. Store the
- * byte in the buffer, for future
- * checksumming. Remember the load
- * address for flushing.
- */
-void outbyte(int b)
+/*
+ * Output a byte that may need relocating. When the address is not
+ * absolute, two marker bytes (REL_ESC and a descriptor holding the
+ * size, REL_SIMPLE and the segment number) are written with outbyte()
+ * ahead of the byte, in the same way outraw() does for words. The byte
+ * itself is then output with outab().
+ */
+void outrab(ADDR *a)
{
- if (hexp>=&hexb[NHEX] || hexpc!=dot) {
- outflush();
- hexp = &hexb[0];
+	/* FIXME: handle symbols */
+	if (a->a_segment != ABSOLUTE) {
+		/* Only zero may be stored in the BSS */
+		if (segment == BSS && a->a_value)
+			err('b');
+		outbyte(REL_ESC);
+		/* One byte follows, so the size field is 1 where outraw()
+		   uses 2. NOTE(review): assumes the upper nibble is the
+		   relocation size in bytes - confirm against obj.h */
+		outbyte((1 << 4) | REL_SIMPLE | a->a_segment);
}
- if (hexp == &hexb[0]) {
- hexla = dot;
- hexpc = dot;
- }
- *hexp++ = b;
- ++hexpc;
+	outab(a->a_value);
}
/*
- * Flush out a block of
- * code to the hex file. Figure
- * out the length word and the
- * checksum byte.
+ * Finish the object file:
+ * rewind and write the header
+ * with the valid magic number.
*/
-void outflush(void)
+void outeof(void)
{
- char *p;
- int b;
- int c;
-
- if ((b = hexp-&hexb[0]) != 0) {
- putc(':', ofp);
- outhex(b);
- outhex(hexla >> 8);
- outhex(hexla);
- outhex(0);
- c = b + (hexla>>8) + hexla;
- p = &hexb[0];
- while (p < hexp) {
- b = *p++;
- outhex(b);
- c += b;
- }
- outhex(-c);
- putc('\n', ofp);
- }
+	/* Go back to the start of the file and write the header, now
+	   marked with the valid magic */
+	rewind(ofp);
+	obh.o_magic = MAGIC_OBJ;
+	/* A short write would leave an unusable object file, so say so
+	   rather than failing silently */
+	if (fwrite(&obh, sizeof(obh), 1, ofp) != 1)
+		fprintf(stderr, "unable to write object header\n");
+	/* Summary of the per segment byte counts kept by outab() */
+	printf("Code %d bytes: Data %d bytes: BSS %d bytes\n",
+		truesize[CODE], truesize[DATA], truesize[BSS]);
}
/*
- * Put out "b", as a
- * two character hex value.
- * We cannot use printf because
- * of case problems on VMS.
- * Upper case ascii.
+ * Output a byte and track our position. For BSS we care about sizes
+ * only.
*/
-void outhex(int b)
+void outbyte(uint8_t b)
{
- static const char hex[] = {
- '0', '1', '2', '3',
- '4', '5', '6', '7',
- '8', '9', 'A', 'B',
- 'C', 'D', 'E', 'F'
- };
-
- putc(hex[(b>>4)&0x0F], ofp);
- putc(hex[b&0x0F], ofp);
+ /* The byte only reaches the object file on pass 1, and never for BSS */
+ if (pass == 1 && segment != BSS)
+ putc(b, ofp);
+ /* Both counters advance on every call: either pass, BSS included */
+ segbase[segment]++;
+ segsize[segment]++;
}
{ 0, "defm", TDEFM, XXXX },
{ 0, "org", TORG, XXXX },
{ 0, "equ", TEQU, XXXX },
+ { 0, "export", TEXPORT, XXXX },
+ { 0, ".byte", TDEFB, XXXX },
+ { 0, ".word", TDEFW, XXXX },
+ { 0, ".blkb", TDEFS, XXXX },
+ { 0, ".ascii", TDEFM, XXXX },
+ { 0, ".org", TORG, XXXX },
+ { 0, ".equ", TEQU, XXXX },
+ { 0, ".export", TEXPORT, XXXX },
{ 0, "cond", TCOND, XXXX },
{ 0, "endc", TENDC, XXXX },
+ { 0, "code", TSEGMENT, CODE },
+ { 0, "data", TSEGMENT, DATA },
+ { 0, "bss", TSEGMENT, BSS },
+ { 0, ".code", TSEGMENT, CODE },
+ { 0, ".data", TSEGMENT, DATA },
+ { 0, ".bss", TSEGMENT, BSS },
{ 0, "nop", TNOP, 0x0000 },
{ 0, "rlca", TNOP, 0x0007 },
{ 0, "rrca", TNOP, 0x000F },