From: Alan Cox Date: Thu, 26 Oct 2017 20:04:17 +0000 (+0100) Subject: asz80: some quick changes over tea X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=6e7523e285b16666c07addb65f15b203e9935dc7;p=FUZIX.git asz80: some quick changes over tea We now write out parts of something resembling a relocatable object file. We don't yet write out the symbols nor do we write out the longer relocations for symbol references rather than segment offsets. --- diff --git a/Applications/MWC/cmd/asz80/TODO b/Applications/MWC/cmd/asz80/TODO index 644cbaf3..aa4dbdea 100644 --- a/Applications/MWC/cmd/asz80/TODO +++ b/Applications/MWC/cmd/asz80/TODO @@ -1,49 +1,64 @@ Add outraw outrab and use them so that we can generate relocations -Change the output format to some kind of binary folow +- Need to add .code .equ and all the other .byte type stuff that is nice to have +- Check error cases (seg +seg , unknown + seg etc) +- Automatic symbols __code __data __bss __endcode __enddata __endbss -Header -Code -Data -BSS -Symbols -DebugSymbols (optional copy of local only symbols) -Code/Data is packed as follows (BSS is 0 anyway but you can have a sym in -BSS) +What are the right rules for segment v absolute -0 END -1-223 copy this many bytes + seg - seg = absolute if same seg + seg - seg = error if not -224-239 reserved -241 relocate next byte low -242 relocate next byte high -243 relocate next word (native order) - followed by symnum.w - followed by data byte/word + seg + abs = seg + seg - abs = seg -Special symnums for versus our own base (for simple relocs) or a different -byte code (eg 240-247 / 248-255 where latter don't give sym ?) + abs any abs = abs -Future + multiply/divide of segs -> disallow ? and require things like + x / 2 - y / 2 is done as (x - y) / 2 ? 
-244 relocate ZP ref (6502) -245 24bit -246 32bit +Proper error messages needed -etc +later: + need a syntax/operator for high/low byte of seg + output is then segbyteh or segbytel and follows same rules as segment + (what about mixing them ?) -Symbol Table -type.b seg.b {addr} sym\0 +- Proper output logic + - write an invalid header + - for each segment + - write segment (counting relocs) + - write reloc tab (or fold it in ?) + - write symbol table + - write debug table + - rewind and fix header -type +Relocation types -bits 0-2 -> size of addr 1-4 -bits 3-6 -> free -bit 7 - 0x00 import - 0x80 export +1. versus our segment base for this object module (usual) +2. versus other segment base for this module +3. versus unresolved symbol +Need to support relocations for 16bit and also 8bit ZP. Do we need to extend +the assembler to support relocations for high 8 / low 8 ? Probably not for Z80 +and means more reloc info in syms/addrs but would do for other processors like +6502. Ugly because if we do then we need not just a segment for an address +but also to know if it's address high/address low/address/absolute - add to +a_type somehow ? 
-Write a matching nlist(3) and nm to test the basics +link + read each module header that we include + load its symbols + compute base of each versus segment + compute base of segments (split I/D differs a bit etc) + + resolve all symbols we can + + write out segments relocating and resolving + + Unresolved symbols allowed for -r + + scan each module header again + copy all the debug symbols (relocating as we go) diff --git a/Applications/MWC/cmd/asz80/as.h b/Applications/MWC/cmd/asz80/as.h index d69c3efe..bfd9d0e5 100644 --- a/Applications/MWC/cmd/asz80/as.h +++ b/Applications/MWC/cmd/asz80/as.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ #define NCODE 128 /* # of characters in code buffer */ #define NINPUT 128 /* # of characters in input line */ #define NLPP 60 /* # of lines on a page */ +#define NSEGMENT 4 /* # of segments */ #define XXXX 0 /* Unused value */ /* @@ -44,11 +46,12 @@ * symbol flags hide in the register * field of the address. */ -#define TMREG 0x00FF /* Register code */ +#define TMREG 0x007F /* Register code */ #define TMMDF 0x0001 /* Multidef */ #define TMASG 0x0002 /* Defined by "=" */ #define TMMODE 0xFF00 /* Mode */ #define TMINDIR 0x8000 /* Indirect flag in mode */ +#define TPUBLIC 0x0080 /* Exported symbol */ #define TNEW 0x0000 /* Virgin */ #define TUSER 0x0100 /* User name */ @@ -79,6 +82,8 @@ #define TLD 0x1C00 /* ld */ #define TCC 0x1D00 /* condition code */ #define TSUB 0x1E00 /* sub et al */ +#define TSEGMENT 0x1F00 /* segments by number */ +#define TEXPORT 0x2000 /* symbol export */ /* * Registers. @@ -116,7 +121,16 @@ #define CP 6 #define CM 7 -typedef unsigned int VALUE; /* For symbol values */ +/* + * Segments + */ +#define UNKNOWN -1 +#define ABSOLUTE 0 +#define CODE 1 +#define DATA 2 +#define BSS 3 + +typedef uint16_t VALUE; /* For symbol values */ /* * Address description. 
@@ -124,6 +138,7 @@ typedef unsigned int VALUE; /* For symbol values */ typedef struct ADDR { int a_type; /* Type */ VALUE a_value; /* Index offset, etc */ + int a_segment; /* Segment relative to */ } ADDR; /* @@ -134,6 +149,7 @@ typedef struct SYM { char s_id[NCPS]; /* Name */ int s_type; /* Type */ VALUE s_value; /* Value */ + int s_segment; /* Segment this symbol is relative to */ } SYM; /* @@ -157,7 +173,8 @@ extern SYM *phash[]; extern SYM *uhash[]; extern int lflag; extern jmp_buf env; -extern VALUE dot; +extern VALUE dot[NSEGMENT]; +extern int segment; extern void asmline(void); extern void asmld(void); @@ -184,12 +201,16 @@ extern void expr1(ADDR *, int, int); extern void expr2(ADDR *); extern void expr3(ADDR *, int); extern void isokaors(ADDR *, int); -extern void outaw(int); -extern void outab(int); +extern void outpass(void); +extern void outabsolute(int); +extern void outsegment(int); +extern void outaw(uint16_t); +extern void outab(uint8_t); +extern void outraw(ADDR *); +extern void outrab(ADDR *); extern void outeof(void); -extern void outbyte(int); +extern void outbyte(uint8_t); extern void outflush(void); -extern void outhex(int); extern void list(void); extern void list1(char *, int, int); extern void syminit(void); diff --git a/Applications/MWC/cmd/asz80/as0.c b/Applications/MWC/cmd/asz80/as0.c index 4fe954fb..59d1aa02 100644 --- a/Applications/MWC/cmd/asz80/as0.c +++ b/Applications/MWC/cmd/asz80/as0.c @@ -17,7 +17,8 @@ char *ip; int lflag; VALUE laddr; int lmode; -VALUE dot; +VALUE dot[NSEGMENT]; +int segment = CODE; SYM *phash[NHASH]; SYM *uhash[NHASH]; int pass; @@ -103,8 +104,9 @@ int main(int argc, char *argv[]) } syminit(); for (pass=0; pass<2; ++pass) { + outpass(); line = 0; - dot = 0; + memset(dot, 0, sizeof(dot)); fseek(ifp, 0L, 0); while (fgets(ib, NINPUT, ifp) != NULL) { ++line; diff --git a/Applications/MWC/cmd/asz80/as1.c b/Applications/MWC/cmd/asz80/as1.c index 336028fa..360732f9 100644 --- a/Applications/MWC/cmd/asz80/as1.c +++ 
b/Applications/MWC/cmd/asz80/as1.c @@ -52,12 +52,12 @@ void asmline(void) ADDR a1; ADDR a2; - laddr = dot; + laddr = dot[segment]; lmode = SLIST; loop: if ((c=getnb())=='\n' || c==';') return; - if (isalpha(c) == 0) + if (isalpha(c) == 0 && c != '_' && c != '.') qerr(); getid(id, c); if ((c=getnb()) == ':') { @@ -68,11 +68,12 @@ loop: sp->s_type |= TMMDF; sp->s_type &= ~TMMODE; sp->s_type |= TUSER; - sp->s_value = dot; + sp->s_value = dot[segment]; + sp->s_segment = segment; } else { if ((sp->s_type&TMMDF) != 0) err('m'); - if (sp->s_value != dot) + if (sp->s_value != dot[segment]) err('p'); } lmode = ALIST; @@ -100,6 +101,7 @@ loop: sp->s_type &= ~TMMODE; sp->s_type |= TUSER|TMASG; sp->s_value = a1.a_value; + sp->s_segment = a1.a_segment; laddr = a1.a_value; lmode = ALIST; goto loop; @@ -111,15 +113,34 @@ loop: case TORG: getaddr(&a1); istuser(&a1); + if (a1.a_segment != ABSOLUTE) + qerr(); lmode = ALIST; - laddr = dot = a1.a_value; + segment = 0; + laddr = dot[segment] = a1.a_value; + /* Tell the binary generator we've got a new absolute + segment. */ + outabsolute(laddr); + break; + + case TEXPORT: + getid(id, getnb()); + sp = lookup(id, phash, 1); + sp->s_type |= TPUBLIC; + break; + /* .code etc */ + case TSEGMENT: + segment = sp->s_value; + /* Tell the binary generator about a segment switch to a non + absolute segnent */ + outsegment(segment); break; case TDEFB: do { getaddr(&a1); istuser(&a1); - outab(a1.a_value); + outrab(&a1); } while ((c=getnb()) == ','); unget(c); break; @@ -129,7 +150,7 @@ loop: do { getaddr(&a1); istuser(&a1); - outaw(a1.a_value); + outraw(&a1); } while ((c=getnb()) == ','); unget(c); break; @@ -145,11 +166,14 @@ loop: break; case TDEFS: - laddr = dot; + laddr = dot[segment]; lmode = ALIST; getaddr(&a1); istuser(&a1); - dot += a1.a_value; + /* Write out the bytes. 
The BSS will deal with the rest */ + for (value = 0 ; value < a1.a_value; value++) + outab(0); + dot[segment] += a1.a_value; break; case TNOP: @@ -178,7 +202,7 @@ loop: getaddr(&a1); } istuser(&a1); - disp = a1.a_value-dot-2; + disp = a1.a_value-dot[segment]-2; if (disp<-128 || disp>127) aerr(); outab(opcode); @@ -214,7 +238,7 @@ loop: } istuser(&a1); outab(opcode); - outaw(a1.a_value); + outraw(&a1); break; case TPUSH: @@ -480,13 +504,13 @@ void asmld(void) return; } if (msrc == (TMINDIR|TUSER)) { - outab(0x3A); /* lda */ - outaw(src.a_value); + outab(0x3A); /* ld a,(addr) */ + outraw(&src); return; } if (msrc == (TMINDIR|TWR)) { if (rsrc==BC || rsrc==DE) { - outab(0x0A|(rsrc<<4)); /* ldax */ + outab(0x0A|(rsrc<<4)); /* ld a,(r16) */ return; } } @@ -500,57 +524,57 @@ void asmld(void) return; } if (mdst == (TMINDIR|TUSER)) { - outab(0x32); /* sta */ - outaw(dst.a_value); + outab(0x32); /* ld (addr),a */ + outraw(&dst); return; } if (mdst == (TMINDIR|TWR)) { if (rdst==BC || rdst==DE) { - outab(0x02|(rdst<<4)); /* stax */ + outab(0x02|(rdst<<4)); /* ld (r16),a */ return; } } } if (dst.a_type==(TWR|SP) && msrc==TWR) { if (rsrc == HL) { - outab(0xF9); /* sphl */ + outab(0xF9); /* ld sp,hl */ return; } } if (msrc == TUSER) { if (mdst == TBR) { - outab(0x06|(rdst<<3)); /* mvi */ + outab(0x06|(rdst<<3)); /* ld r8,#n */ if (indexap != NULL) outab(indexap->a_value); - outab(src.a_value); + outrab(&src); return; } if (mdst == TWR) { - outab(0x01|(rdst<<4)); /* lxi */ - outaw(src.a_value); + outab(0x01|(rdst<<4)); /* ld r16,#n */ + outraw(&src); return; } } if (mdst==TWR && msrc==(TMINDIR|TUSER)) { if (rdst == HL) - outab(0x2A); /* lhld */ + outab(0x2A); /* ld hl,(xxxx) */ else outaw(0x4BED|(rdst<<12)); /* ld rp,(ppqq) */ - outaw(src.a_value); + outraw(&src); return; } if (mdst==(TMINDIR|TUSER) && msrc==TWR) { if (rsrc == HL) - outab(0x22); /* shld */ + outab(0x22); /* ld (xxxx),hl */ else outaw(0x43ED|(rsrc<<12)); /* ld (ppqq),rp */ - outaw(dst.a_value); + outraw(&dst); 
return; } if (mdst==TBR && msrc==TBR && (rdst!=M || rsrc!=M)) { outab(0x40|(rdst<<3)|rsrc); if (indexap != NULL) - outab(indexap->a_value); + outrab(indexap); return; } aerr(); @@ -628,13 +652,13 @@ void outop(int op, ADDR *ap) if ((op&0xFF00) != 0) { outab(op>>8); if (needisp != 0) { - outab(ap->a_value); + outrab(ap); needisp = 0; } } outab(op); if (needisp != 0) - outab(ap->a_value); + outrab(ap); } /* diff --git a/Applications/MWC/cmd/asz80/as2.c b/Applications/MWC/cmd/asz80/as2.c index a7272a1c..67f4aef2 100644 --- a/Applications/MWC/cmd/asz80/as2.c +++ b/Applications/MWC/cmd/asz80/as2.c @@ -108,7 +108,7 @@ void getid(char *id, int c) if (c < 0) { c = getnb(); - if (isalpha(c) == 0) + if (isalpha(c) == 0 && c != '_' && c != '.') qerr(); } p = &id[0]; @@ -120,7 +120,7 @@ void getid(char *id, int c) } if ((c = *ip) != '\n') ++ip; - } while (c=='\'' || isalnum(c)!=0); + } while (c=='\'' || isalnum(c)!=0 || c == '_'); if (c != '\n') --ip; while (p < &id[NCPS]) @@ -154,6 +154,7 @@ SYM *lookup(char *id, SYM *htable[], int cf) htable[hash] = sp; sp->s_type = TNEW; sp->s_value = 0; + sp->s_segment = UNKNOWN; symcopy(sp->s_id, id); } return (sp); diff --git a/Applications/MWC/cmd/asz80/as3.c b/Applications/MWC/cmd/asz80/as3.c index bacbc447..7e8f9cbf 100644 --- a/Applications/MWC/cmd/asz80/as3.c +++ b/Applications/MWC/cmd/asz80/as3.c @@ -58,6 +58,40 @@ void getaddr(ADDR *ap) } } +static void chkabsolute(ADDR *a) +{ + /* Not symbols, doesn't matter */ + if ((a->a_type & TMMODE) != TUSER) + return; + if (a->a_segment != ABSOLUTE) + aerr(); +} + +static void chksegment(ADDR *left, ADDR *right, int op) +{ + /* Not symbols, doesn't matter */ + if ((left->a_type & TMMODE) != TUSER ||(right->a_type & TMMODE) != TUSER) + return; + + /* Anything goes with absolutes */ + if (left->a_segment == ABSOLUTE && right->a_segment == ABSOLUTE) + return; + + /* This relies on ABSOLUTE being 0, an addition of segment offset and + absolute either way around produces a segment offset */ + if 
((left->a_segment == ABSOLUTE || right->a_segment == ABSOLUTE) && + op == '+') { + left->a_segment += right->a_segment; + return; + } + /* Subtraction within segment produces an absolute */ + if (left->a_segment == right->a_segment && op == '-') { + left->a_segment = ABSOLUTE; + return; + } + aerr(); +} + /* * Expression reader, * real work, part I. Read @@ -88,22 +122,29 @@ void expr1(ADDR *ap, int lpri, int paren) else ap->a_type = right.a_type; isokaors(ap, paren); + chksegment(ap, &right, '+'); ap->a_value += right.a_value; break; case '-': istuser(&right); isokaors(ap, paren); + chksegment(ap, &right, '-'); ap->a_value -= right.a_value; break; case '*': istuser(ap); istuser(&right); + chksegment(ap, &right, '*'); ap->a_value *= right.a_value; break; case '/': istuser(ap); istuser(&right); - ap->a_value /= right.a_value; + chksegment(ap, &right, '/'); + if (right.a_value == 0) + err('z'); + else + ap->a_value /= right.a_value; } } unget(c); @@ -131,18 +172,21 @@ void expr2(ADDR *ap) if (c == '-') { expr1(ap, HIPRI, 0); istuser(ap); + chkabsolute(ap); ap->a_value = -ap->a_value; return; } if (c == '~') { expr1(ap, HIPRI, 0); istuser(ap); + chkabsolute(ap); ap->a_value = ~ap->a_value; return; } if (c == '\'') { ap->a_type = TUSER; ap->a_value = get(); + ap->a_segment = ABSOLUTE; while ((c=get()) != '\'') { if (c == '\n') qerr(); @@ -150,7 +194,7 @@ void expr2(ADDR *ap) } return; } - if (c>='0' && c<='9') { + if (c>='0' && c<='9' || c == '$') { expr3(ap, c); return; } @@ -169,6 +213,7 @@ void expr2(ADDR *ap) uerr(id); ap->a_type = TUSER; ap->a_value = sp->s_value; + ap->a_segment = sp->s_segment; return; } qerr(); @@ -215,6 +260,20 @@ void expr3(ADDR *ap, int c) } np2 = &num[0]; value = 0; + /* No trailing tag, so look for 0octab, 0xhex and $xxxx */ + if (radix == 10) { + if (*np2 == '0') { + radix = 8; + np2++; + if (*np2 == 'x') { + radix = 16; + np2++; + } + } else if (*np2 =='$') { + radix = 16; + np2++; + } + } while (np2 < np1) { if ((c = *np2++)>='0' && 
c<='9') c -= '0'; @@ -228,6 +287,7 @@ void expr3(ADDR *ap, int c) } ap->a_type = TUSER; ap->a_value = value; + ap->a_segment = ABSOLUTE; } /* diff --git a/Applications/MWC/cmd/asz80/as4.c b/Applications/MWC/cmd/asz80/as4.c index d9a0853b..ec091988 100644 --- a/Applications/MWC/cmd/asz80/as4.c +++ b/Applications/MWC/cmd/asz80/as4.c @@ -3,119 +3,138 @@ * Output Intel compatable * hex files. */ + #include "as.h" +#include "obj.h" + +#define NHEX 8 /* Nice format size */ + +static uint16_t segsize[NSEGMENT]; +static uint16_t truesize[NSEGMENT]; +static off_t segbase[NSEGMENT]; + +struct objhdr obh; + +static void outc(char c); + +void outpass(void) +{ + off_t base = sizeof(obh); + int i; + if (pass == 1) { + /* Lay the file out */ + for (i = 1; i < NSEGMENT; i++) { + if (i != BSS) { + obh.o_segbase[i] = base; + segbase[i] = base; + printf("BASE %d %d\n", i, base); + base += segsize[i]; + printf("SIZE %d %d\n", i, truesize[i]); + } + obh.o_size[i] = truesize[i]; + } + obh.o_magic = 0; + obh.o_symbase = base; + obh.o_dbgbase = 0; /* for now */ + } +} -#define NHEX 32 /* Longest record */ +/* + * Absolute address change + */ + +void outabsolute(int addr) +{ +} -VALUE hexla; -VALUE hexpc; -char hexb[NHEX]; -char *hexp = &hexb[0]; +/* + * Segment change + */ + +void outsegment(int seg) +{ + /* Seek to the current writing address for this segment */ + if (pass == 1) + fseek(ofp, segbase[seg], SEEK_SET); +} /* * Output a word. Use the * standard Z-80 ordering (low * byte then high byte). */ -void outaw(int w) +void outaw(uint16_t w) { outab(w); outab(w >> 8); } -/* - * Output an absolute - * byte to the code and listing - * streams. 
- */ -void outab(int b) +void outraw(ADDR *a) { - if (pass != 0) { - if (cp < &cb[NCODE]) - *cp++ = b; - outbyte(b); + if (a->a_segment != ABSOLUTE) { + /* FIXME@ handle symbols */ + if (segment == BSS && a->a_value) + err('b'); + outbyte(REL_ESC); + outbyte((2 << 4) | REL_SIMPLE | a->a_segment); } - ++dot; + outaw(a->a_value); } /* - * Put out the end of file - * hex item at the very end of - * the object file. + * Output an absolute + * byte to the code and listing + * streams. */ -void outeof(void) +void outab(uint8_t b) { - outflush(); - fprintf(ofp, ":00000001FF\n"); + /* Not allowed to put data in the BSS except zero */ + if (segment == BSS && b) + err('b'); + if (segment == ABSOLUTE) + err('A'); + outbyte(b); + if (b == 0xDA) /* Quote relocation markers */ + outbyte(0x00); + ++dot[segment]; + ++truesize[segment]; + if (truesize[segment] == 0 || dot[segment] == 0) + err('o'); } -/* - * Output a hex byte. Flush - * the buffer if no room. Store the - * byte in the buffer, for future - * checksumming. Remember the load - * address for flushing. - */ -void outbyte(int b) +void outrab(ADDR *a) { - if (hexp>=&hexb[NHEX] || hexpc!=dot) { - outflush(); - hexp = &hexb[0]; + /* FIXME: handle symbols */ + if (segment == BSS && a->a_value) { + err('b'); + outbyte(REL_ESC); + outbyte((2 << 4) | REL_SIMPLE | a->a_segment); } - if (hexp == &hexb[0]) { - hexla = dot; - hexpc = dot; - } - *hexp++ = b; - ++hexpc; + outab(a->a_value); } /* - * Flush out a block of - * code to the hex file. Figure - * out the length word and the - * checksum byte. + * Put out the end of file + * hex item at the very end of + * the object file. 
*/ -void outflush(void) +void outeof(void) { - char *p; - int b; - int c; - - if ((b = hexp-&hexb[0]) != 0) { - putc(':', ofp); - outhex(b); - outhex(hexla >> 8); - outhex(hexla); - outhex(0); - c = b + (hexla>>8) + hexla; - p = &hexb[0]; - while (p < hexp) { - b = *p++; - outhex(b); - c += b; - } - outhex(-c); - putc('\n', ofp); - } + rewind(ofp); + obh.o_magic = MAGIC_OBJ; + fwrite(&obh, sizeof(obh), 1, ofp); + printf("Code %d byyes: Data %d bytes: BSS %d bytes\n", + truesize[CODE], truesize[DATA], truesize[BSS]); } /* - * Put out "b", as a - * two character hex value. - * We cannot use printf because - * of case problems on VMS. - * Upper case ascii. + * Output a byte and track our position. For BSS we care about sizes + * only. */ -void outhex(int b) +void outbyte(uint8_t b) { - static const char hex[] = { - '0', '1', '2', '3', - '4', '5', '6', '7', - '8', '9', 'A', 'B', - 'C', 'D', 'E', 'F' - }; - - putc(hex[(b>>4)&0x0F], ofp); - putc(hex[b&0x0F], ofp); + if (pass == 1 && segment != BSS) + putc(b, ofp); + segbase[segment]++; + segsize[segment]++; } diff --git a/Applications/MWC/cmd/asz80/as6.c b/Applications/MWC/cmd/asz80/as6.c index 1e6b9772..7f43a490 100644 --- a/Applications/MWC/cmd/asz80/as6.c +++ b/Applications/MWC/cmd/asz80/as6.c @@ -44,8 +44,22 @@ SYM sym[] = { { 0, "defm", TDEFM, XXXX }, { 0, "org", TORG, XXXX }, { 0, "equ", TEQU, XXXX }, + { 0, "export", TEXPORT, XXXX }, + { 0, ".byte", TDEFB, XXXX }, + { 0, ".word", TDEFW, XXXX }, + { 0, ".blkb", TDEFS, XXXX }, + { 0, ".ascii", TDEFM, XXXX }, + { 0, ".org", TORG, XXXX }, + { 0, ".equ", TEQU, XXXX }, + { 0, ".export", TEXPORT, XXXX }, { 0, "cond", TCOND, XXXX }, { 0, "endc", TENDC, XXXX }, + { 0, "code", TSEGMENT, CODE }, + { 0, "data", TSEGMENT, DATA }, + { 0, "bss", TSEGMENT, BSS }, + { 0, ".code", TSEGMENT, CODE }, + { 0, ".data", TSEGMENT, DATA }, + { 0, ".bss", TSEGMENT, BSS }, { 0, "nop", TNOP, 0x0000 }, { 0, "rlca", TNOP, 0x0007 }, { 0, "rrca", TNOP, 0x000F },