#!/usr/bin/env python3
#
# Read in files of PDP-7 assembly code in Ken Thompson's as format
# and convert them into PDP-7 machine code
#
# (c) 2016 Warren Toomey, GPL3
# Tweaked by Phil Budne (line, expression parsing, output formats)
#
# Python translation of Toomey's Perl as7 by Nick Downing.
# Provenance: commit 5d194b02c6bc6e47acc62de83e8e11a1e69c4811 of b_18bit.git
# (Tue, 2 Jan 2024 06:39:24 +0000 (+1100)), "Move Toomey's Perl /as7 to /doc,
# add /as7.py which is my Python translation"; new file as7.py, mode 100755.
# https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=5d194b02c6bc6e47acc62de83e8e11a1e69c4811;p=b_18bit.git
#

import getopt
import re
import sys

EXIT_SUCCESS = 0
EXIT_FAILURE = 1

### Global variables ###
var = {}                # Variables such as ., .., t
g_label = {}            # Global labels that are defined once: name -> value
l_label = {}            # Local labels that are defined once:
                        # file name -> {name -> value}
is_local = {}           # file name -> set of names declared local (@local)
r_label = {}            # Relative labels, e.g. 1:, 2:
                        # with an array of locations for each label

mem = []                # Actual PDP-7 memory locations
m_line = []             # Source lines associated with mem locations
origline = None         # The original current input line of code
line = None             # line being parsed (consumed piece by piece)
stage = 1               # Pass one or pass two
errors = EXIT_SUCCESS   # set to EXIT_FAILURE on error
line_error = ' '        # one-character error flag shown in 'list' output
file = None             # current file name
lineno = None           # current line number
fout = None             # output file
RELATIVE = 0o1000000    # set on non-absolute symbol values
                        # (one bit above the 18-bit word)
BASE = 0 | RELATIVE     # starting value for "."

## command line options
debug = False               # Run in debug mode
format = 'a7out'            # output format
namelist = False            # output n.out file
output = 'a.out'            # output file
no_label_warnings = False   # suppress multiply defined label warnings

### Subroutines ###


def err(_line_error, msg):
    """Report an assembly error and return 0.

    Sets the global error flag (the process exit status) and the
    one-character listing indicator `line_error`.  On pass two the message
    is written to stderr as "filename:lineno: msg" (the form emacs
    M-x compile understands) and, for the 'list' format, also to the
    output file.

    The return value 0 lets callers use the result directly as the value
    of the failed expression.
    """
    # Both names must be declared global: without `errors` here the
    # assignment below only creates a local and the exit status is lost.
    global line_error, errors
    line_error = _line_error

    errors = EXIT_FAILURE       # exit status
    if stage == 2:
        sys.stderr.write(f'{file:s}:{lineno:d}: {msg:s}\n')
        if format == 'list':
            fout.write(f'{file:s}:{lineno:d}: {msg:s}\n')
    return 0                    # expression value


def set_label(label, loc):
    """Set a label, either global or local, to the value `loc`.

    A label is local (per source file) when it was declared with @local
    in the current file or when its name starts with "L"; otherwise it is
    global.  Redefinition with a different value is only warned about on
    pass two (unless warnings are suppressed): a local label keeps its
    first value, a global label takes the new one.
    """
    # PLB: truncate to eight: moo.s declares "standing"
    # but references it as "standings"
    label = label[:8]

    declared_local = file in is_local and label in is_local[file]
    if declared_local or label[:1] == 'L':
        file_labels = l_label.get(file)
        # An error to have different values
        if (
            file_labels is not None and
            label in file_labels and
            file_labels[label] != loc
        ):
            # non-fatal: as.s doesn't even warn!!!!
            if stage == 2 and not no_label_warnings:
                sys.stderr.write(
                    f'{file:s}:{lineno:d}: '
                    f'Local label {label:s} multiply defined\n'
                )
        else:
            l_label.setdefault(file, {})[label] = loc
            if debug:
                print(f'Set local label {label:s} to {loc:o}')
    else:
        # original as doesn't complain about multiple definitions of labels
        # (Space Travel depends on this).  Now a warning (on by default)
        if label in g_label and g_label[label] != loc:
            if stage == 2 and not no_label_warnings:
                sys.stderr.write(
                    f'{file:s}:{lineno:d}: '
                    f'Warning: Global label {label:s} multiply defined\n'
                )
        g_label[label] = loc
        if debug:
            print(f'Set global label {label:s} to {loc:o}')


def get_label(label):
    """Return the value of a label, or None if it is not defined.

    Labels local to the current file take precedence over global ones.
    """
    # PLB: truncate to eight: moo.s declares "standing"
    # but references it as "standings"
    label = label[:8]
    if file in l_label and label in l_label[file]:
        return l_label[file][label]
    if label in g_label:
        return g_label[label]
    return None


def parse_file(file):
    """Open the given source file and parse it line by line.

    Maintains the global line counter and current-line state; on pass two
    with the 'list' format every non-empty source line is echoed to the
    output before it is assembled.
    """
    global lineno, line, origline

    with open(file) as fin:
        lineno = 0
        for raw in fin:
            lineno += 1
            line = raw.rstrip()     # Lose the end of line
            origline = line
            if stage == 2 and line != '' and format == 'list':
                fout.write(f'\t\t{line:s}\n')
            parse_line()


# matches labels consisting only of digits (relative labels)
re_numeric = re.compile(r'\d+$')


def process_label(label):
    """Process a label, setting its value to the location counter.

    Numeric (relative) labels such as "1:" may be defined many times; each
    definition's location is recorded, on pass one only.  It is OK for a
    symbolic label to be entered twice so long as it is the same value
    (ie; both passes).
    """
    loc = var['.']

    if debug:
        print('process_label', label)

    if re_numeric.match(label) is not None:     # numeric (relative) label?
        if stage == 1:
            r_label.setdefault(label, []).append(loc)
            if debug:
                print(f'Pushing {loc:o} for label {label:s}')
    else:                                       # symbolic label
        set_label(label, loc)


# Parse assembler directives. These were not in the original
# PDP-7 Unix source, but we need them so that we can write
# compilers that target this assembler.
re_local = re.compile(r'@local\s+(\S+)')


def parse_directive(directive):
    """Parse an assembler directive line (one starting with "@").

    Only "@local name" is recognised: it marks `name` as a local label in
    the current file.  Anything else is silently ignored.
    """
    if debug:
        print('Got directive', directive)

    # Set this as a local label
    match = re_local.match(directive)
    if match is not None:
        is_local.setdefault(file, set()).add(match.group(1))


def eol():
    """Return True when the rest of the line is empty or a comment."""
    return line == '' or line[0] == '"'     # empty or comment


# Blame Phil for this....
# parses global line based on prefixes, nibbling off a bit at a time
# (: and ; can appear in char literals)
# handles multiple ';' separated words per line
# allows " in character literals (tho none appear in listings)
re_label = re.compile(r'([A-Za-z0-9_\.]+):\s*')
re_assignment = re.compile(r'([^;= \t]+)\s*=')
re_semicolon = re.compile(r'\s*;?')


def parse_line():
    """Assemble the current global `line`.

    Each ';'-separated statement is, after any number of "label:"
    prefixes, either an assignment "name = expression" or a bare
    expression.  A bare expression occupies one memory word: the word is
    stored on pass two only, but the location counter "." advances on both
    passes.
    """
    global line_error, line, origline

    while True:
        line_error = ' '        # clear listing error indicator

        if eol():
            return

        if debug:
            print(f'parse_line: \'{line:s}\'')

        # Lose any leading whitespace
        line = line.lstrip()

        # Assembler directives start with an @
        if line[:1] == '@':
            parse_directive(line)
            return

        while True:             # labels
            match = re_label.match(line)
            if match is None:
                break
            line = line[match.end():]
            process_label(match.group(1))

        if eol():
            return

        match = re_assignment.match(line)
        if match is not None:
            line = line[match.end():]
            lhs = match.group(1)
            word = parse_expression()
            if word is None:
                # Syntax error, already reported.  Store 0 so that later
                # uses of the variable do not crash on None.
                word = 0
            if debug:
                print(f'Setting variable {lhs:s} to {word:o}')
            var[lhs] = word
            if stage == 2 and format == 'list':
                fout.write(f'\t{word:06o} {line_error:s}\n')
        else:                   # bare expression (not assignment)
            # Get its value on pass two and save to memory
            # Also save the input line that altered memory
            word = parse_expression()
            if stage == 2:
                location = var['.']
                if location & RELATIVE:     # non-absolute location?
                    location &= 0o777777
                    location += var['..'] & 0o777777    # relocate
                    # XXX check for overflow?
                # word is None after a syntax error; leave it alone then
                if word is not None and word & RELATIVE:
                    # word created from relative addresses?
                    word &= 0o777777
                    word += var['..'] & 0o777777        # relocate
                    # XXX check for overflow?
                if location < 0:
                    err('.', 'below base')
                else:
                    if len(mem) < location + 1:
                        mem.extend([None] * (location + 1 - len(mem)))
                    mem[location] = word
                    if len(m_line) < location + 1:
                        m_line.extend(
                            [None] * (location + 1 - len(m_line))
                        )
                    m_line[location] = origline
                    origline = ''
                if format == 'list' and word is not None:
                    # show flags??
                    fout.write(
                        f'{location:06o}: {word:06o} {line_error:s}\n'
                    )
            # Move up to the next location in both passes
            var['.'] += 1

        # eat trailing whitespace and ";", if any
        match = re_semicolon.match(line)
        if match is not None:
            line = line[match.end():]


re_symbol = re.compile(r'([A-Za-z_\.][A-Za-z0-9_\.]*)')
re_relative = re.compile(r'(\d+)([fb])')
re_constant = re.compile(r'(\d+)')


def _parse_syllable():
    """Consume one syllable from the front of `line` and return its value.

    Returns None, leaving `line` untouched, when the text is not a
    character literal, symbol, relative-label reference or constant.
    """
    global line

    if len(line) >= 2 and line[0] == '<':       # <char
        # NOTE(review): this branch was lost from the mangled source and
        # is reconstructed; the shift into the upper 9 bits is an
        # assumption -- confirm against doc/as7.
        if debug:
            print('\tfound <x')
        value = ord(line[1]) << 9               # absolute
        line = line[2:]
        return value
    if len(line) >= 2 and line[1] == '>':       # char>
        if debug:
            print('\tfound x>')
        value = ord(line[0])                    # absolute
        line = line[2:]
        return value
    if len(line) >= 2 and line[0] == '>':       # >char !!
        if debug:
            print('\tfound >x')
        value = ord(line[1])                    # absolute
        line = line[2:]     # consume it, or the caller loops forever
        return value

    match = re_symbol.match(line)
    if match is not None:
        line = line[match.end():]
        sym = match.group(1)
        if debug:
            print(f'\tsym: {sym:s}')

        if sym in var:
            value = var[sym]
            if debug:
                print(f'\tvar: {sym:s}: {value:o}')
            return value
        value = get_label(sym)
        if value is not None:
            if debug:
                print(f'\tlbl: {sym:s}: {value:o}')
            return value
        # Undefined symbols are only an error once pass one has seen
        # every label.
        if stage == 2:
            err('U', f'{sym:s} not defined')
        return 0

    match = re_relative.match(line)
    if match is not None:
        line = line[match.end():]
        if debug:
            print(f'\tfound relative: {match.group(0):s}')
        # Forward references cannot be resolved until pass two.
        if stage == 2:
            return find_relative_label(match.group(1), match.group(2))
        return 0

    match = re_constant.match(line)
    if match is not None:
        line = line[match.end():]
        digits = match.group(1)
        if debug:
            print(f'\tfound constant: {digits:s}')
        # PLB 2020-10-05: behave like as.s -- a leading 0 means octal
        base = 8 if digits[:1] == '0' else 10
        return int(digits, base) & 0o777777     # absolute

    return None


# Blame Phil for this bit too...
def parse_expression():
    """Parse an expression off `line` and return a PDP-7 word.

    An expression is a series of whitespace separated "syllables" that are
    ORed together, or added/subtracted when prefixed with "+"/"-".  Parsing
    stops at end of line, ';' or '"', which are left in `line`.

    The result carries the RELATIVE flag when it was built from relative
    addresses.  Returns None, after reporting the error and discarding the
    rest of the line, when a syllable cannot be recognised.
    """
    global line

    word = 0
    flags = 0

    if debug:
        print(f'expression: \'{line:s}\'')

    while True:
        op = '|'

        line = line.lstrip()    # as.s accepts ",' as whitespace too!

        if line == '' or line[0] in '";':   # EOL ; and " terminate expr
            word |= flags
            if debug:
                print(f'\tparse_expression => {word:o}')
            return word

        if debug:
            print(f' \'{line:s}\'')

        if line[:1] == '-':
            line = line[1:]
            if debug:
                print('\tfound -')
            op = '-'
        elif line[:1] == '+':
            line = line[1:]
            if debug:
                print('\tfound +')
            op = '+'

        syllable = _parse_syllable()
        if syllable is None:
            # From the BSD fortune file:
            # Ken Thompson has an automobile which he helped design.
            # Unlike most automobiles, it has neither speedometer,
            # nor gas gauge, nor any of the numerous idiot lights
            # which plague the modern driver.  Rather, if the driver
            # makes any mistake, a giant "?" lights up in the center
            # of the dashboard.  "The experienced driver",
            # he says, "will usually know what's wrong.
            err('?', f'huh? \'{line:s}\'')
            line = ''           # abort processing
            return None

        sylflags = syllable & RELATIVE
        syllable &= 0o777777

        if op == '+':
            word += syllable
            flags |= sylflags
        elif op == '-':
            word -= syllable
            if flags & RELATIVE:
                # relative-relative => absolute!
                if sylflags & RELATIVE:
                    flags &= ~RELATIVE
                # else: relative-abs => relative (no change)
            else:               # word is absolute
                if sylflags & RELATIVE:
                    err('A', 'absolute value minus relative??')
                # else: absolute-absolute => absolute (no change)
        else:
            word |= syllable
            flags |= sylflags
        word &= 0o777777
        if debug:
            print(f'\tsyllable: {syllable:o} word: {word:o}')


def find_relative_label(label, direction):
    """Return the location of relative label `label` nearest to ".".

    `direction` is 'f' for the first definition after the current
    location; any other value searches backwards for the last definition
    before it.  When no such definition exists an error is reported and 0
    (the value returned by err) is used as the expression value.
    """
    curlocation = var['.']

    # Error check: no labels at all
    if label not in r_label:
        return err('U', f'relative label {label:s} never defined')

    # Get the list of possible locations for this label
    # (never empty: entries are only created when a location is appended)
    locarray = r_label[label]

    if direction == 'f':
        # Search forward for first location larger than the current one
        for reflocation in locarray:
            if debug:
                print(f'forward {reflocation:o} {curlocation:o}')
            if reflocation > curlocation:
                return reflocation
    else:
        # Search backwards for first location smaller than the current one
        for reflocation in reversed(locarray):
            if debug:
                print(f'backward {reflocation:o} {curlocation:o}')
            if reflocation < curlocation:
                return reflocation
    return err(
        'U',
        f'undefined relative reference {label:s}{direction:s}'
    )


def punch(word, final = 0):
    """Output a word in paper tape binary format.

    The 18-bit word is written to `fout` as three frames of six bits each,
    most significant first, every frame with bit 0o200 set.  `final` is
    ORed into the last frame.
    """
    fout.write(
        bytes(
            [
                ((word >> 12) & 0o77) | 0o200,
                ((word >> 6) & 0o77) | 0o200,
                (word & 0o77) | 0o200 | final
            ]
        )
    )


def dump_labels(file):
    """Write the sorted global labels to `file` (for 'list', --namelist).

    Each line holds the name, its six-digit octal value and an "r" flag
    for relative labels; relative values are relocated by "..".
    """
    for key, addr in sorted(g_label.items()):
        flags = ''
        if addr & RELATIVE:
            flags = 'r'
            addr &= 0o777777
            addr += var['..']
        # mask again: relocation may have carried out of 18 bits
        file.write(f'{key:<8s} {addr & 0o777777:06o} {flags:s}\n')


# keep this near the gnu_getopt call to make it easy to add documentation!
def usage():
    """Print the command line synopsis to stderr and exit with failure."""
    sys.stderr.write(
        f'Usage: {sys.argv[0]:s} [--debug] [--format=a7out|list|ptr|rim]\n'
        '\t[-n] [--out file] file1.s [file2.s ...]\n'
    )
    sys.exit(EXIT_FAILURE)


# http://minnie.tuhs.org/cgi-bin/utree.pl?file=V3/man/manx/as.1
# ".." is the relocation constant and is added to each relocatable
# reference.  On a PDP-11 with relocation hardware, its value is 0; on
# most systems without protection, its value is 40000(8).

# PLB: "relocatable" values are flagged with RELATIVE

def _initial_symbols():
    """Return the predefined variables: ".", "..", syscalls and opcodes."""
    return {
        # start with the location counter at zero
        '.': BASE,
        '..': 4096,             # output base addr?

        # as.s does not have an initial symbol table
        # (except for the above), so there must have been a
        # user "ops" file

        'save': 1,              # saves core dump & user area!
        'getuid': 2,
        'open': 3,
        'read': 4,
        'write': 5,
        'creat': 6,
        'seek': 7,
        'tell': 8,
        'close': 9,
        'link': 10,
        'unlink': 11,
        'setuid': 12,
        'rename': 13,
        'exit': 14,
        'time': 15,
        'intrp': 16,
        'chdir': 17,
        'chmod': 18,
        'chown': 19,
        # 20 removed
        'sysloc': 21,           # return system addresses
        # 22 removed
        'capt': 23,             # capture display?
        'rele': 24,             # release display?
        'status': 25,           # "stat"
        'smes': 27,
        'rmes': 28,
        'fork': 29,

        # List of instruction names and machine code values
        # These come from https://raw.githubusercontent.com/simh/

        'sys': 0o020000,        # "cal i" instruction (trap indirect thru 0o20)
        'i': 0o020000,          # indirect bit

        # memory reference instructions
        'dac': 0o040000,        # deposit AC
        'jms': 0o100000,        # jump to subroutine
        'dzm': 0o140000,        # deposit zero in memory
        'lac': 0o200000,        # load AC
        'xor': 0o240000,        # exclusive or
        'add': 0o300000,        # one's complement add
        'tad': 0o340000,        # two's complement add
        'xct': 0o400000,        # execute
        'isz': 0o440000,        # increment and skip if zero
        'and': 0o500000,        # AND with contents of Y
        'sad': 0o540000,        # skip if AC different from content of Y
        'jmp': 0o600000,        # jump to Y

        # Type 177 Extended Arithmetic Element (EAE)
        'eae': 0o640000,        # base instruction (nop)
        'osc': 0o640001,        # OR SC into AC
        'omq': 0o640002,        # OR MQ into AC
        'cmq': 0o640004,        # Complement MQ
        'div': 0o640323,        # divide
        'norm': 0o640444,       # normalize, unsigned
        'lls': 0o640600,        # long left shift
        'clls': 0o641600,       # lls but clear AC first
        'als': 0o640700,        # AC shift
        'lrs': 0o640500,        # long right shift
        'ecla': 0o641000,       # clear AC
        'lacs': 0o641001,       # load AC with SC
        'lacq': 0o641002,       # load AC with MQ
        'abs': 0o644000,        # absolute value
        'divs': 0o644323,       # divide, signed

        'clq': 0o650000,        # clear MQ
        'frdiv': 0o650323,      # fractional divide
        'lmq': 0o652000,        # load MQ from AC
        'mul': 0o653122,        # multiply
        'idiv': 0o653323,       # integer divide
        'idivs': 0o657323,      # integer divide, signed
        'frdivs': 0o654323,     # fractional divide, signed
        'muls': 0o657122,       # multiply, signed

        'norms': 0o660444,      # normalize, signed
        'gsm': 0o664000,        # get sign and magnitude
        'lrss': 0o660500,       # long right shift, signed
        'llss': 0o660600,       # long left shift, signed
        'alss': 0o660700,       # AC left shift, signed

        # PLB: removed I/OT instructions: kernel uses sop.s

        # Operate Instructions

        # Group 1 (OPR 1) instructions
        'opr': 0o740000,        # base operate instruction (nop)
        'nop': 0o740000,
        'cma': 0o740001,        # complement accumulator
        'cml': 0o740002,        # complement link
        'oas': 0o740004,        # inclusive or accumulator switches
        'ral': 0o740010,        # rotate (ac, link) left
        'rar': 0o740020,        # rotate (ac, link) right
        'hlt': 0o740040,        # HALT
        'xx': 0o740040,
        'sma': 0o740100,        # skip on minus accumulator
        'sza': 0o740200,        # skip on zero accumulator
        'snl': 0o740400,        # skip on non-zero link

        # the same three skips with the 0o1000 bit set (sense reversed)
        'skp': 0o741000,        # unconditional skip
        'spa': 0o741100,        # skip on positive accumulator
        'sna': 0o741200,        # skip on non-zero accumulator
        'szl': 0o741400,        # skip on zero link

        'rtl': 0o742010,        # rotate two left (ral*2)
        'rtr': 0o742020,        # rotate two right (rar*2)

        'cll': 0o744000,        # clear link
        'stl': 0o744002,        # set link
        'rcl': 0o744010,        # clear link, rotate left
        'rcr': 0o744020,        # clear link, rotate right

        'cla': 0o750000,        # clear accumulator
        'clc': 0o750001,        # clear and complement acc
        'las': 0o750004,        # load acc from switches
        'glk': 0o750010,        # get link

        # Group 2 operate
        'law': 0o760000,        # load accumulator with (instruction)
        # 'lam': 0o777777,      # (load accumulator minus)
    }


### Main program ###

# Guarded so that importing this file does not run the assembler.  The
# statements stay at module level (not in a function) because they assign
# the module globals the subroutines above read.
if __name__ == '__main__':
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            'df:no:',
            ['debug', 'format=', 'namelist', 'output=', 'no-label-warnings']
        )
    except getopt.GetoptError as exc:
        # Not named "err": that would rebind, and on leaving the handler
        # delete, the err() function.
        # print help information and exit:
        print(exc)      # will print something like "option -a not recognized"
        usage()
    for o, a in opts:
        if o in ('-d', '--debug'):
            debug = True
        elif o in ('-f', '--format'):
            format = a
        elif o in ('-n', '--namelist'):
            namelist = True
        elif o in ('-o', '--output'):
            output = a
        elif o == '--no-label-warnings':
            no_label_warnings = True
        else:
            assert False, f'unhandled option {o}'
    if len(args) < 1:
        usage()

    # predefine syscall and opcodes as variables
    var = _initial_symbols()

    # Parse all the files
    sys.stderr.write('I\n')     # like the real as
    for file in args:
        parse_file(file)

    # Now do it all again, pass two
    var['.'] = BASE
    stage = 2
    with open(
        output,
        'wb' if format in ('ptr', 'rim') else 'w'
    ) as fout:
        sys.stderr.write('II\n')    # like the real as
        for file in args:
            sys.stderr.write(f'{file:s}\n')     # like the real as
            parse_file(file)

        if format == 'a7out':
            # print out the contents of memory
            for i, word in enumerate(mem):
                if word is not None:
                    # note: the original as7 has a bug, as m_line of '0'
                    # counts as None
                    fout.write(
                        '{0:06o}: {1:06o}\t{2:s}\n'.format(
                            i,
                            word,
                            m_line[i] if m_line[i] is not None else ''
                        )
                    )
        elif format == 'list':
            fout.write('\n')
            fout.write('Labels:\n')
            dump_labels(fout)
        elif format == 'ptr':   # dump absolute memory in PTR binary
            # starting at var['..'] does not make any sense as '..'
            # doesn't affect mem, so dump from location 0
            for word in mem:
                punch(word if word is not None else 0)
        elif format == 'rim':   # "Hardware Read In" tape
            # only handles contiguous memory, but no overhead
            base = var['..']
            # as for 'ptr', dump from location 0 rather than from base
            for word in mem:
                punch(word if word is not None else 0)
            # final word: command; has 0o100 lit on last frame
            punch(0o600000 | base, 0o100)
        else:
            sys.stderr.write(f'unknown format {format:s}\n')
            sys.exit(EXIT_FAILURE)

    if namelist:
        # as.s writes a binary file named n.out, ours is ascii
        with open('n.out', 'w') as nfout:
            dump_labels(nfout)

    sys.exit(errors)

# Other changes in the same commit (not part of this file):
#   - as7 renamed to doc/as7 (similarity index 100%)
#   - n.sh: both the commented-out and the active command now run
#     ./as7.py instead of ./as7
#     (active: ./as7.py header.s brtb.s hello.s footer.s)
#   - o.sh: likewise
#     (active: ./as7.py header.s b.s footer.s >a1 2>b1)