--- /dev/null
+#!/usr/bin/env python3
+#
+# Read in files of PDP-7 assembly code in Ken Thompson's as format
+# and convert them into PDP-7 machine code
+#
+# (c) 2016 Warren Toomey, GPL3
+# Tweaked by Phil Budne (line, expression parsing, output formats)
+#
+
+import getopt
+import re
+import sys
+
# Process exit codes
EXIT_SUCCESS, EXIT_FAILURE = 0, 1

### Global variables ###

# Symbol tables
var = dict()            # Variables such as ., .., t
g_label = dict()        # Global labels that are defined once
l_label = dict()        # Local labels that are defined once (keyed by file)
is_local = dict()       # Per file: set of labels declared local
r_label = dict()        # Relative labels, e.g. 1:, 2:
                        # with an array of locations for each label

# Assembled image
mem = list()            # Actual PDP-7 memory locations
m_line = list()         # Source lines associated with mem locations

# Parser state
origline = None         # The original current input line of code
line = None             # line being parsed
stage = 1               # Pass one or pass two
errors = EXIT_SUCCESS   # set to EXIT_FAILURE on error
line_error = ' '        # one-character error flag shown in listings
file = None             # current file name
lineno = None           # current line number
fout = None             # output file

# Bit 18 sits just above the 18-bit PDP-7 word
RELATIVE = 1 << 18      # set on non-absolute symbol values
BASE = RELATIVE         # starting value for ".": relative zero

## command line options
debug = False               # Run in debug mode
format = 'a7out'            # output format
namelist = False            # output n.out file
output = 'a.out'            # output file
no_label_warnings = False   # suppress multiply defined label warnings
+
+### Subroutines ###
+
# report an assembly error:
#       sets error flag
#       reports filename:lineno for emacs m-x compile
def err(_line_error, msg):
    """Record an assembly error and return 0 as a stand-in expression value.

    _line_error is the one-character flag stored in the global
    ``line_error`` (printed next to the word in 'list' output); msg is the
    human-readable description.

    The global exit status ``errors`` is set to EXIT_FAILURE on every call.
    The message itself is only written on pass two, so that an error is not
    reported once per pass: always to stderr, and also to the listing file
    when the output format is 'list'.
    """
    # Both names must be declared global: without the declaration the
    # assignment to ``errors`` only creates a local and the process would
    # still exit with EXIT_SUCCESS after reporting errors.
    global errors, line_error

    line_error = _line_error
    errors = EXIT_FAILURE       # exit status

    if stage == 2:
        report = f'{file:s}:{lineno:d}: {msg:s}\n'
        sys.stderr.write(report)
        if format == 'list':
            fout.write(report)
    return 0                    # expression value
+
# Set a label, either global or local
def set_label(label, loc):
    """Bind *label* to location *loc* in the local or global label table.

    Only the first eight characters of the name are significant
    (PLB: moo.s declares "standing" but references it as "standings").

    A label is local to the current file when it was declared with an
    @local directive for that file or when its name starts with "L".

    Redefinition with a different value is not fatal; on pass two a
    warning naming the label is written to stderr unless
    ``no_label_warnings`` is set.  A conflicting local label keeps its
    first value, while a conflicting global label takes the new value.
    """
    label = label[:8]

    declared_local = file in is_local and label in is_local[file]
    warn = stage == 2 and not no_label_warnings

    if declared_local or label[:1] == 'L':
        table = l_label.get(file)
        if table is not None and label in table and table[label] != loc:
            # non-fatal: as.s doesn't even warn!!!!
            if warn:
                sys.stderr.write(
                    f'{file:s}:{lineno:d}: '
                    f'Local label {label:s} multiply defined\n'
                )
            return
        if table is None:
            table = l_label[file] = {}
        table[label] = loc
        if debug:
            print(f'Set local label {label:s} to {loc:o}')
        return

    # original as doesn't complain about multiple definitions of labels
    # (Space Travel depends on this).  Now a warning (on by default)
    if label in g_label and g_label[label] != loc and warn:
        sys.stderr.write(
            f'{file:s}:{lineno:d}: '
            f'Warning: Global label {label:s} multiply defined\n'
        )
    g_label[label] = loc
    if debug:
        print(f'Set global label {label:s} to {loc:o}')
+
# Get the value of a global or local label
def get_label(label):
    """Return the value bound to *label*, or None when it is unknown.

    The name is cut to its first eight characters (PLB: moo.s declares
    "standing" but references it as "standings").  Labels local to the
    current file take precedence over global ones.
    """
    key = label[:8]
    scoped = l_label.get(file)
    if scoped is not None and key in scoped:
        return scoped[key]
    return g_label.get(key)
+
# Open and parse the given file
def parse_file(file):
    """Feed every line of the source file *file* to parse_line().

    For each line the globals ``lineno`` (1-based), ``line`` and
    ``origline`` are set to the line with trailing whitespace removed.
    On pass two with 'list' output, each non-empty source line is echoed
    to the listing before it is parsed.
    """
    global lineno, line, origline

    with open(file) as source:
        lineno = 0
        for lineno, raw in enumerate(source, 1):
            line = raw.rstrip()     # Lose the end of line
            origline = line
            echo = stage == 2 and line != '' and format == 'list'
            if echo:
                fout.write(f'\t\t{line:s}\n')
            parse_line()
+
# process a label and set its value to the location counter
# OK for symbolic label to be entered twice, so long as it's the same value
# (ie; both passes)
re_numeric = re.compile('\\d+$')
def process_label(label):
    """Define *label* at the current value of the location counter ".".

    All-digit (relative) labels are appended to their list of locations
    in ``r_label`` during pass one only; every other label is handed to
    set_label().
    """
    here = var['.']

    if debug:
        print('process_label', label)

    if re_numeric.match(label) is None:     # symbolic label
        set_label(label, here)
        return

    # numeric (relative) label: locations are collected on pass one only
    if stage != 1:
        return
    r_label.setdefault(label, []).append(here)
    if debug:
        print(f'Pushing {here:o} for label {label:s}')
+
# Parse assembler directives.  These were not in the original
# PDP-7 Unix source, but we need them so that we can write
# compilers that target this assembler.
re_local = re.compile('\\@local\\s+(\\S+)')
def parse_directive(directive):
    """Handle an "@" directive line.

    The only directive acted on is "@local NAME", which records NAME as
    a local label for the current file; any other text is ignored.
    """
    if debug:
        print('Got directive', directive)

    # Set this as a local label
    found = re_local.match(directive)
    if found is None:
        return
    is_local.setdefault(file, set()).add(found.group(1))
+
def eol():
    """Return True when nothing is left to parse on the current line.

    That is the case when the global ``line`` is empty or when what is
    left starts with a double quote, which begins a comment.
    """
    if line == '':
        return True             # empty
    return line[0] == '"'       # comment
+
# Blame Phil for this....
# parses global line based on prefixes, nibbling of a bit at a time
# (: and ; can appear in char literals)
# handles multiple ';' separated words per line
# allows " in character literals (tho none appear in listings)
re_label = re.compile('([A-Za-z0-9_\\.]+):\\s*')
re_assignment = re.compile('([^;= \\t]+)\\s*=')
re_semicolon = re.compile('\\s*;?')


def _store_word(word):
    """Pass two only: store *word* at the location counter's address.

    Relative locations and relative words are relocated by adding "..".
    *word* may be None when the expression could not be parsed; the
    memory cell is then left undefined (None) and no listing line is
    written for it.
    """
    global origline

    location = var['.']
    if location & RELATIVE:     # non-absolute location?
        location = (location & 0o777777) + (var['..'] & 0o777777)  # relocate
        # XXX check for overflow?
    # A None word comes from a syntax error that has already been
    # reported; testing it against RELATIVE would raise TypeError.
    if word is not None and word & RELATIVE:
        # word created from relative addresses
        word = (word & 0o777777) + (var['..'] & 0o777777)          # relocate
        # XXX check for overflow?

    if location < 0:
        err('.', 'below base')
    else:
        # grow both tables so that index `location` exists
        for table in (mem, m_line):
            shortfall = location + 1 - len(table)
            if shortfall > 0:
                table.extend([None] * shortfall)
        mem[location] = word
        m_line[location] = origline
        # only the first word of a source line carries the source text
        origline = ''

    if format == 'list' and word is not None:
        # show flags??
        fout.write(f'{location:06o}: {word:06o} {line_error:s}\n')


def parse_line():
    """Parse the global ``line``: labels, assignments and expressions.

    A line is a sequence of ';'-separated items.  Each item may start
    with any number of "label:" definitions and is then either an
    assignment "name = expression" or a bare expression.  A bare
    expression occupies one word: on pass two the word is stored in
    memory, and on both passes the location counter "." advances.
    A line starting with '@' is handed to parse_directive(), and a '"'
    starts a comment that ends the line.

    If parse_expression() fails it returns None (after reporting the
    error).  An assignment then stores 0, so later references to the
    symbol remain usable, and a bare expression leaves its memory word
    undefined.
    """
    global line_error, line

    while True:
        line_error = ' '        # clear listing error indicator

        if eol():
            return

        if debug:
            print(f'parse_line: \'{line:s}\'')

        # Lose any leading whitespace
        line = line.lstrip()

        # Assembler directives start with an @
        if line[:1] == '@':
            parse_directive(line)
            return

        # any number of leading labels
        match = re_label.match(line)
        while match is not None:
            line = line[match.end():]
            process_label(match.group(1))
            match = re_label.match(line)

        if eol():
            return

        match = re_assignment.match(line)
        if match is not None:
            line = line[match.end():]
            lhs = match.group(1)
            word = parse_expression()
            if word is None:
                # syntax error already reported; keep the symbol numeric
                word = 0
            if debug:
                print(f'Setting variable {lhs:s} to {word:o}')
            var[lhs] = word
            if stage == 2 and format == 'list':
                fout.write(f'\t{word:06o} {line_error:s}\n')
        else:                   # bare expression (not assignment)
            # Get its value on pass two and save to memory
            # Also save the input line that altered memory
            word = parse_expression()
            if stage == 2:
                _store_word(word)
            # Move up to the next location in both passes
            var['.'] += 1

        # eat trailing whitespace and ";", if any
        match = re_semicolon.match(line)
        if match is not None:
            line = line[match.end():]
+
# Blame Phil for this bit too...
# Parse an expression off line and return a PDP-7 word
# as a series of whitespace separated "syllables"
# ORed, added, or subtracted
re_symbol = re.compile('([A-Za-z_\\.][A-Za-z0-9_\\.]*)')
re_relative = re.compile('(\\d+)([fb])')
re_constant = re.compile('(\\d+)')


def _parse_syllable():
    """Consume one syllable from the front of ``line`` and return its value.

    Recognised forms, tried in this order: the character literals "<c"
    (character in the upper half-word), "c>" and ">c"; a symbol (variable
    or label); a relative label reference such as "1f" or "2b"; a numeric
    constant (octal when it starts with 0, otherwise decimal).

    Returns None, after reporting a '?' error, when nothing matches.
    The returned value may carry the RELATIVE flag.
    """
    global line

    if len(line) >= 2:
        if line[0] == '<':                      # <char
            if debug:
                print('\tfound <x')
            value = ord(line[1]) << 9           # absolute
            line = line[2:]
            return value
        if line[1] == '>':                      # x>
            if debug:
                print('\tfound x>')
            value = ord(line[0])                # absolute
            line = line[2:]
            return value
        if line[0] == '>':                      # >char !!
            if debug:
                print('\tfound >x')
            value = ord(line[1])                # absolute
            # the literal must be consumed, otherwise the caller's loop
            # sees the same ">x" again and never terminates
            line = line[2:]
            return value

    match = re_symbol.match(line)
    if match is not None:
        line = line[match.end():]
        sym = match.group(1)
        if debug:
            print(f'\tsym: {sym:s}')

        if sym in var:
            value = var[sym]
            if value is None:
                # left behind by an assignment whose expression failed
                value = 0
            if debug:
                print(f'\tvar: {sym:s}: {value:o}')
            return value

        value = get_label(sym)
        if value is not None:
            if debug:
                print(f'\tlbl: {sym:s}: {value:o}')
            return value

        # forward references are still unknown on pass one
        if stage == 2:
            err('U', f'{sym:s} not defined')
        return 0

    match = re_relative.match(line)
    if match is not None:
        line = line[match.end():]
        number, direction = match.group(1), match.group(2)
        if debug:
            print(f'\tfound relative: {number:s}{direction:s}')
        if stage != 2:
            return 0
        target = find_relative_label(number, direction)
        # None means the reference was undefined (already reported)
        return 0 if target is None else target

    match = re_constant.match(line)
    if match is not None:
        line = line[match.end():]
        digits = match.group(1)
        if debug:
            print(f'\tfound constant: {digits:s}')
        # PLB 2020-10-05: behave like as.s: leading zero means octal
        radix = 8 if digits[:1] == '0' else 10
        try:
            value = int(digits, radix)
        except ValueError:
            # e.g. "09": an 8 or 9 in a constant that starts with 0
            err('?', f'bad octal constant \'{digits:s}\'')
            return None
        return value & 0o777777                 # absolute

    # From the BSD fortune file:
    # Ken Thompson has an automobile which he helped design.
    # Unlike most automobiles, it has neither speedometer,
    # nor gas gauge, nor any of the numerous idiot lights
    # which plague the modern driver.  Rather, if the driver
    # makes any mistake, a giant "?" lights up in the center
    # of the dashboard.  "The experienced driver",
    # he says, "will usually know what's wrong.
    err('?', f'huh? \'{line:s}\'')
    return None


def parse_expression():
    """Parse an expression off the front of ``line``; return a PDP-7 word.

    An expression is a run of whitespace-separated syllables.  Each
    syllable is ORed into the word unless it is prefixed with "+" (add)
    or "-" (subtract).  Parsing stops, without consuming the terminator,
    at end of line, at ';' or at '"'.

    The result is the 18-bit word, with the RELATIVE flag set above it
    when the value depends on a relocatable address.  Relative minus
    relative yields an absolute value; absolute minus relative is
    reported as an 'A' error.

    Returns None, with ``line`` emptied, when a syllable cannot be
    parsed; the error has been reported through err() by then.
    """
    global line

    word = 0
    flags = 0

    if debug:
        print(f'expression: \'{line:s}\'')

    while True:
        line = line.lstrip()    # as.s accepts ",' as whitespace too!

        # EOL ; and " terminate expr
        if line == '' or line[0] == '"' or line[0] == ';':
            word |= flags
            if debug:
                print(f'\tparse_expression => {word:o}')
            return word

        if debug:
            print(f'    \'{line:s}\'')

        op = '|'
        if line[0] in '-+':
            op = line[0]
            line = line[1:]
            if debug:
                print(f'\tfound {op:s}')

        syllable = _parse_syllable()
        if syllable is None:
            line = ''           # abort processing
            return None

        sylflags = syllable & RELATIVE
        syllable &= 0o777777

        if op == '-':
            word -= syllable
            if flags & RELATIVE:
                # relative-relative => absolute!
                if sylflags & RELATIVE:
                    flags &= ~RELATIVE
                # else: relative-abs => relative (no change)
            elif sylflags & RELATIVE:   # word is absolute
                err('A', 'absolute value minus relative??')
            # else: absolute-absolute => absolute (no change)
        else:
            if op == '+':
                word += syllable
            else:
                word |= syllable
            flags |= sylflags

        word &= 0o777777
        if debug:
            print(f'\tsyllable: {syllable:o} word: {word:o}')
+
# Given a relative label number and a direction,
# return the location of this relative label or
# die if we don't have one
def find_relative_label(label, direction):
    """Resolve a relative label reference such as "1f" or "2b".

    label is the label number as a string.  direction 'f' selects the
    first recorded definition located after the current location counter;
    any other direction selects the nearest one located before it.

    Returns the location, or None after reporting a 'U' error (naming the
    label) when the label was never defined or has no definition in the
    requested direction.
    """
    curlocation = var['.']

    # Error check: no labels at all
    if label not in r_label:
        err('U', f'relative label {label:s} never defined')
        return None

    # Locations were appended in definition order on pass one; an entry
    # in r_label always holds at least one location.
    locarray = r_label[label]

    if direction == 'f':
        # Search forward for first location larger then the current one
        for reflocation in locarray:
            if debug:
                print(f'forward {reflocation:o} {curlocation:o}')
            if reflocation > curlocation:
                return reflocation
    else:
        # Search backwards for first location smaller than the current one
        for reflocation in reversed(locarray):
            if debug:
                print(f'backward {reflocation:o} {curlocation:o}')
            if reflocation < curlocation:
                return reflocation

    err('U', f'undefined relative reference {label:s}{direction:s}')
    return None
+
def punch(word, final = 0):     # output a word in paper tape binary format
    """Write *word* to ``fout`` as three paper tape frames.

    Each frame carries six bits of the word, most significant first, and
    has the 0o200 bit set.  *final* is ORed into the last frame.
    """
    frames = [((word >> shift) & 0o77) | 0o200 for shift in (12, 6, 0)]
    frames[-1] |= final
    fout.write(bytes(frames))
+
def dump_labels(file):          # for 'list' and --namelist
    """Write the global label table to the open text file *file*.

    One line per label, sorted by name: the name left-justified in eight
    columns, the six-digit octal address, and an 'r' flag for relocatable
    labels.  Relocatable addresses are shown after relocation by "..".
    Nothing is written anywhere else (in particular not to stdout).
    """
    for key, addr in sorted(g_label.items()):
        if addr & RELATIVE:
            flags = 'r'
            addr = (addr & 0o777777) + var['..']
        else:
            flags = ''
        # keep the printed address within 18 bits even after relocation
        file.write(f'{key:<8s} {addr & 0o777777:06o} {flags:s}\n')
+
# keep this near the gnu_getopt call to make it easy to add documentation!
def usage():
    """Write the command synopsis to stderr and exit with EXIT_FAILURE."""
    # The program name comes from sys.argv; there is no bare ``argv``
    # name in this module.
    sys.stderr.write(
        f'''Usage: {sys.argv[0]:s} [--debug] [--format=a7out|list|ptr|rim]
        [-n] [--out file] [--no-label-warnings] file1.s [file2.s ...]
''')
    sys.exit(EXIT_FAILURE)
+
### Main program ###

# Parse the command line.  gnu_getopt allows options and file names to
# be mixed; whatever is left in args are the source files to assemble.
try:
    opts, args = getopt.gnu_getopt(
        sys.argv[1:],
        'df:no:',
        ['debug', 'format=', 'namelist', 'output=', 'no-label-warnings']
    )
except getopt.GetoptError as exc:
    # The exception is deliberately not bound to the name "err": that
    # would replace the module-level err() function and then unbind the
    # name when the handler ends.
    # Report the problem (e.g. "option -a not recognized") and exit:
    sys.stderr.write(f'{exc}\n')
    usage()

for o, a in opts:
    if o in ('-d', '--debug'):
        debug = True
    elif o in ('-f', '--format'):
        format = a
    elif o in ('-n', '--namelist'):
        namelist = True
    elif o in ('-o', '--output'):
        output = a
    elif o == '--no-label-warnings':
        no_label_warnings = True
    else:
        # an option accepted by gnu_getopt above but not handled here;
        # raised explicitly so the check survives "python -O"
        raise AssertionError(f'unhandled option {o}')

# at least one source file is required
if len(args) < 1:
    usage()
+
# http://minnie.tuhs.org/cgi-bin/utree.pl?file=V3/man/manx/as.1
# ".." is the relocation constant and is added to each relocatable
#  reference.  On a PDP-11 with relocation hardware, its value is 0; on
#  most systems without protection, its value is 40000(8).

# PLB: "relocatable" values are flagged with RELATIVE

# The initial symbol table is assembled from the groups below, merged
# into ``var`` in the order listed.

# as.s does not have an initial symbol table
# (except for "." and ".."), so there must have been a
# user "ops" file
_SYSCALLS = {
    'save': 1,          # saves core dump & user area!
    'getuid': 2,
    'open': 3,
    'read': 4,
    'write': 5,
    'creat': 6,
    'seek': 7,
    'tell': 8,
    'close': 9,
    'link': 10,
    'unlink': 11,
    'setuid': 12,
    'rename': 13,
    'exit': 14,
    'time': 15,
    'intrp': 16,
    'chdir': 17,
    'chmod': 18,
    'chown': 19,
    # 20 removed
    'sysloc': 21,       # return system addresses
    # 22 removed
    'capt': 23,         # capture display?
    'rele': 24,         # release display?
    'status': 25,       # "stat"
    'smes': 27,
    'rmes': 28,
    'fork': 29,
}

# List of instruction names and machine code values
# These come from https://raw.githubusercontent.com/simh/
_MEMORY_REFERENCE = {
    'sys': 0o020000,    # "cal i" instruction (trap indirect thru 0o20)
    'i': 0o020000,      # indirect bit

    # memory reference instructions
    'dac': 0o040000,    # deposit AC
    'jms': 0o100000,    # jump to subroutine
    'dzm': 0o140000,    # deposit zero in memory
    'lac': 0o200000,    # load AC
    'xor': 0o240000,    # exclusive or
    'add': 0o300000,    # one's complement add
    'tad': 0o340000,    # two's complement add
    'xct': 0o400000,    # execute
    'isz': 0o440000,    # increment and skip if zero
    'and': 0o500000,    # AND with contents of Y
    'sad': 0o540000,    # skip if AC different from content of Y
    'jmp': 0o600000,    # jump to Y
}

# Type 177 Extended Arithmetic Element (EAE)
_EAE = {
    'eae': 0o640000,    # base instruction (nop)
    'osc': 0o640001,    # OR SC into AC
    'omq': 0o640002,    # OR MQ into AC
    'cmq': 0o640004,    # Complement MQ
    'div': 0o640323,    # divide
    'norm': 0o640444,   # normalize, unsigned
    'lls': 0o640600,    # long left shift
    'clls': 0o641600,   # lls but clear AC first
    'als': 0o640700,    # accumulator left shift
    'lrs': 0o640500,    # long right shift
    'ecla': 0o641000,   # clear AC
    'lacs': 0o641001,   # load AC with SC
    'lacq': 0o641002,   # load AC with MQ
    'abs': 0o644000,    # absolute value
    'divs': 0o644323,   # divide, signed

    'clq': 0o650000,    # clear MQ
    'frdiv': 0o650323,  # fractional divide
    'lmq': 0o652000,    # load MQ from AC
    'mul': 0o653122,    # multiply
    'idiv': 0o653323,   # integer divide
    'idivs': 0o657323,  # integer divide, signed
    'frdivs': 0o654323, # fractional divide, signed
    'muls': 0o657122,   # multiply, signed

    'norms': 0o660444,  # normalize, signed
    'gsm': 0o664000,    # get sign and magnitude
    'lrss': 0o660500,   # long right shift, signed
    'llss': 0o660600,   # long left shift, signed
    'alss': 0o660700,   # AC left shift, signed
}

# PLB: removed I/OT instructions: kernel uses sop.s

# Operate Instructions
_OPERATE = {
    # Group 1 (OPR 1) instructions
    'opr': 0o740000,    # base operate instruction (nop)
    'nop': 0o740000,
    'cma': 0o740001,    # complement accumulator
    'cml': 0o740002,    # complement link
    'oas': 0o740004,    # inclusive or accumulator switches
    'ral': 0o740010,    # rotate (ac, link) left
    'rar': 0o740020,    # rotate (ac, link) right
    'hlt': 0o740040,    # HALT
    'xx': 0o740040,
    'sma': 0o740100,    # skip on minus accumulator
    'sza': 0o740200,    # skip on zero accumulator
    'snl': 0o740400,    # skip on non-zero link

    'skp': 0o741000,    # unconditional skip
    'spa': 0o741100,    # skip on positive accumulator
    'sna': 0o741200,    # skip on non-zero accumulator
    'szl': 0o741400,    # skip on zero link

    'rtl': 0o742010,    # rotate two left (ral*2)
    'rtr': 0o742020,    # rotate two right (rar*2)

    'cll': 0o744000,    # clear link
    'stl': 0o744002,    # set link
    'rcl': 0o744010,    # clear link, rotate left
    'rcr': 0o744020,    # clear link, rotate right

    'cla': 0o750000,    # clear accumulator
    'clc': 0o750001,    # clear and complement acc
    'las': 0o750004,    # load acc from switches
    'glk': 0o750010,    # get link

    # Group 2 operate
    'law': 0o760000,    # load accumulator with (instruction)
    # 'lam' (0o777777, load accumulator minus) is intentionally not defined
}

# start with the location counter at zero
# predefine syscall and opcodes as variables
var = {
    '.': BASE,
    '..': 4096,         # output base addr?
    **_SYSCALLS,
    **_MEMORY_REFERENCE,
    **_EAE,
    **_OPERATE,
}
+
# Pass one: parse all the files (this collects label and variable values)
sys.stderr.write('I\n')         # like the real as
for file in args:
    parse_file(file)

# Pass two: reset the location counter and do it all again, this time
# filling memory and writing the output
var['.'] = BASE
stage = 2
with open(output, 'wb' if format in ('ptr', 'rim') else 'w') as fout:
    sys.stderr.write('II\n')    # like the real as
    for file in args:
        sys.stderr.write(f'{file:s}\n')     # like the real as
        parse_file(file)

    if format == 'a7out':
        # print out the contents of memory: address, word and source line
        for address, word in enumerate(mem):
            if word is None:
                continue
            # note: the original as7 has a bug, as m_line of '0' counts as None
            source_text = m_line[address]
            if source_text is None:
                source_text = ''
            fout.write(
                '{0:06o}: {1:06o}\t{2:s}\n'.format(address, word, source_text)
            )
    elif format == 'list':
        # the listing itself was written during pass two; add the labels
        fout.write('\n')
        fout.write('Labels:\n')
        dump_labels(fout)
    elif format == 'ptr':       # dump absolute memory in PTR binary
        # every cell from address zero is punched ('..' is not used as the
        # starting address); undefined cells are punched as zero
        for word in mem:
            punch(0 if word is None else word)
    elif format == 'rim':       # "Hardware Read In" tape
        # only handles continguous memory, but no overhead
        base = var['..']
        # every cell from address zero is punched, as for 'ptr'
        for word in mem:
            punch(0 if word is None else word)
        # final word: command; has 0o100 lit on last frame
        punch(0o600000 | base, 0o100)
    else:
        sys.stderr.write(f'unknown format {format:s}\n')
        sys.exit(EXIT_FAILURE)

if namelist:
    # as.s writes a binary file named n.out, ours is ascii
    with open('n.out', 'w') as nfout:
        dump_labels(nfout)

sys.exit(errors)