From 81f84075692d5b766d88e879bf8d577a94cd6fc0 Mon Sep 17 00:00:00 2001 From: Nick Downing Date: Tue, 16 Apr 2019 21:13:29 +1000 Subject: [PATCH] Pristine Ack-5.5 --- Action | 276 + Copyright | 32 + NEW | 34 + README | 3 + TakeAction | 144 + bin/cc-and-mkdep.ack | 7 + bin/cc-and-mkdep.all | 20 + bin/cc-and-mkdep.sun | 7 + bin/do_deps | 18 + bin/do_resolve | 47 + bin/lint-lib.ack | 12 + bin/lint-lib.unix | 12 + bin/mk_manpage | 18 + bin/rm_deps | 8 + doc/6500.doc | 1893 ++++ doc/LLgen/LLgen.n | 1077 +++ doc/LLgen/LLgen.refs | 54 + doc/LLgen/proto.make | 20 + doc/Makefile | 82 + doc/READ_ME | 8 + doc/ack.doc | 444 + doc/ansi_C.doc | 365 + doc/basic.doc | 949 ++ doc/ceg/ceg.ref | 42 + doc/ceg/ceg.tr | 1587 ++++ doc/ceg/proto.make | 12 + doc/cg.doc | 1864 ++++ doc/crefman.doc | 629 ++ doc/ego/bo/bo1 | 162 + doc/ego/ca/ca1 | 65 + doc/ego/cf/cf1 | 94 + doc/ego/cf/cf2 | 50 + doc/ego/cf/cf3 | 53 + doc/ego/cf/cf4 | 93 + doc/ego/cf/cf5 | 82 + doc/ego/cf/cf6 | 21 + doc/ego/cj/cj1 | 144 + doc/ego/cs/cs1 | 45 + doc/ego/cs/cs2 | 86 + doc/ego/cs/cs3 | 250 + doc/ego/cs/cs4 | 311 + doc/ego/cs/cs5 | 46 + doc/ego/ic/ic1 | 57 + doc/ego/ic/ic2 | 150 + doc/ego/ic/ic3 | 431 + doc/ego/ic/ic4 | 83 + doc/ego/ic/ic5 | 166 + doc/ego/il/il1 | 112 + doc/ego/il/il2 | 93 + doc/ego/il/il3 | 164 + doc/ego/il/il4 | 135 + doc/ego/il/il5 | 446 + doc/ego/il/il6 | 27 + doc/ego/intro/head | 10 + doc/ego/intro/intro1 | 79 + doc/ego/intro/tail | 17 + doc/ego/lv/lv1 | 95 + doc/ego/ov/ov1 | 374 + doc/ego/proto.make | 64 + doc/ego/ra/ra1 | 33 + doc/ego/ra/ra2 | 139 + doc/ego/ra/ra3 | 386 + doc/ego/ra/ra4 | 28 + doc/ego/refs.gen | 120 + doc/ego/refs.opt | 546 ++ doc/ego/refs.stat | 29 + doc/ego/sp/sp1 | 184 + doc/ego/sr/sr1 | 47 + doc/ego/sr/sr2 | 223 + doc/ego/sr/sr3 | 244 + doc/ego/sr/sr4 | 28 + doc/ego/ud/ud1 | 58 + doc/ego/ud/ud2 | 64 + doc/ego/ud/ud3 | 26 + doc/ego/ud/ud4 | 78 + doc/ego/ud/ud5 | 19 + doc/em/READ_ME | 6 + doc/em/app.codes.nr | 153 + doc/em/app.exam.nr | 275 + doc/em/assem.nr | 802 ++ 
doc/em/cont.nr | 4 + doc/em/descr.nr | 153 + doc/em/dispat1.sed | 6 + doc/em/dispat2.sed | 6 + doc/em/dispat3.sed | 6 + doc/em/dspace.nr | 376 + doc/em/em.i | 1678 ++++ doc/em/env.nr | 193 + doc/em/even.c | 9 + doc/em/exam.e | 178 + doc/em/exam.p | 40 + doc/em/int/READ_ME | 5 + doc/em/int/em.p | 1768 ++++ doc/em/int/emdmp.c | 200 + doc/em/int/mktables.c | 237 + doc/em/int/proto.make | 33 + doc/em/intro.nr | 173 + doc/em/ip.awk | 11 + doc/em/ispace.nr | 57 + doc/em/mach.nr | 360 + doc/em/macr.nr | 113 + doc/em/mapping.nr | 232 + doc/em/mem.nr | 80 + doc/em/mkdispatch.c | 492 + doc/em/proto.make | 49 + doc/em/title.nr | 34 + doc/em/traps.nr | 169 + doc/em/types.nr | 142 + doc/i80.doc | 816 ++ doc/install.doc | 1237 +++ doc/install.pr | 1320 +++ doc/int/Makefile | 10 + doc/int/README | 4 + doc/int/appA | 280 + doc/int/appB | 486 + doc/int/bib | 25 + doc/int/cover | 26 + doc/int/draw.mac | 24 + doc/int/proto.make | 18 + doc/int/txt1 | 215 + doc/int/txt2 | 589 ++ doc/int/txt3 | 180 + doc/lint/abstract | 18 + doc/lint/appendix_A | 56 + doc/lint/appendix_B | 52 + doc/lint/chap1 | 34 + doc/lint/chap2 | 57 + doc/lint/chap3 | 294 + doc/lint/chap4 | 979 ++ doc/lint/chap5 | 107 + doc/lint/chap6 | 107 + doc/lint/chap7 | 139 + doc/lint/chap8 | 56 + doc/lint/chap9 | 48 + doc/lint/contents | 59 + doc/lint/frontpage | 14 + doc/lint/proto.make | 24 + doc/m2ref.doc | 545 ++ doc/m68020.doc | 1408 +++ doc/ncg.doc | 3020 +++++++ doc/nopt.doc | 591 ++ doc/occam/ctot | 8 + doc/occam/p0 | 21 + doc/occam/p1 | 87 + doc/occam/p2 | 151 + doc/occam/p3 | 337 + doc/occam/p4 | 42 + doc/occam/p5 | 18 + doc/occam/p6 | 5 + doc/occam/p7 | 23 + doc/occam/p8 | 16 + doc/occam/p9 | 60 + doc/occam/proto.make | 32 + doc/pascal/ab+intro.doc | 50 + doc/pascal/compar.doc | 89 + doc/pascal/conf.doc | 88 + doc/pascal/contents.doc | 41 + doc/pascal/deviations.doc | 118 + doc/pascal/example.doc | 92 + doc/pascal/extensions.doc | 60 + doc/pascal/hints.doc | 76 + doc/pascal/his.doc | 36 + doc/pascal/improv.doc | 87 
+ doc/pascal/internal.doc | 342 + doc/pascal/options.doc | 166 + doc/pascal/proto.make | 28 + doc/pascal/reference.doc | 50 + doc/pascal/rtl.doc | 85 + doc/pascal/syntax.doc | 269 + doc/pascal/test.doc | 19 + doc/pascal/titlepg.doc | 13 + doc/pascal/transpem.doc | 407 + doc/pascal/vrk.doc | 23 + doc/pcref.doc | 1204 +++ doc/peep.doc | 521 ++ doc/proto.make | 161 + doc/regadd.doc | 131 + doc/sparc/1 | 53 + doc/sparc/2 | 109 + doc/sparc/3 | 82 + doc/sparc/4 | 468 + doc/sparc/5 | 153 + doc/sparc/A | 184 + doc/sparc/B | 128 + doc/sparc/init | 20 + doc/sparc/intro | 23 + doc/sparc/note_on_reg_wins | 58 + doc/sparc/pics/EM_stack.orig | 34 + doc/sparc/pics/EM_stack.ours | 106 + doc/sparc/pics/compile_bars | 49 + doc/sparc/pics/mem_config | 34 + doc/sparc/pics/perf | 12 + doc/sparc/pics/perf.comp | 7 + doc/sparc/pics/perf.d | 4 + doc/sparc/pics/perf.dhry | 7 + doc/sparc/pics/reg_layout | 24 + doc/sparc/pics/run-time_bars | 101 + doc/sparc/pics/run-time_bars.bup | 100 + doc/sparc/pics/signal_stack | 42 + doc/sparc/proto.make | 32 + doc/sparc/refs | 185 + doc/sparc/timing | 22 + doc/sparc/title | 15 + doc/toolkit.doc | 896 ++ doc/top/proto.make | 11 + doc/top/refs.top | 84 + doc/top/top.n | 869 ++ doc/v7bugs.doc | 303 + doc/val.doc | 753 ++ doc/z80.doc | 80 + emtest/Makefile | 20 + emtest/READ_ME | 136 + emtest/ok | 10 + emtest/select.c | 256 + emtest/test.h | 8 + emtest/tests | 4597 ++++++++++ etc/em_table | 175 + etc/ip_spec.t | 354 + etc/new_table | 72 + etc/new_table_done | 0 etc/pop_push | 15 + etc/proto.make | 31 + etc/traps | 28 + fast/Action | 15 + fast/driver/afcc.1 | 158 + fast/driver/afm2.1 | 209 + fast/driver/afpc.1 | 227 + fast/driver/driver.c | 860 ++ fast/driver/proto.make | 57 + fast/f_c.ansi/Parameters | 143 + fast/f_c.ansi/proto.main | 65 + fast/f_c.ansi/proto.make | 268 + fast/f_c/Parameters | 148 + fast/f_c/proto.main | 65 + fast/f_c/proto.make | 264 + fast/f_m2/Parameters | 101 + fast/f_m2/proto.main | 65 + fast/f_m2/proto.make | 156 + 
fast/f_pc/Parameters | 62 + fast/f_pc/proto.main | 65 + fast/f_pc/proto.make | 157 + fcc/Action | 6 + fcc/cemcom/Parameters.sun3 | 148 + fcc/cemcom/Parameters.vax4 | 148 + fcc/cemcom/proto.main | 64 + fcc/cemcom/proto.make | 275 + fcc/driver/fcc.1 | 148 + fcc/driver/fcc.c | 676 ++ fcc/driver/proto.make | 38 + first/cp_dir | 16 + first/create_dir | 11 + first/em_path.h.src | 11 + first/first | 214 + first/get_answer | 5 + first/get_makepars | 104 + first/get_sys | 155 + first/get_sysvax | 21 + first/install_tail | 84 + first/limit_enquire | 207 + first/limit_impl | 206 + first/lint_params | 18 + first/local.h.src | 19 + first/mk_config | 130 + first/mk_makefile | 34 + first/mk_target | 39 + first/target_comp | 27 + first/util_comp | 23 + h/Makefile | 10 + h/arch.h | 25 + h/as_spec.h | 6 + h/bc_io.h | 33 + h/bc_string.h | 21 + h/cg_pattern.h | 161 + h/cgg_cg.h | 167 + h/em_abs.h | 35 + h/em_ego.h | 17 + h/em_flag.h | 30 + h/em_mes.h | 29 + h/em_mnem.h | 133 + h/em_pseu.h | 12 + h/em_ptyp.h | 25 + h/em_reg.h | 15 + h/em_spec.h | 29 + h/ip_spec.h | 40 + h/m2_traps.h | 16 + h/ocm_chan.h | 52 + h/ocm_parco.h | 23 + h/ocm_proc.h | 61 + h/out.h | 123 + h/pc_err.h | 29 + h/pc_file.h | 24 + h/pc_math.h | 42 + h/ranlib.h | 34 + h/stb.h | 25 + include/_tail_cc/assert.h | 19 + include/_tail_cc/ctype.h | 55 + include/_tail_cc/errno.h | 20 + include/_tail_cc/fcntl.h | 55 + include/_tail_cc/grp.h | 21 + include/_tail_cc/math.h | 81 + include/_tail_cc/pwd.h | 26 + include/_tail_cc/setjmp.h | 12 + include/_tail_cc/sgtty.h | 119 + include/_tail_cc/signal.h | 60 + include/_tail_cc/stdio.h | 83 + include/_tail_cc/sys/dir.h | 45 + include/_tail_cc/sys/errno.h | 144 + include/_tail_cc/sys/stat.h | 85 + include/_tail_cc/sys/stdtypes.h | 25 + include/_tail_cc/sys/types.h | 79 + include/_tail_cc/time.h | 27 + include/_tail_cc/varargs.h | 16 + include/_tail_mon/errno.h | 47 + include/_tail_mon/sgtty.h | 91 + include/_tail_mon/signal.h | 26 + include/_tail_mon/sys/timeb.h | 11 + 
include/_tail_mon/sys/types.h | 79 + include/occam/dec.ocm | 62 + include/occam/printd.ocm | 36 + include/occam/prints.ocm | 40 + lang/basic/lib/LIST | 34 + lang/basic/lib/tail_bc.a | Bin 0 -> 28140 bytes lang/basic/src/basic.g | 792 ++ lang/basic/src/basic.lex | 613 ++ lang/basic/src/bem.c | 53 + lang/basic/src/bem.h | 83 + lang/basic/src/compile.c | 30 + lang/basic/src/eval.c | 536 ++ lang/basic/src/func.c | 269 + lang/basic/src/gencode.c | 705 ++ lang/basic/src/graph.c | 340 + lang/basic/src/graph.h | 37 + lang/basic/src/initialize.c | 49 + lang/basic/src/llmess.c | 62 + lang/basic/src/maketokentab | 16 + lang/basic/src/parsepar.c | 85 + lang/basic/src/proto.main | 59 + lang/basic/src/proto.make | 116 + lang/basic/src/symbols.c | 376 + lang/basic/src/symbols.h | 88 + lang/basic/src/util.c | 97 + lang/basic/src/yylexp.c | 22 + lang/basic/test/Makefile | 15 + lang/basic/test/Out.std | 54 + lang/basic/test/bull.b | 37 + lang/basic/test/bull.b.g | 3 + lang/basic/test/buzzword.b | 66 + lang/basic/test/buzzword.b.g | 12 + lang/basic/test/checker.b | 192 + lang/basic/test/checker.b.g | 10 + lang/basic/test/creator.b | 29 + lang/basic/test/grafiek.b | 55 + lang/basic/test/grafiek.b.g | 34 + lang/basic/test/gunner.b | 58 + lang/basic/test/gunner.b.g | 1 + lang/basic/test/learn.b | 25 + lang/basic/test/learn.b.g | 1 + lang/basic/test/opg1.b | 6 + lang/basic/test/opg1.b.g | 1 + lang/basic/test/opg2.b | 16 + lang/basic/test/opg2.b.g | 1 + lang/basic/test/opg3.b | 12 + lang/basic/test/opg3.b.g | 21 + lang/basic/test/opg4.b | 10 + lang/basic/test/opg4.b.g | 21 + lang/basic/test/opg5.b | 13 + lang/basic/test/opg5.b.g | 1 + lang/basic/test/opg6.b | 14 + lang/basic/test/opg6.b.g | 18 + lang/basic/test/runcmp | 23 + lang/basic/test/som4.b | 17 + lang/basic/test/som4.b.g | 2 + lang/basic/test/test01.b | 5 + lang/basic/test/test01.b.g | 3 + lang/basic/test/test02.b | 2 + lang/basic/test/test02.b.g | 3 + lang/basic/test/test03.b | 2 + lang/basic/test/test03.b.g | 2 + 
lang/basic/test/test04.b | 5 + lang/basic/test/test04.b.g | 5 + lang/basic/test/test05.b | 7 + lang/basic/test/test05.b.g | 2 + lang/basic/test/test06.b | 5 + lang/basic/test/test06.b.g | 13 + lang/basic/test/test07.b | 6 + lang/basic/test/test07.b.g | 3 + lang/basic/test/test08.b | 8 + lang/basic/test/test08.b.g | 3 + lang/basic/test/test09.b | 18 + lang/basic/test/test09.b.g | 17 + lang/basic/test/test10.b | 7 + lang/basic/test/test10.b.g | 6 + lang/basic/test/test11.b | 4 + lang/basic/test/test11.b.g | 2 + lang/basic/test/test12.b | 6 + lang/basic/test/test12.b.g | 3 + lang/basic/test/test13.b | 7 + lang/basic/test/test13.b.g | 4 + lang/basic/test/test14.b | 11 + lang/basic/test/test14.b.g | 17 + lang/basic/test/test15.b | 6 + lang/basic/test/test15.b.g | 3 + lang/basic/test/test16.b | 11 + lang/basic/test/test16.b.g | 21 + lang/basic/test/test17.b | 3 + lang/basic/test/test17.b.g | 1 + lang/basic/test/test18.b | 11 + lang/basic/test/test18.b.g | 5 + lang/basic/test/test19.b | 15 + lang/basic/test/test19.b.g | 5 + lang/basic/test/test20.b | 9 + lang/basic/test/test20.b.g | 2 + lang/basic/test/test21.b | 10 + lang/basic/test/test21.b.g | 4 + lang/basic/test/test22.b | 9 + lang/basic/test/test22.b.g | 4 + lang/basic/test/test23.b | 11 + lang/basic/test/test23.b.g | 2 + lang/basic/test/test24.b | 8 + lang/basic/test/test24.b.g | 6 + lang/basic/test/test25.b | 7 + lang/basic/test/test25.b.g | 3 + lang/basic/test/test26.b | 17 + lang/basic/test/test26.b.g | 17 + lang/basic/test/test27.b | 12 + lang/basic/test/test27.b.g | 7 + lang/basic/test/test28.b | 7 + lang/basic/test/test28.b.g | 2 + lang/basic/test/test29.b | 14 + lang/basic/test/test29.b.g | 14 + lang/basic/test/test30.b | 8 + lang/basic/test/test30.b.g | 3 + lang/basic/test/test31.b | 11 + lang/basic/test/test31.b.g | 0 lang/basic/test/test32.b | 5 + lang/basic/test/test32.b.g | 3 + lang/basic/test/test33.b | 6 + lang/basic/test/test33.b.g | 2 + lang/basic/test/test34.b | 4 + lang/basic/test/test35.b | 2 + 
lang/basic/test/test35.b.g | 0 lang/basic/test/tst/data | 1 + lang/basic/test/tst/data1 | 2 + lang/cem/cemcom.ansi/BigPars | 143 + lang/cem/cemcom.ansi/LLlex.c | 849 ++ lang/cem/cemcom.ansi/LLlex.h | 65 + lang/cem/cemcom.ansi/LLmessage.c | 64 + lang/cem/cemcom.ansi/SmallPars | 143 + lang/cem/cemcom.ansi/align.h | 30 + lang/cem/cemcom.ansi/arith.c | 637 ++ lang/cem/cemcom.ansi/arith.h | 32 + lang/cem/cemcom.ansi/assert.h | 24 + lang/cem/cemcom.ansi/atw.h | 10 + lang/cem/cemcom.ansi/blocks.c | 200 + lang/cem/cemcom.ansi/cemcom.ansi.1 | 103 + lang/cem/cemcom.ansi/ch3.c | 729 ++ lang/cem/cemcom.ansi/ch3bin.c | 400 + lang/cem/cemcom.ansi/ch3mon.c | 167 + lang/cem/cemcom.ansi/char.tab | 67 + lang/cem/cemcom.ansi/class.h | 47 + lang/cem/cemcom.ansi/code.c | 802 ++ lang/cem/cemcom.ansi/code.str | 22 + lang/cem/cemcom.ansi/conversion.c | 152 + lang/cem/cemcom.ansi/cstoper.c | 260 + lang/cem/cemcom.ansi/dataflow.c | 37 + lang/cem/cemcom.ansi/declar.g | 767 ++ lang/cem/cemcom.ansi/declar.str | 41 + lang/cem/cemcom.ansi/declarator.c | 143 + lang/cem/cemcom.ansi/decspecs.c | 168 + lang/cem/cemcom.ansi/decspecs.h | 21 + lang/cem/cemcom.ansi/def.str | 40 + lang/cem/cemcom.ansi/domacro.c | 863 ++ lang/cem/cemcom.ansi/dumpidf.c | 503 ++ lang/cem/cemcom.ansi/error.c | 627 ++ lang/cem/cemcom.ansi/estack.str | 21 + lang/cem/cemcom.ansi/eval.c | 1037 +++ lang/cem/cemcom.ansi/expr.c | 502 ++ lang/cem/cemcom.ansi/expr.str | 99 + lang/cem/cemcom.ansi/expression.g | 351 + lang/cem/cemcom.ansi/field.c | 174 + lang/cem/cemcom.ansi/field.str | 15 + lang/cem/cemcom.ansi/file_info.h | 20 + lang/cem/cemcom.ansi/fltcstoper.c | 102 + lang/cem/cemcom.ansi/idf.c | 654 ++ lang/cem/cemcom.ansi/idf.str | 49 + lang/cem/cemcom.ansi/init.c | 96 + lang/cem/cemcom.ansi/input.c | 91 + lang/cem/cemcom.ansi/input.h | 15 + lang/cem/cemcom.ansi/interface.h | 8 + lang/cem/cemcom.ansi/ival.g | 766 ++ lang/cem/cemcom.ansi/l_brace.str | 28 + lang/cem/cemcom.ansi/l_class.h | 28 + lang/cem/cemcom.ansi/l_comment.c | 
211 + lang/cem/cemcom.ansi/l_comment.h | 15 + lang/cem/cemcom.ansi/l_em.h | 77 + lang/cem/cemcom.ansi/l_ev_ord.c | 109 + lang/cem/cemcom.ansi/l_lint.c | 455 + lang/cem/cemcom.ansi/l_lint.h | 18 + lang/cem/cemcom.ansi/l_misc.c | 430 + lang/cem/cemcom.ansi/l_outdef.c | 620 ++ lang/cem/cemcom.ansi/l_outdef.str | 48 + lang/cem/cemcom.ansi/l_state.str | 93 + lang/cem/cemcom.ansi/l_states.c | 1315 +++ lang/cem/cemcom.ansi/label.c | 54 + lang/cem/cemcom.ansi/label.h | 28 + lang/cem/cemcom.ansi/level.h | 23 + lang/cem/cemcom.ansi/macro.str | 52 + lang/cem/cemcom.ansi/main.c | 514 ++ lang/cem/cemcom.ansi/make.allocd | 14 + lang/cem/cemcom.ansi/make.hfiles | 35 + lang/cem/cemcom.ansi/make.next | 7 + lang/cem/cemcom.ansi/make.tokcase | 41 + lang/cem/cemcom.ansi/make.tokfile | 11 + lang/cem/cemcom.ansi/mes.h | 8 + lang/cem/cemcom.ansi/options | 34 + lang/cem/cemcom.ansi/options.c | 317 + lang/cem/cemcom.ansi/pragma.c | 46 + lang/cem/cemcom.ansi/program.g | 238 + lang/cem/cemcom.ansi/proto.c | 474 + lang/cem/cemcom.ansi/proto.main | 65 + lang/cem/cemcom.ansi/proto.make | 272 + lang/cem/cemcom.ansi/proto.str | 22 + lang/cem/cemcom.ansi/replace.c | 807 ++ lang/cem/cemcom.ansi/replace.str | 51 + lang/cem/cemcom.ansi/sizes.h | 37 + lang/cem/cemcom.ansi/skip.c | 104 + lang/cem/cemcom.ansi/specials.h | 18 + lang/cem/cemcom.ansi/stab.c | 308 + lang/cem/cemcom.ansi/stack.c | 281 + lang/cem/cemcom.ansi/stack.str | 34 + lang/cem/cemcom.ansi/statement.g | 509 ++ lang/cem/cemcom.ansi/stb.c | 15 + lang/cem/cemcom.ansi/stmt.str | 14 + lang/cem/cemcom.ansi/struct.c | 474 + lang/cem/cemcom.ansi/struct.str | 30 + lang/cem/cemcom.ansi/switch.c | 248 + lang/cem/cemcom.ansi/switch.str | 29 + lang/cem/cemcom.ansi/tokenname.c | 148 + lang/cem/cemcom.ansi/tokenname.h | 13 + lang/cem/cemcom.ansi/type.c | 312 + lang/cem/cemcom.ansi/type.str | 85 + lang/cem/cemcom.ansi/util.c | 247 + lang/cem/cemcom.ansi/util.str | 11 + lang/cem/cemcom/BigPars | 151 + lang/cem/cemcom/LLlex.c | 580 ++ 
lang/cem/cemcom/LLlex.h | 73 + lang/cem/cemcom/LLmessage.c | 67 + lang/cem/cemcom/SmallPars | 152 + lang/cem/cemcom/align.h | 34 + lang/cem/cemcom/arith.c | 522 ++ lang/cem/cemcom/arith.h | 27 + lang/cem/cemcom/asm.c | 16 + lang/cem/cemcom/assert.h | 24 + lang/cem/cemcom/atw.h | 10 + lang/cem/cemcom/blocks.c | 168 + lang/cem/cemcom/cemcom.1 | 99 + lang/cem/cemcom/ch7.c | 442 + lang/cem/cemcom/ch7bin.c | 377 + lang/cem/cemcom/ch7mon.c | 161 + lang/cem/cemcom/char.tab | 57 + lang/cem/cemcom/class.h | 41 + lang/cem/cemcom/code.c | 732 ++ lang/cem/cemcom/code.str | 22 + lang/cem/cemcom/conversion.c | 156 + lang/cem/cemcom/cstoper.c | 237 + lang/cem/cemcom/dataflow.c | 37 + lang/cem/cemcom/declar.g | 540 ++ lang/cem/cemcom/declar.str | 38 + lang/cem/cemcom/declarator.c | 112 + lang/cem/cemcom/decspecs.c | 111 + lang/cem/cemcom/decspecs.str | 18 + lang/cem/cemcom/def.str | 40 + lang/cem/cemcom/domacro.c | 759 ++ lang/cem/cemcom/dumpidf.c | 402 + lang/cem/cemcom/error.c | 497 ++ lang/cem/cemcom/estack.str | 21 + lang/cem/cemcom/eval.c | 1008 +++ lang/cem/cemcom/expr.c | 512 ++ lang/cem/cemcom/expr.str | 114 + lang/cem/cemcom/expression.g | 337 + lang/cem/cemcom/field.c | 182 + lang/cem/cemcom/field.str | 16 + lang/cem/cemcom/file_info.h | 20 + lang/cem/cemcom/idf.c | 737 ++ lang/cem/cemcom/idf.str | 51 + lang/cem/cemcom/init.c | 82 + lang/cem/cemcom/input.c | 83 + lang/cem/cemcom/input.h | 7 + lang/cem/cemcom/interface.h | 8 + lang/cem/cemcom/ival.g | 710 ++ lang/cem/cemcom/l_brace.str | 28 + lang/cem/cemcom/l_class.h | 32 + lang/cem/cemcom/l_comment.c | 211 + lang/cem/cemcom/l_comment.h | 15 + lang/cem/cemcom/l_em.h | 77 + lang/cem/cemcom/l_ev_ord.c | 109 + lang/cem/cemcom/l_lint.c | 455 + lang/cem/cemcom/l_lint.h | 18 + lang/cem/cemcom/l_misc.c | 427 + lang/cem/cemcom/l_outdef.c | 583 ++ lang/cem/cemcom/l_outdef.str | 48 + lang/cem/cemcom/l_state.str | 93 + lang/cem/cemcom/l_states.c | 1308 +++ lang/cem/cemcom/label.c | 74 + lang/cem/cemcom/label.h | 28 + 
lang/cem/cemcom/level.h | 19 + lang/cem/cemcom/macro.str | 60 + lang/cem/cemcom/main.c | 483 + lang/cem/cemcom/make.allocd | 8 + lang/cem/cemcom/make.hfiles | 35 + lang/cem/cemcom/make.next | 3 + lang/cem/cemcom/make.tokcase | 41 + lang/cem/cemcom/make.tokfile | 11 + lang/cem/cemcom/mcomm.c | 246 + lang/cem/cemcom/mes.h | 8 + lang/cem/cemcom/options | 32 + lang/cem/cemcom/options.c | 369 + lang/cem/cemcom/program.g | 212 + lang/cem/cemcom/proto.main | 65 + lang/cem/cemcom/proto.make | 266 + lang/cem/cemcom/replace.c | 242 + lang/cem/cemcom/scan.c | 239 + lang/cem/cemcom/sizes.h | 32 + lang/cem/cemcom/skip.c | 113 + lang/cem/cemcom/specials.h | 18 + lang/cem/cemcom/stab.c | 307 + lang/cem/cemcom/stack.c | 281 + lang/cem/cemcom/stack.str | 34 + lang/cem/cemcom/statement.g | 520 ++ lang/cem/cemcom/stb.c | 15 + lang/cem/cemcom/stmt.str | 14 + lang/cem/cemcom/struct.c | 499 ++ lang/cem/cemcom/struct.str | 30 + lang/cem/cemcom/switch.c | 238 + lang/cem/cemcom/switch.str | 29 + lang/cem/cemcom/tokenname.c | 147 + lang/cem/cemcom/tokenname.h | 13 + lang/cem/cemcom/type.c | 228 + lang/cem/cemcom/type.str | 55 + lang/cem/cemcom/util.c | 242 + lang/cem/cemcom/util.str | 11 + lang/cem/cpp.ansi/LLlex.c | 576 ++ lang/cem/cpp.ansi/LLlex.h | 46 + lang/cem/cpp.ansi/LLmessage.c | 24 + lang/cem/cpp.ansi/Parameters | 72 + lang/cem/cpp.ansi/arith.h | 18 + lang/cem/cpp.ansi/bits.h | 18 + lang/cem/cpp.ansi/ch3bin.c | 173 + lang/cem/cpp.ansi/ch3mon.c | 27 + lang/cem/cpp.ansi/char.tab | 67 + lang/cem/cpp.ansi/class.h | 47 + lang/cem/cpp.ansi/domacro.c | 811 ++ lang/cem/cpp.ansi/error.c | 176 + lang/cem/cpp.ansi/expr.c | 58 + lang/cem/cpp.ansi/expression.g | 136 + lang/cem/cpp.ansi/file_info.h | 18 + lang/cem/cpp.ansi/idf.c | 7 + lang/cem/cpp.ansi/idf.h | 20 + lang/cem/cpp.ansi/init.c | 93 + lang/cem/cpp.ansi/input.c | 62 + lang/cem/cpp.ansi/input.h | 15 + lang/cem/cpp.ansi/macro.str | 47 + lang/cem/cpp.ansi/main.c | 160 + lang/cem/cpp.ansi/make.allocd | 8 + lang/cem/cpp.ansi/make.hfiles | 
35 + lang/cem/cpp.ansi/make.next | 3 + lang/cem/cpp.ansi/make.tokcase | 34 + lang/cem/cpp.ansi/make.tokfile | 6 + lang/cem/cpp.ansi/ncpp.6 | 95 + lang/cem/cpp.ansi/options.c | 143 + lang/cem/cpp.ansi/preprocess.c | 458 + lang/cem/cpp.ansi/proto.main | 54 + lang/cem/cpp.ansi/proto.make | 155 + lang/cem/cpp.ansi/replace.c | 796 ++ lang/cem/cpp.ansi/replace.str | 50 + lang/cem/cpp.ansi/skip.c | 92 + lang/cem/cpp.ansi/tokenname.c | 72 + lang/cem/ctest/Out2.nf.std | 906 ++ lang/cem/ctest/Out2.std | 531 ++ lang/cem/ctest/Out4.nf.std | 543 ++ lang/cem/ctest/Out4.std | 150 + lang/cem/ctest/READ_ME | 62 + lang/cem/ctest/ctconv/conv.c | 153 + lang/cem/ctest/ctconv/conv.cem.g | 34 + lang/cem/ctest/ctconv/run | 1 + lang/cem/ctest/ctdecl/decl.c | 76 + lang/cem/ctest/ctdecl/decl.cem.g | 7 + lang/cem/ctest/ctdecl/run | 1 + lang/cem/ctest/ctdivers/ops.c | 156 + lang/cem/ctest/ctdivers/ops.cem.g | 121 + lang/cem/ctest/ctdivers/run | 1 + lang/cem/ctest/cterr/bugs.c | 169 + lang/cem/ctest/cterr/bugs.cem.g | 10 + lang/cem/ctest/cterr/run | 1 + lang/cem/ctest/ctest1/run | 1 + lang/cem/ctest/ctest1/test.c | 1245 +++ lang/cem/ctest/ctest1/test.cem.g | 1 + lang/cem/ctest/ctest2/run | 1 + lang/cem/ctest/ctest2/t7.c | 628 ++ lang/cem/ctest/ctest2/t7.cem.g | 1 + lang/cem/ctest/ctest3/run | 1 + lang/cem/ctest/ctest3/test2.c | 450 + lang/cem/ctest/ctest3/test2.cem.g | 1 + lang/cem/ctest/ctest5/run | 1 + lang/cem/ctest/ctest5/test1.c | 471 + lang/cem/ctest/ctest5/test1.cem.g | 3 + lang/cem/ctest/ctgen/OPS | 143 + lang/cem/ctest/ctgen/bf.cem.g | 111 + lang/cem/ctest/ctgen/bf.sed | 26 + lang/cem/ctest/ctgen/bfu.cem.g | 111 + lang/cem/ctest/ctgen/bfu.sed | 26 + lang/cem/ctest/ctgen/cel.cem.g | 111 + lang/cem/ctest/ctgen/cel.sed | 22 + lang/cem/ctest/ctgen/clu.cem.g | 111 + lang/cem/ctest/ctgen/clu.sed | 22 + lang/cem/ctest/ctgen/ec.cem.g | 111 + lang/cem/ctest/ctgen/ec.sed | 22 + lang/cem/ctest/ctgen/ef.cem.g | 72 + lang/cem/ctest/ctgen/ef.sed | 33 + lang/cem/ctest/ctgen/ei.cem.g | 111 + 
lang/cem/ctest/ctgen/ei.sed | 23 + lang/cem/ctest/ctgen/el.cem.g | 111 + lang/cem/ctest/ctgen/el.sed | 22 + lang/cem/ctest/ctgen/eu.cem.g | 111 + lang/cem/ctest/ctgen/eu.sed | 22 + lang/cem/ctest/ctgen/id.cem.g | 72 + lang/cem/ctest/ctgen/id.sed | 33 + lang/cem/ctest/ctgen/lc.cem.g | 111 + lang/cem/ctest/ctgen/lc.sed | 22 + lang/cem/ctest/ctgen/ld.cem.g | 72 + lang/cem/ctest/ctgen/ld.sed | 33 + lang/cem/ctest/ctgen/lf.cem.g | 72 + lang/cem/ctest/ctgen/lf.sed | 33 + lang/cem/ctest/ctgen/li.cem.g | 111 + lang/cem/ctest/ctgen/li.sed | 23 + lang/cem/ctest/ctgen/ll.cem.g | 111 + lang/cem/ctest/ctgen/ll.sed | 22 + lang/cem/ctest/ctgen/lu.cem.g | 111 + lang/cem/ctest/ctgen/lu.sed | 22 + lang/cem/ctest/ctgen/makefile | 2 + lang/cem/ctest/ctgen/mkc | 1 + lang/cem/ctest/ctgen/run | 4 + lang/cem/ctest/ctgen/run1 | 9 + lang/cem/ctest/ctill/noarg.c | 14 + lang/cem/ctest/ctill/noarg.cem.g | 3 + lang/cem/ctest/ctill/run | 2 + lang/cem/ctest/ctinit/init.c | 253 + lang/cem/ctest/ctinit/init.cem.g | 112 + lang/cem/ctest/ctinit/run | 1 + lang/cem/ctest/ctmargt/margt.c | 21 + lang/cem/ctest/ctmargt/margt.cem.g | 14 + lang/cem/ctest/ctmargt/run | 1 + lang/cem/ctest/ctprof/makefile | 28 + lang/cem/ctest/ctprof/procentry.c | 27 + lang/cem/ctest/ctprof/run | 10 + lang/cem/ctest/ctprof/tp.c | 18 + lang/cem/ctest/ctprof/tp.cem.g | 16 + lang/cem/ctest/ctsetjmp/run | 1 + lang/cem/ctest/ctsetjmp/stjmp.c | 242 + lang/cem/ctest/ctsetjmp/stjmp.cem.g | 13 + lang/cem/ctest/ctstruct/run | 1 + lang/cem/ctest/ctstruct/str.c | 168 + lang/cem/ctest/ctstruct/str.cem.g | 152 + lang/cem/ctest/ctsys/run | 1 + lang/cem/ctest/ctsys/signal.c | 18 + lang/cem/ctest/ctsys/tfork.c | 37 + lang/cem/ctest/ctsys/tfork.cem.g | 1 + lang/cem/ctest/local.h | 14 + lang/cem/ctest/makefile.std | 53 + lang/cem/ctest/run | 28 + lang/cem/libcc.ansi/LIST | 13 + lang/cem/libcc.ansi/assert/LIST | 1 + lang/cem/libcc.ansi/assert/Makefile | 2 + lang/cem/libcc.ansi/assert/assert.c | 14 + lang/cem/libcc.ansi/ctype/LIST | 15 + 
lang/cem/libcc.ansi/ctype/Makefile | 15 + lang/cem/libcc.ansi/ctype/char.tab | 26 + lang/cem/libcc.ansi/ctype/genfiles | 12 + lang/cem/libcc.ansi/ctype/tolower.c | 5 + lang/cem/libcc.ansi/ctype/toupper.c | 5 + lang/cem/libcc.ansi/errno/LIST | 1 + lang/cem/libcc.ansi/errno/Makefile | 2 + lang/cem/libcc.ansi/errno/errlist.c | 47 + lang/cem/libcc.ansi/head_ac.e | 100 + lang/cem/libcc.ansi/headers/LIST | 20 + lang/cem/libcc.ansi/headers/Makefile | 2 + lang/cem/libcc.ansi/headers/assert.h | 23 + lang/cem/libcc.ansi/headers/ctype.h | 47 + lang/cem/libcc.ansi/headers/dirent.h | 34 + lang/cem/libcc.ansi/headers/errno.h | 20 + lang/cem/libcc.ansi/headers/float.h | 79 + lang/cem/libcc.ansi/headers/grp.h | 21 + lang/cem/libcc.ansi/headers/limits.h | 37 + lang/cem/libcc.ansi/headers/locale.h | 43 + lang/cem/libcc.ansi/headers/math.h | 43 + lang/cem/libcc.ansi/headers/mathconst.h | 29 + lang/cem/libcc.ansi/headers/setjmp.h | 39 + lang/cem/libcc.ansi/headers/signal.h | 97 + lang/cem/libcc.ansi/headers/stdarg.h | 20 + lang/cem/libcc.ansi/headers/stddef.h | 34 + lang/cem/libcc.ansi/headers/stdio.h | 135 + lang/cem/libcc.ansi/headers/stdlib.h | 64 + lang/cem/libcc.ansi/headers/string.h | 42 + lang/cem/libcc.ansi/headers/sys/dirent.h | 35 + lang/cem/libcc.ansi/headers/sys/errno.h | 144 + lang/cem/libcc.ansi/headers/time.h | 64 + lang/cem/libcc.ansi/headers/varargs.h | 12 + lang/cem/libcc.ansi/locale/LIST | 2 + lang/cem/libcc.ansi/locale/Makefile | 2 + lang/cem/libcc.ansi/locale/localeconv.c | 36 + lang/cem/libcc.ansi/locale/setlocale.c | 28 + lang/cem/libcc.ansi/math/LIST | 22 + lang/cem/libcc.ansi/math/Makefile | 4 + lang/cem/libcc.ansi/math/asin.c | 82 + lang/cem/libcc.ansi/math/atan.c | 72 + lang/cem/libcc.ansi/math/atan2.c | 42 + lang/cem/libcc.ansi/math/ceil.c | 20 + lang/cem/libcc.ansi/math/exp.c | 72 + lang/cem/libcc.ansi/math/fabs.c | 13 + lang/cem/libcc.ansi/math/floor.c | 20 + lang/cem/libcc.ansi/math/fmod.c | 34 + lang/cem/libcc.ansi/math/frexp.e | 18 + 
lang/cem/libcc.ansi/math/hugeval.c | 14 + lang/cem/libcc.ansi/math/isnan.c | 11 + lang/cem/libcc.ansi/math/ldexp.c | 55 + lang/cem/libcc.ansi/math/localmath.h | 42 + lang/cem/libcc.ansi/math/log.c | 67 + lang/cem/libcc.ansi/math/log10.c | 30 + lang/cem/libcc.ansi/math/modf.e | 22 + lang/cem/libcc.ansi/math/pow.c | 100 + lang/cem/libcc.ansi/math/sin.c | 99 + lang/cem/libcc.ansi/math/sinh.c | 81 + lang/cem/libcc.ansi/math/sqrt.c | 43 + lang/cem/libcc.ansi/math/tan.c | 76 + lang/cem/libcc.ansi/math/tanh.c | 55 + lang/cem/libcc.ansi/misc/LIST | 22 + lang/cem/libcc.ansi/misc/Makefile | 5 + lang/cem/libcc.ansi/misc/closedir.c | 36 + lang/cem/libcc.ansi/misc/environ.c | 20 + lang/cem/libcc.ansi/misc/fdopen.c | 61 + lang/cem/libcc.ansi/misc/getdents.c | 289 + lang/cem/libcc.ansi/misc/getgrent.c | 135 + lang/cem/libcc.ansi/misc/getopt.c | 62 + lang/cem/libcc.ansi/misc/getpass.c | 44 + lang/cem/libcc.ansi/misc/getpw.c | 38 + lang/cem/libcc.ansi/misc/getw.c | 19 + lang/cem/libcc.ansi/misc/hypot.c | 38 + lang/cem/libcc.ansi/misc/isatty.c | 11 + lang/cem/libcc.ansi/misc/mktemp.c | 30 + lang/cem/libcc.ansi/misc/opendir.c | 67 + lang/cem/libcc.ansi/misc/popen.c | 110 + lang/cem/libcc.ansi/misc/putenv.c | 79 + lang/cem/libcc.ansi/misc/putw.c | 19 + lang/cem/libcc.ansi/misc/readdir.c | 47 + lang/cem/libcc.ansi/misc/rewinddir.c | 37 + lang/cem/libcc.ansi/misc/seekdir.c | 109 + lang/cem/libcc.ansi/misc/sleep.c | 47 + lang/cem/libcc.ansi/misc/telldir.c | 34 + lang/cem/libcc.ansi/misc/termcap.c | 427 + lang/cem/libcc.ansi/proto.make | 20 + lang/cem/libcc.ansi/setjmp/LIST | 2 + lang/cem/libcc.ansi/setjmp/Makefile | 2 + lang/cem/libcc.ansi/setjmp/setjmp.e | 112 + lang/cem/libcc.ansi/setjmp/sigmisc.c | 39 + lang/cem/libcc.ansi/signal/LIST | 1 + lang/cem/libcc.ansi/signal/Makefile | 2 + lang/cem/libcc.ansi/signal/raise.c | 21 + lang/cem/libcc.ansi/stdio/LIST | 52 + lang/cem/libcc.ansi/stdio/Makefile | 10 + lang/cem/libcc.ansi/stdio/clearerr.c | 12 + lang/cem/libcc.ansi/stdio/data.c | 28 + 
lang/cem/libcc.ansi/stdio/doprnt.c | 310 + lang/cem/libcc.ansi/stdio/doscan.c | 450 + lang/cem/libcc.ansi/stdio/ecvt.c | 31 + lang/cem/libcc.ansi/stdio/fclose.c | 31 + lang/cem/libcc.ansi/stdio/feof.c | 12 + lang/cem/libcc.ansi/stdio/ferror.c | 12 + lang/cem/libcc.ansi/stdio/fflush.c | 76 + lang/cem/libcc.ansi/stdio/fgetc.c | 12 + lang/cem/libcc.ansi/stdio/fgetpos.c | 14 + lang/cem/libcc.ansi/stdio/fgets.c | 27 + lang/cem/libcc.ansi/stdio/fileno.c | 12 + lang/cem/libcc.ansi/stdio/fillbuf.c | 70 + lang/cem/libcc.ansi/stdio/fltpr.c | 178 + lang/cem/libcc.ansi/stdio/flushbuf.c | 127 + lang/cem/libcc.ansi/stdio/fopen.c | 117 + lang/cem/libcc.ansi/stdio/fprintf.c | 23 + lang/cem/libcc.ansi/stdio/fputc.c | 12 + lang/cem/libcc.ansi/stdio/fputs.c | 18 + lang/cem/libcc.ansi/stdio/fread.c | 29 + lang/cem/libcc.ansi/stdio/freopen.c | 96 + lang/cem/libcc.ansi/stdio/fscanf.c | 23 + lang/cem/libcc.ansi/stdio/fseek.c | 44 + lang/cem/libcc.ansi/stdio/fsetpos.c | 12 + lang/cem/libcc.ansi/stdio/ftell.c | 38 + lang/cem/libcc.ansi/stdio/fwrite.c | 29 + lang/cem/libcc.ansi/stdio/getc.c | 12 + lang/cem/libcc.ansi/stdio/getchar.c | 12 + lang/cem/libcc.ansi/stdio/gets.c | 27 + lang/cem/libcc.ansi/stdio/icompute.c | 21 + lang/cem/libcc.ansi/stdio/isatty.c | 17 + lang/cem/libcc.ansi/stdio/loc_incl.h | 38 + lang/cem/libcc.ansi/stdio/perror.c | 19 + lang/cem/libcc.ansi/stdio/printf.c | 23 + lang/cem/libcc.ansi/stdio/putc.c | 12 + lang/cem/libcc.ansi/stdio/putchar.c | 12 + lang/cem/libcc.ansi/stdio/puts.c | 20 + lang/cem/libcc.ansi/stdio/remove.c | 13 + lang/cem/libcc.ansi/stdio/rename.c | 17 + lang/cem/libcc.ansi/stdio/rewind.c | 14 + lang/cem/libcc.ansi/stdio/scanf.c | 25 + lang/cem/libcc.ansi/stdio/setbuf.c | 13 + lang/cem/libcc.ansi/stdio/setvbuf.c | 48 + lang/cem/libcc.ansi/stdio/sprintf.c | 31 + lang/cem/libcc.ansi/stdio/sscanf.c | 30 + lang/cem/libcc.ansi/stdio/tmpfile.c | 28 + lang/cem/libcc.ansi/stdio/tmpnam.c | 28 + lang/cem/libcc.ansi/stdio/ungetc.c | 26 + 
lang/cem/libcc.ansi/stdio/vfprintf.c | 14 + lang/cem/libcc.ansi/stdio/vprintf.c | 14 + lang/cem/libcc.ansi/stdio/vsprintf.c | 26 + lang/cem/libcc.ansi/stdlib/LIST | 25 + lang/cem/libcc.ansi/stdlib/Makefile | 14 + lang/cem/libcc.ansi/stdlib/abort.c | 21 + lang/cem/libcc.ansi/stdlib/abs.c | 13 + lang/cem/libcc.ansi/stdlib/atexit.c | 17 + lang/cem/libcc.ansi/stdlib/atof.c | 19 + lang/cem/libcc.ansi/stdlib/atoi.c | 29 + lang/cem/libcc.ansi/stdlib/atol.c | 29 + lang/cem/libcc.ansi/stdlib/bsearch.c | 28 + lang/cem/libcc.ansi/stdlib/div.c | 23 + lang/cem/libcc.ansi/stdlib/exit.c | 36 + lang/cem/libcc.ansi/stdlib/ext_comp.c | 743 ++ lang/cem/libcc.ansi/stdlib/ext_fmt.h | 13 + lang/cem/libcc.ansi/stdlib/getenv.c | 28 + lang/cem/libcc.ansi/stdlib/labs.c | 13 + lang/cem/libcc.ansi/stdlib/ldiv.c | 23 + lang/cem/libcc.ansi/stdlib/malloc.c | 1284 +++ lang/cem/libcc.ansi/stdlib/malloc/Makefile | 20 + lang/cem/libcc.ansi/stdlib/malloc/READ_ME | 27 + lang/cem/libcc.ansi/stdlib/malloc/add_file | 14 + lang/cem/libcc.ansi/stdlib/malloc/check.c | 303 + lang/cem/libcc.ansi/stdlib/malloc/check.h | 21 + lang/cem/libcc.ansi/stdlib/malloc/global.c | 11 + lang/cem/libcc.ansi/stdlib/malloc/impl.h | 84 + lang/cem/libcc.ansi/stdlib/malloc/log.c | 129 + lang/cem/libcc.ansi/stdlib/malloc/log.h | 26 + lang/cem/libcc.ansi/stdlib/malloc/mal.c | 384 + lang/cem/libcc.ansi/stdlib/malloc/param.h | 56 + lang/cem/libcc.ansi/stdlib/malloc/phys.c | 96 + lang/cem/libcc.ansi/stdlib/malloc/phys.h | 76 + lang/cem/libcc.ansi/stdlib/malloc/size_type.h | 8 + lang/cem/libcc.ansi/stdlib/mblen.c | 18 + lang/cem/libcc.ansi/stdlib/mbstowcs.c | 20 + lang/cem/libcc.ansi/stdlib/mbtowc.c | 17 + lang/cem/libcc.ansi/stdlib/qsort.c | 139 + lang/cem/libcc.ansi/stdlib/rand.c | 20 + lang/cem/libcc.ansi/stdlib/strtod.c | 16 + lang/cem/libcc.ansi/stdlib/strtol.c | 96 + lang/cem/libcc.ansi/stdlib/system.c | 59 + lang/cem/libcc.ansi/stdlib/wcstombs.c | 21 + lang/cem/libcc.ansi/stdlib/wctomb.c | 17 + lang/cem/libcc.ansi/string/LIST | 
22 + lang/cem/libcc.ansi/string/Makefile | 5 + lang/cem/libcc.ansi/string/memchr.c | 23 + lang/cem/libcc.ansi/string/memcmp.c | 22 + lang/cem/libcc.ansi/string/memcpy.c | 23 + lang/cem/libcc.ansi/string/memmove.c | 32 + lang/cem/libcc.ansi/string/memset.c | 21 + lang/cem/libcc.ansi/string/strcat.c | 20 + lang/cem/libcc.ansi/string/strchr.c | 18 + lang/cem/libcc.ansi/string/strcmp.c | 20 + lang/cem/libcc.ansi/string/strcoll.c | 19 + lang/cem/libcc.ansi/string/strcpy.c | 18 + lang/cem/libcc.ansi/string/strcspn.c | 21 + lang/cem/libcc.ansi/string/strerror.c | 22 + lang/cem/libcc.ansi/string/strlen.c | 18 + lang/cem/libcc.ansi/string/strncat.c | 25 + lang/cem/libcc.ansi/string/strncmp.c | 26 + lang/cem/libcc.ansi/string/strncpy.c | 24 + lang/cem/libcc.ansi/string/strpbrk.c | 22 + lang/cem/libcc.ansi/string/strrchr.c | 22 + lang/cem/libcc.ansi/string/strspn.c | 21 + lang/cem/libcc.ansi/string/strstr.c | 19 + lang/cem/libcc.ansi/string/strtok.c | 31 + lang/cem/libcc.ansi/string/strxfrm.c | 24 + lang/cem/libcc.ansi/time/LIST | 12 + lang/cem/libcc.ansi/time/Makefile | 3 + lang/cem/libcc.ansi/time/asctime.c | 60 + lang/cem/libcc.ansi/time/clock.c | 85 + lang/cem/libcc.ansi/time/ctime.c | 12 + lang/cem/libcc.ansi/time/difftime.c | 17 + lang/cem/libcc.ansi/time/gmtime.c | 41 + lang/cem/libcc.ansi/time/loc_time.h | 26 + lang/cem/libcc.ansi/time/localtime.c | 34 + lang/cem/libcc.ansi/time/misc.c | 506 ++ lang/cem/libcc.ansi/time/mktime.c | 130 + lang/cem/libcc.ansi/time/strftime.c | 172 + lang/cem/libcc.ansi/time/time.c | 36 + lang/cem/libcc.ansi/time/tzset.c | 15 + lang/cem/libcc/gen/LIST | 74 + lang/cem/libcc/gen/head_cc.e | 88 + lang/cem/libcc/gen/tail_cc.2g.a | Bin 0 -> 68930 bytes lang/cem/libcc/math/LIST | 21 + lang/cem/libcc/math/tail_m.a | Bin 0 -> 32522 bytes lang/cem/libcc/math/test.c | 205 + lang/cem/libcc/mon/LIST | 91 + lang/cem/libcc/mon/tail_mon.a | Bin 0 -> 21372 bytes lang/cem/libcc/mon/types | 65 + lang/cem/libcc/stdio/LIST | 48 + 
lang/cem/libcc/stdio/tail_cc.1s.a | Bin 0 -> 45456 bytes lang/cem/lint/README | 24 + lang/cem/lint/llib/ChangeLog | 11 + lang/cem/lint/llib/README | 22 + lang/cem/lint/llib/ansi_c.llb | 259 + lang/cem/lint/llib/c.llb | 258 + lang/cem/lint/llib/curses.llb | 0 lang/cem/lint/llib/m.llb | 0 lang/cem/lint/llib/proto.make | 72 + lang/cem/lint/llib/termlib.llb | 0 lang/cem/lint/llib/unix7.c | 138 + lang/cem/lint/lpass1.ansi/Parameters | 144 + lang/cem/lint/lpass1.ansi/proto.main | 59 + lang/cem/lint/lpass1.ansi/proto.make | 268 + lang/cem/lint/lpass1/Parameters | 147 + lang/cem/lint/lpass1/proto.main | 59 + lang/cem/lint/lpass1/proto.make | 262 + lang/cem/lint/lpass2/ChangeLog | 62 + lang/cem/lint/lpass2/checkargs.c | 283 + lang/cem/lint/lpass2/class.c | 29 + lang/cem/lint/lpass2/class.h | 30 + lang/cem/lint/lpass2/inpdef.str | 38 + lang/cem/lint/lpass2/l_print3ack.c | 30 + lang/cem/lint/lpass2/lint | 186 + lang/cem/lint/lpass2/lint.1 | 239 + lang/cem/lint/lpass2/lpass2.c | 514 ++ lang/cem/lint/lpass2/private.h | 7 + lang/cem/lint/lpass2/proto.make | 92 + lang/cem/lint/lpass2/read.c | 223 + lang/cem/lint/lpass2/report.c | 161 + lang/cem/lint/proto.make | 17 + lang/fortran/changes | 1184 +++ lang/fortran/comp/Notice | 23 + lang/fortran/comp/README | 73 + lang/fortran/comp/cds.c | 178 + lang/fortran/comp/data.c | 436 + lang/fortran/comp/defines.h | 289 + lang/fortran/comp/defs.h | 769 ++ lang/fortran/comp/equiv.c | 372 + lang/fortran/comp/error.c | 252 + lang/fortran/comp/exec.c | 831 ++ lang/fortran/comp/expr.c | 2882 ++++++ lang/fortran/comp/f2c.1 | 182 + lang/fortran/comp/f2c.1t | 326 + lang/fortran/comp/f2c.6 | 317 + lang/fortran/comp/f2c.h | 209 + lang/fortran/comp/format.c | 2108 +++++ lang/fortran/comp/format.h | 10 + lang/fortran/comp/formatdata.c | 1037 +++ lang/fortran/comp/ftypes.h | 39 + lang/fortran/comp/gram.dcl | 399 + lang/fortran/comp/gram.exec | 143 + lang/fortran/comp/gram.expr | 141 + lang/fortran/comp/gram.head | 299 + lang/fortran/comp/gram.io | 173 + 
lang/fortran/comp/init.c | 446 + lang/fortran/comp/intr.c | 846 ++ lang/fortran/comp/io.c | 1416 +++ lang/fortran/comp/iob.h | 24 + lang/fortran/comp/lex.c | 1453 +++ lang/fortran/comp/machdefs.h | 31 + lang/fortran/comp/main.c | 590 ++ lang/fortran/comp/makefile | 84 + lang/fortran/comp/malloc.c | 142 + lang/fortran/comp/mem.c | 230 + lang/fortran/comp/memset.c | 66 + lang/fortran/comp/misc.c | 1041 +++ lang/fortran/comp/mk_tokdefs | 1 + lang/fortran/comp/names.c | 711 ++ lang/fortran/comp/names.h | 22 + lang/fortran/comp/niceprintf.c | 367 + lang/fortran/comp/niceprintf.h | 16 + lang/fortran/comp/output.c | 1431 +++ lang/fortran/comp/output.h | 65 + lang/fortran/comp/p1defs.h | 160 + lang/fortran/comp/p1output.c | 568 ++ lang/fortran/comp/parse.h | 39 + lang/fortran/comp/parse_args.c | 499 ++ lang/fortran/comp/pccdefs.h | 64 + lang/fortran/comp/pread.c | 881 ++ lang/fortran/comp/proc.c | 1562 ++++ lang/fortran/comp/proto.make | 369 + lang/fortran/comp/put.c | 399 + lang/fortran/comp/putpcc.c | 1781 ++++ lang/fortran/comp/string.h | 16 + lang/fortran/comp/sysdep.c | 445 + lang/fortran/comp/sysdep.h | 83 + lang/fortran/comp/tokens | 99 + lang/fortran/comp/usignal.h | 7 + lang/fortran/comp/vax.c | 325 + lang/fortran/comp/version.c | 2 + lang/fortran/comp/xsum.c | 174 + lang/fortran/comp/xsum0.out | 56 + lang/fortran/disclaimer | 15 + lang/fortran/fc | 180 + lang/fortran/fixes | 1184 +++ lang/fortran/index | 392 + lang/fortran/lib/LIST | 2 + lang/fortran/lib/libF77/LIST | 113 + lang/fortran/lib/libF77/Notice | 23 + lang/fortran/lib/libF77/README | 20 + lang/fortran/lib/libF77/Version.c | 18 + lang/fortran/lib/libF77/abort_.c | 9 + lang/fortran/lib/libF77/c_abs.c | 9 + lang/fortran/lib/libF77/c_cos.c | 10 + lang/fortran/lib/libF77/c_div.c | 32 + lang/fortran/lib/libF77/c_exp.c | 12 + lang/fortran/lib/libF77/c_log.c | 10 + lang/fortran/lib/libF77/c_sin.c | 10 + lang/fortran/lib/libF77/c_sqrt.c | 25 + lang/fortran/lib/libF77/cabs.c | 21 + lang/fortran/lib/libF77/d_abs.c 
| 9 + lang/fortran/lib/libF77/d_acos.c | 8 + lang/fortran/lib/libF77/d_asin.c | 8 + lang/fortran/lib/libF77/d_atan.c | 8 + lang/fortran/lib/libF77/d_atn2.c | 8 + lang/fortran/lib/libF77/d_cnjg.c | 8 + lang/fortran/lib/libF77/d_cos.c | 8 + lang/fortran/lib/libF77/d_cosh.c | 8 + lang/fortran/lib/libF77/d_dim.c | 7 + lang/fortran/lib/libF77/d_exp.c | 8 + lang/fortran/lib/libF77/d_imag.c | 7 + lang/fortran/lib/libF77/d_int.c | 9 + lang/fortran/lib/libF77/d_lg10.c | 11 + lang/fortran/lib/libF77/d_log.c | 8 + lang/fortran/lib/libF77/d_mod.c | 26 + lang/fortran/lib/libF77/d_nint.c | 10 + lang/fortran/lib/libF77/d_prod.c | 7 + lang/fortran/lib/libF77/d_sign.c | 9 + lang/fortran/lib/libF77/d_sin.c | 8 + lang/fortran/lib/libF77/d_sinh.c | 8 + lang/fortran/lib/libF77/d_sqrt.c | 8 + lang/fortran/lib/libF77/d_tan.c | 8 + lang/fortran/lib/libF77/d_tanh.c | 8 + lang/fortran/lib/libF77/derf_.c | 9 + lang/fortran/lib/libF77/derfc_.c | 9 + lang/fortran/lib/libF77/ef1asc_.c | 15 + lang/fortran/lib/libF77/ef1cmc_.c | 12 + lang/fortran/lib/libF77/erf_.c | 9 + lang/fortran/lib/libF77/erfc_.c | 9 + lang/fortran/lib/libF77/getarg_.c | 27 + lang/fortran/lib/libF77/getenv_.c | 49 + lang/fortran/lib/libF77/h_abs.c | 11 + lang/fortran/lib/libF77/h_dim.c | 9 + lang/fortran/lib/libF77/h_dnnt.c | 12 + lang/fortran/lib/libF77/h_indx.c | 26 + lang/fortran/lib/libF77/h_len.c | 10 + lang/fortran/lib/libF77/h_mod.c | 9 + lang/fortran/lib/libF77/h_nint.c | 12 + lang/fortran/lib/libF77/h_sign.c | 11 + lang/fortran/lib/libF77/hl_ge.c | 10 + lang/fortran/lib/libF77/hl_gt.c | 10 + lang/fortran/lib/libF77/hl_le.c | 10 + lang/fortran/lib/libF77/hl_lt.c | 10 + lang/fortran/lib/libF77/i_abs.c | 9 + lang/fortran/lib/libF77/i_dim.c | 7 + lang/fortran/lib/libF77/i_dnnt.c | 10 + lang/fortran/lib/libF77/i_indx.c | 24 + lang/fortran/lib/libF77/i_len.c | 8 + lang/fortran/lib/libF77/i_mod.c | 7 + lang/fortran/lib/libF77/i_nint.c | 10 + lang/fortran/lib/libF77/i_sign.c | 9 + lang/fortran/lib/libF77/iargc_.c | 7 + 
lang/fortran/lib/libF77/l_ge.c | 10 + lang/fortran/lib/libF77/l_gt.c | 10 + lang/fortran/lib/libF77/l_le.c | 10 + lang/fortran/lib/libF77/l_lt.c | 8 + lang/fortran/lib/libF77/libF77.xsum | 116 + lang/fortran/lib/libF77/main.c | 95 + lang/fortran/lib/libF77/makefile | 74 + lang/fortran/lib/libF77/pow_ci.c | 16 + lang/fortran/lib/libF77/pow_dd.c | 9 + lang/fortran/lib/libF77/pow_di.c | 36 + lang/fortran/lib/libF77/pow_hh.c | 25 + lang/fortran/lib/libF77/pow_ii.c | 25 + lang/fortran/lib/libF77/pow_ri.c | 36 + lang/fortran/lib/libF77/pow_zi.c | 46 + lang/fortran/lib/libF77/pow_zz.c | 17 + lang/fortran/lib/libF77/r_abs.c | 9 + lang/fortran/lib/libF77/r_acos.c | 8 + lang/fortran/lib/libF77/r_asin.c | 8 + lang/fortran/lib/libF77/r_atan.c | 8 + lang/fortran/lib/libF77/r_atn2.c | 8 + lang/fortran/lib/libF77/r_cnjg.c | 8 + lang/fortran/lib/libF77/r_cos.c | 8 + lang/fortran/lib/libF77/r_cosh.c | 8 + lang/fortran/lib/libF77/r_dim.c | 7 + lang/fortran/lib/libF77/r_exp.c | 8 + lang/fortran/lib/libF77/r_imag.c | 7 + lang/fortran/lib/libF77/r_int.c | 9 + lang/fortran/lib/libF77/r_lg10.c | 11 + lang/fortran/lib/libF77/r_log.c | 8 + lang/fortran/lib/libF77/r_mod.c | 26 + lang/fortran/lib/libF77/r_nint.c | 10 + lang/fortran/lib/libF77/r_sign.c | 9 + lang/fortran/lib/libF77/r_sin.c | 8 + lang/fortran/lib/libF77/r_sinh.c | 8 + lang/fortran/lib/libF77/r_sqrt.c | 8 + lang/fortran/lib/libF77/r_tan.c | 8 + lang/fortran/lib/libF77/r_tanh.c | 8 + lang/fortran/lib/libF77/s_cat.c | 23 + lang/fortran/lib/libF77/s_cmp.c | 38 + lang/fortran/lib/libF77/s_copy.c | 23 + lang/fortran/lib/libF77/s_paus.c | 41 + lang/fortran/lib/libF77/s_rnge.c | 21 + lang/fortran/lib/libF77/s_stop.c | 19 + lang/fortran/lib/libF77/sig_die.c | 28 + lang/fortran/lib/libF77/signal_.c | 15 + lang/fortran/lib/libF77/system_.c | 19 + lang/fortran/lib/libF77/z_abs.c | 9 + lang/fortran/lib/libF77/z_cos.c | 10 + lang/fortran/lib/libF77/z_div.c | 33 + lang/fortran/lib/libF77/z_exp.c | 12 + lang/fortran/lib/libF77/z_log.c | 10 + 
lang/fortran/lib/libF77/z_sin.c | 10 + lang/fortran/lib/libF77/z_sqrt.c | 22 + lang/fortran/lib/libI77/LIST | 37 + lang/fortran/lib/libI77/Notice | 23 + lang/fortran/lib/libI77/README | 92 + lang/fortran/lib/libI77/Version.c | 94 + lang/fortran/lib/libI77/backspace.c | 63 + lang/fortran/lib/libI77/close.c | 59 + lang/fortran/lib/libI77/dfe.c | 136 + lang/fortran/lib/libI77/dolio.c | 7 + lang/fortran/lib/libI77/due.c | 51 + lang/fortran/lib/libI77/endfile.c | 83 + lang/fortran/lib/libI77/err.c | 223 + lang/fortran/lib/libI77/fio.h | 65 + lang/fortran/lib/libI77/fmt.c | 434 + lang/fortran/lib/libI77/fmt.h | 57 + lang/fortran/lib/libI77/fmtlib.c | 24 + lang/fortran/lib/libI77/fp.h | 26 + lang/fortran/lib/libI77/iio.c | 116 + lang/fortran/lib/libI77/ilnw.c | 62 + lang/fortran/lib/libI77/inquire.c | 93 + lang/fortran/lib/libI77/libI77.xsum | 39 + lang/fortran/lib/libI77/lio.h | 41 + lang/fortran/lib/libI77/local.h | 0 lang/fortran/lib/libI77/lread.c | 526 ++ lang/fortran/lib/libI77/lwrite.c | 148 + lang/fortran/lib/libI77/makefile | 92 + lang/fortran/lib/libI77/open.c | 190 + lang/fortran/lib/libI77/rdfmt.c | 324 + lang/fortran/lib/libI77/rewind.c | 20 + lang/fortran/lib/libI77/rsfe.c | 70 + lang/fortran/lib/libI77/rsli.c | 80 + lang/fortran/lib/libI77/rsne.c | 444 + lang/fortran/lib/libI77/sfe.c | 28 + lang/fortran/lib/libI77/sue.c | 67 + lang/fortran/lib/libI77/typesize.c | 6 + lang/fortran/lib/libI77/uio.c | 43 + lang/fortran/lib/libI77/util.c | 53 + lang/fortran/lib/libI77/wref.c | 224 + lang/fortran/lib/libI77/wrtfmt.c | 250 + lang/fortran/lib/libI77/wsfe.c | 85 + lang/fortran/lib/libI77/wsle.c | 33 + lang/fortran/lib/libI77/wsne.c | 27 + lang/fortran/lib/libI77/xwsne.c | 53 + lang/m2/comp/BigPars | 100 + lang/m2/comp/LLlex.c | 671 ++ lang/m2/comp/LLlex.h | 70 + lang/m2/comp/LLmessage.c | 78 + lang/m2/comp/SYSTEM.h | 25 + lang/m2/comp/SmallPars | 100 + lang/m2/comp/casestat.C | 385 + lang/m2/comp/char.tab | 53 + lang/m2/comp/chk_expr.c | 1576 ++++ 
lang/m2/comp/chk_expr.h | 25 + lang/m2/comp/class.h | 45 + lang/m2/comp/code.c | 1254 +++ lang/m2/comp/cstoper.c | 697 ++ lang/m2/comp/debug.h | 20 + lang/m2/comp/declar.g | 567 ++ lang/m2/comp/def.H | 143 + lang/m2/comp/def.c | 393 + lang/m2/comp/defmodule.c | 166 + lang/m2/comp/desig.c | 705 ++ lang/m2/comp/desig.h | 67 + lang/m2/comp/em_m2.6 | 93 + lang/m2/comp/enter.c | 561 ++ lang/m2/comp/error.c | 414 + lang/m2/comp/expression.g | 293 + lang/m2/comp/f_info.h | 21 + lang/m2/comp/idf.c | 13 + lang/m2/comp/idf.h | 23 + lang/m2/comp/input.c | 32 + lang/m2/comp/input.h | 18 + lang/m2/comp/lookup.c | 120 + lang/m2/comp/main.c | 288 + lang/m2/comp/main.h | 25 + lang/m2/comp/make.allocd | 26 + lang/m2/comp/make.hfiles | 35 + lang/m2/comp/make.next | 7 + lang/m2/comp/make.tokcase | 35 + lang/m2/comp/make.tokfile | 6 + lang/m2/comp/misc.c | 67 + lang/m2/comp/misc.h | 16 + lang/m2/comp/modula-2.1 | 99 + lang/m2/comp/node.H | 63 + lang/m2/comp/node.c | 158 + lang/m2/comp/options | 28 + lang/m2/comp/options.c | 260 + lang/m2/comp/program.g | 279 + lang/m2/comp/proto.main | 65 + lang/m2/comp/proto.make | 158 + lang/m2/comp/real.H | 19 + lang/m2/comp/scope.C | 218 + lang/m2/comp/scope.h | 61 + lang/m2/comp/stab.c | 447 + lang/m2/comp/standards.h | 41 + lang/m2/comp/statement.g | 291 + lang/m2/comp/tmpvar.C | 138 + lang/m2/comp/tokenname.c | 114 + lang/m2/comp/tokenname.h | 17 + lang/m2/comp/type.H | 246 + lang/m2/comp/type.c | 937 ++ lang/m2/comp/typequiv.c | 314 + lang/m2/comp/walk.c | 1152 +++ lang/m2/comp/walk.h | 29 + lang/m2/comp/warning.h | 30 + lang/m2/libm2/ASCII.def | 20 + lang/m2/libm2/Arguments.def | 34 + lang/m2/libm2/ArraySort.def | 26 + lang/m2/libm2/CSP.def | 53 + lang/m2/libm2/Conversion.def | 25 + lang/m2/libm2/EM.def | 38 + lang/m2/libm2/Epilogue.def | 21 + lang/m2/libm2/InOut.def | 116 + lang/m2/libm2/LIST | 47 + lang/m2/libm2/MathLib0.def | 27 + lang/m2/libm2/Mathlib.def | 117 + lang/m2/libm2/PascalIO.def | 158 + lang/m2/libm2/Processes.def | 36 + 
lang/m2/libm2/RealConver.def | 41 + lang/m2/libm2/RealInOut.def | 48 + lang/m2/libm2/Semaphores.def | 33 + lang/m2/libm2/Storage.def | 41 + lang/m2/libm2/Streams.def | 153 + lang/m2/libm2/Strings.def | 56 + lang/m2/libm2/StripUnix.def | 33 + lang/m2/libm2/Termcap.def | 45 + lang/m2/libm2/Terminal.def | 35 + lang/m2/libm2/Traps.def | 39 + lang/m2/libm2/Unix.def | 119 + lang/m2/libm2/XXTermcap.def | 47 + lang/m2/libm2/head_m2.e | 70 + lang/m2/libm2/proto.make | 31 + lang/m2/libm2/random.def | 22 + lang/m2/libm2/tail_m2.a | Bin 0 -> 126990 bytes lang/m2/m2mm/LLlex.c | 425 + lang/m2/m2mm/LLlex.h | 28 + lang/m2/m2mm/LLmessage.c | 50 + lang/m2/m2mm/char.tab | 53 + lang/m2/m2mm/class.h | 45 + lang/m2/m2mm/declar.g | 278 + lang/m2/m2mm/error.c | 237 + lang/m2/m2mm/expression.g | 150 + lang/m2/m2mm/f_info.h | 21 + lang/m2/m2mm/file_list.h | 24 + lang/m2/m2mm/idf.c | 13 + lang/m2/m2mm/idf.h | 40 + lang/m2/m2mm/idfsize.h | 1 + lang/m2/m2mm/input.c | 31 + lang/m2/m2mm/input.h | 18 + lang/m2/m2mm/inputtype.h | 1 + lang/m2/m2mm/lib.c | 65 + lang/m2/m2mm/m2mm.1 | 54 + lang/m2/m2mm/main.c | 475 + lang/m2/m2mm/main.h | 15 + lang/m2/m2mm/make.tokcase | 41 + lang/m2/m2mm/make.tokfile | 6 + lang/m2/m2mm/misc.c | 31 + lang/m2/m2mm/options.c | 103 + lang/m2/m2mm/program.g | 237 + lang/m2/m2mm/proto.main | 52 + lang/m2/m2mm/proto.make | 116 + lang/m2/m2mm/statement.g | 137 + lang/m2/m2mm/tokenname.c | 113 + lang/m2/m2mm/tokenname.h | 17 + lang/m2/test/Thalmann/LifeGame.mod | 151 + lang/m2/test/Thalmann/Shoes.mod | 54 + lang/m2/test/Thalmann/StoreFetch.mod | 91 + lang/m2/test/Thalmann/bold.mod | 133 + lang/m2/test/Thalmann/characters.mod | 29 + lang/m2/test/Wirth/PowersOf2.mod | 57 + lang/m2/test/Wirth/TableHandl.def | 17 + lang/m2/test/Wirth/TableHandl.mod | 183 + lang/m2/test/Wirth/XREF.mod | 153 + lang/m2/test/getenv.mod | 29 + lang/m2/test/m2p.mod | 1306 +++ lang/m2/test/queens.mod | 55 + lang/occam/comp/builtin.c | 78 + lang/occam/comp/code.c | 612 ++ lang/occam/comp/code.h | 24 + 
lang/occam/comp/em.c | 465 + lang/occam/comp/em.h | 27 + lang/occam/comp/expr.c | 477 + lang/occam/comp/expr.h | 66 + lang/occam/comp/keytab.c | 88 + lang/occam/comp/lex.l | 352 + lang/occam/comp/occam.g | 757 ++ lang/occam/comp/proto.main | 59 + lang/occam/comp/proto.make | 115 + lang/occam/comp/report.c | 35 + lang/occam/comp/sizes.h | 9 + lang/occam/comp/symtab.c | 208 + lang/occam/comp/symtab.h | 96 + lang/occam/comp/token.h | 16 + lang/occam/lib/LIST | 11 + lang/occam/lib/tail_ocm.a | Bin 0 -> 17588 bytes lang/occam/test/Huffman.ocm | 193 + lang/occam/test/Makefile | 40 + lang/occam/test/READ_ME | 1 + lang/occam/test/aatob.ocm | 25 + lang/occam/test/copy.ocm | 26 + lang/occam/test/key.ocm | 14 + lang/occam/test/lifegame.ocm | 248 + lang/occam/test/matmul.ocm | 98 + lang/occam/test/sort.ocm | 49 + lang/occam/test/use_prnt.ocm | 17 + lang/occam/test/xxtoy.ocm | 32 + lang/pc/comp/LLlex.c | 577 ++ lang/pc/comp/LLlex.h | 48 + lang/pc/comp/LLmessage.c | 73 + lang/pc/comp/Parameters | 62 + lang/pc/comp/body.c | 367 + lang/pc/comp/casestat.C | 256 + lang/pc/comp/char.tab | 36 + lang/pc/comp/chk_expr.c | 1350 +++ lang/pc/comp/chk_expr.h | 12 + lang/pc/comp/class.h | 34 + lang/pc/comp/code.c | 1261 +++ lang/pc/comp/const.h | 10 + lang/pc/comp/cstoper.c | 495 ++ lang/pc/comp/debug.h | 10 + lang/pc/comp/declar.g | 1011 +++ lang/pc/comp/def.H | 155 + lang/pc/comp/def.c | 303 + lang/pc/comp/desig.H | 59 + lang/pc/comp/desig.c | 611 ++ lang/pc/comp/em_pc.6 | 82 + lang/pc/comp/enter.c | 264 + lang/pc/comp/error.c | 409 + lang/pc/comp/expression.g | 297 + lang/pc/comp/f_info.h | 11 + lang/pc/comp/idf.c | 4 + lang/pc/comp/idf.h | 12 + lang/pc/comp/input.c | 17 + lang/pc/comp/input.h | 9 + lang/pc/comp/label.c | 168 + lang/pc/comp/lookup.c | 112 + lang/pc/comp/main.c | 276 + lang/pc/comp/main.h | 13 + lang/pc/comp/make.allocd | 26 + lang/pc/comp/make.hfiles | 35 + lang/pc/comp/make.next | 7 + lang/pc/comp/make.tokcase | 41 + lang/pc/comp/make.tokfile | 6 + lang/pc/comp/misc.c | 
61 + lang/pc/comp/misc.h | 19 + lang/pc/comp/node.H | 48 + lang/pc/comp/node.c | 95 + lang/pc/comp/options | 22 + lang/pc/comp/options.c | 170 + lang/pc/comp/program.g | 77 + lang/pc/comp/progs.c | 80 + lang/pc/comp/proto.main | 65 + lang/pc/comp/proto.make | 154 + lang/pc/comp/readwrite.c | 482 + lang/pc/comp/required.h | 48 + lang/pc/comp/scope.H | 32 + lang/pc/comp/scope.c | 117 + lang/pc/comp/stab.c | 396 + lang/pc/comp/statement.g | 475 + lang/pc/comp/tmpvar.C | 127 + lang/pc/comp/tokenname.c | 100 + lang/pc/comp/tokenname.h | 8 + lang/pc/comp/type.H | 197 + lang/pc/comp/type.c | 707 ++ lang/pc/comp/typequiv.c | 300 + lang/pc/libpc/LIST | 73 + lang/pc/libpc/Makefile | 13 + lang/pc/libpc/READ_ME | 11 + lang/pc/libpc/head_pc.e | 3 + lang/pc/libpc/tail_pc.a | Bin 0 -> 79276 bytes lang/pc/test/Makefile | 45 + lang/pc/test/b.p | 5 + lang/pc/test/callc.p | 61 + lang/pc/test/cmod.c | 66 + lang/pc/test/machar.p | 226 + lang/pc/test/t1.p | 730 ++ lang/pc/test/t2.p | 739 ++ lang/pc/test/t3.p | 442 + lang/pc/test/t4.p | 411 + lang/pc/test/t5.p | 13 + lang/pc/test/tstenc.p | 66 + lang/pc/test/tstgto.p | 75 + lib/6500/descr | 47 + lib/6800/descr | 43 + lib/6805/descr | 43 + lib/6809/descr | 44 + lib/arm/descr | 58 + lib/descr/cpm | 29 + lib/descr/fe | 254 + lib/em22/descr | 43 + lib/em24/descr | 43 + lib/em44/descr | 45 + lib/i386/descr | 69 + lib/i80/descr | 54 + lib/i86/descr | 63 + lib/m68020/descr | 77 + lib/m68k2/descr | 76 + lib/m68k4/descr | 78 + lib/mantra/descr | 79 + lib/minix/descr | 64 + lib/minix/include/ansi.h | 54 + lib/minix/include/errno.h | 96 + lib/minix/include/fcntl.h | 68 + lib/minix/include/lib.h | 41 + lib/minix/include/limits.h | 69 + lib/minix/include/minix/callnr.h | 58 + lib/minix/include/minix/com.h | 166 + lib/minix/include/minix/config.h | 121 + lib/minix/include/minix/const.h | 88 + lib/minix/include/minix/type.h | 134 + lib/minix/include/sgtty.h | 96 + lib/minix/include/signal.h | 115 + lib/minix/include/string.h | 57 + 
lib/minix/include/sys/errno.h | 96 + lib/minix/include/sys/stat.h | 74 + lib/minix/include/sys/times.h | 26 + lib/minix/include/sys/types.h | 63 + lib/minix/include/sys/wait.h | 40 + lib/minix/include/time.h | 66 + lib/minix/include/unistd.h | 130 + lib/minix/include/utime.h | 19 + lib/minixST/descr | 66 + lib/minixST/include/a.out.h | 111 + lib/minixST/include/minix/config.h | 121 + lib/ns/descr | 55 + lib/pdp/descr | 71 + lib/pmds/descr | 78 + lib/pmds4/descr | 80 + lib/s2650/descr | 43 + lib/sparc/descr | 72 + lib/sparc_solaris/descr | 72 + lib/sun2/descr | 82 + lib/sun3/descr | 87 + lib/vax4/descr | 80 + lib/xenix3/descr | 66 + lib/z80/descr | 53 + lib/z8000/descr | 47 + mach/6500/Action | 21 + mach/6500/as/READ_ME | 15 + mach/6500/as/mach0.c | 12 + mach/6500/as/mach1.c | 11 + mach/6500/as/mach2.c | 32 + mach/6500/as/mach3.c | 73 + mach/6500/as/mach4.c | 180 + mach/6500/as/mach5.c | 45 + mach/6500/cg/mach.c | 86 + mach/6500/cg/mach.h | 31 + mach/6500/cg/table | 2299 +++++ mach/6500/dl/dl.c | 176 + mach/6500/dl/proto.make | 34 + mach/6500/libem/LIST | 88 + mach/6500/libem/libem_s.a | Bin 0 -> 56008 bytes mach/6500/libend/LIST | 5 + mach/6500/libend/end_s.a | Bin 0 -> 612 bytes mach/6500/libmon/LIST | 7 + mach/6500/libmon/head_em.s | 240 + mach/6500/libmon/libmon_s.a | Bin 0 -> 4312 bytes mach/6500/mach_params | 5 + mach/6800/Action | 3 + mach/6800/as/mach0.c | 15 + mach/6800/as/mach1.c | 9 + mach/6800/as/mach2.c | 17 + mach/6800/as/mach3.c | 118 + mach/6800/as/mach4.c | 68 + mach/6800/as/mach5.c | 46 + mach/6805/Action | 3 + mach/6805/as/mach0.c | 15 + mach/6805/as/mach1.c | 13 + mach/6805/as/mach2.c | 21 + mach/6805/as/mach3.c | 127 + mach/6805/as/mach4.c | 194 + mach/6805/as/mach5.c | 55 + mach/6809/Action | 3 + mach/6809/as/mach0.c | 14 + mach/6809/as/mach1.c | 44 + mach/6809/as/mach2.c | 23 + mach/6809/as/mach3.c | 170 + mach/6809/as/mach4.c | 202 + mach/6809/as/mach5.c | 106 + mach/arm/Action | 27 + mach/arm/READ_ME | 57 + mach/arm/as/mach0.c | 50 + 
mach/arm/as/mach1.c | 3 + mach/arm/as/mach2.c | 24 + mach/arm/as/mach3.c | 121 + mach/arm/as/mach4.c | 129 + mach/arm/as/mach5.c | 510 ++ mach/arm/cv/arm.h | 34 + mach/arm/cv/cv.c | 245 + mach/arm/cv/proto.make | 34 + mach/arm/libem/LIST | 9 + mach/arm/libem/libem_s.a | Bin 0 -> 3198 bytes mach/arm/libend/LIST | 5 + mach/arm/libend/end_s.a | Bin 0 -> 612 bytes mach/arm/libfp/byte_order.h | 6 + mach/arm/libmon/LIST | 3 + mach/arm/libmon/head_em.s | 178 + mach/arm/libmon/libmon_s.a | Bin 0 -> 12726 bytes mach/arm/mach_params | 5 + mach/arm/ncg/mach.c | 148 + mach/arm/ncg/mach.h | 24 + mach/arm/ncg/table | 2359 +++++ mach/arm/top/table | 925 ++ mach/con_float | 279 + mach/em22/Action | 6 + mach/em22/libend/LIST | 5 + mach/em22/libend/end_e.a | Bin 0 -> 260 bytes mach/em22/mach_params | 5 + mach/em24/Action | 6 + mach/em24/libend/LIST | 5 + mach/em24/libend/end_e.a | Bin 0 -> 260 bytes mach/em24/mach_params | 5 + mach/em44/Action | 6 + mach/em44/libend/LIST | 5 + mach/em44/libend/end_e.a | Bin 0 -> 260 bytes mach/em44/mach_params | 5 + mach/i386/Action | 28 + mach/i386/as/mach0.c | 22 + mach/i386/as/mach1.c | 102 + mach/i386/as/mach2.c | 66 + mach/i386/as/mach3.c | 420 + mach/i386/as/mach4.c | 295 + mach/i386/as/mach5.c | 599 ++ mach/i386/ce/EM_table | 1110 +++ mach/i386/ce/as.c | 362 + mach/i386/ce/as.h | 37 + mach/i386/ce/as_table | 224 + mach/i386/ce/mach.c | 39 + mach/i386/ce/mach.h | 50 + mach/i386/ce/proto.make | 32 + mach/i386/cv/cv.c | 460 + mach/i386/cv/proto.make | 34 + mach/i386/libdb/machdep.s | 72 + mach/i386/libem/LIST | 45 + mach/i386/libem/libem_s.a | Bin 0 -> 21016 bytes mach/i386/libend/LIST | 5 + mach/i386/libend/end_s.a | Bin 0 -> 742 bytes mach/i386/libfp/byte_order.h | 6 + mach/i386/libsys/LIST | 94 + mach/i386/libsys/head_em.s | 67 + mach/i386/libsys/libmon_s.a | Bin 0 -> 13562 bytes mach/i386/mach_params | 5 + mach/i386/ncg/mach.c | 263 + mach/i386/ncg/mach.h | 34 + mach/i386/ncg/table | 2963 +++++++ mach/i80/Action | 21 + mach/i80/as/mach0.c | 
11 + mach/i80/as/mach1.c | 23 + mach/i80/as/mach2.c | 23 + mach/i80/as/mach3.c | 99 + mach/i80/as/mach4.c | 86 + mach/i80/as/mach5.c | 5 + mach/i80/dl/README | 5 + mach/i80/dl/mccpm.c | 147 + mach/i80/dl/nascom.c | 174 + mach/i80/dl/proto.make | 40 + mach/i80/libem/LIST | 38 + mach/i80/libem/libem_s.a | Bin 0 -> 30810 bytes mach/i80/libend/LIST | 5 + mach/i80/libend/end_s.a | Bin 0 -> 612 bytes mach/i80/libmon/LIST | 7 + mach/i80/libmon/README | 6 + mach/i80/libmon/char.nas.s | 55 + mach/i80/libmon/head_em.s | 59 + mach/i80/libmon/libmon_s.a | Bin 0 -> 6972 bytes mach/i80/mach_params | 5 + mach/i80/ncg/mach.c | 97 + mach/i80/ncg/mach.h | 30 + mach/i80/ncg/table | 2055 +++++ mach/i86/Action | 21 + mach/i86/as/mach0.c | 21 + mach/i86/as/mach1.c | 44 + mach/i86/as/mach2.c | 49 + mach/i86/as/mach3.c | 337 + mach/i86/as/mach4.c | 165 + mach/i86/as/mach5.c | 358 + mach/i86/ce/EM_table | 1266 +++ mach/i86/ce/as.c | 312 + mach/i86/ce/as.h | 40 + mach/i86/ce/as_table | 232 + mach/i86/ce/mach.c | 27 + mach/i86/ce/mach.h | 30 + mach/i86/ce/proto.make | 32 + mach/i86/cv/cv.c | 322 + mach/i86/cv/proto.make | 34 + mach/i86/libem/LIST | 60 + mach/i86/libem/libem_s.a | Bin 0 -> 32108 bytes mach/i86/libend/LIST | 5 + mach/i86/libend/end_s.a | Bin 0 -> 612 bytes mach/i86/libfp/byte_order.h | 6 + mach/i86/libsys/LIST | 76 + mach/i86/libsys/head_em.s | 50 + mach/i86/libsys/libmon_s.a | Bin 0 -> 15340 bytes mach/i86/mach_params | 5 + mach/i86/ncg/mach.c | 187 + mach/i86/ncg/mach.h | 33 + mach/i86/ncg/table | 3378 +++++++ mach/m68020/Action | 31 + mach/m68020/as/Expect | 1 + mach/m68020/as/mach0.c | 33 + mach/m68020/as/mach1.c | 121 + mach/m68020/as/mach2.c | 88 + mach/m68020/as/mach3.c | 466 + mach/m68020/as/mach4.c | 726 ++ mach/m68020/as/mach5.c | 697 ++ mach/m68020/ce/EM_table | 885 ++ mach/m68020/ce/as.c | 396 + mach/m68020/ce/as.h | 34 + mach/m68020/ce/as_table | 178 + mach/m68020/ce/mach.c | 74 + mach/m68020/ce/mach.h | 48 + mach/m68020/ce/proto.make | 32 + mach/m68020/cv/cv.c | 
332 + mach/m68020/cv/proto.make | 34 + mach/m68020/libdb/machdep.s | 63 + mach/m68020/libem/LIST | 25 + mach/m68020/libem/libem_s.a | Bin 0 -> 18190 bytes mach/m68020/libend/LIST | 5 + mach/m68020/libend/end_s.a | Bin 0 -> 742 bytes mach/m68020/libfp/byte_order.h | 6 + mach/m68020/libsys/LIST | 86 + mach/m68020/libsys/README | 66 + mach/m68020/libsys/head_em.s | 76 + mach/m68020/libsys/libmon_s.a | Bin 0 -> 16288 bytes mach/m68020/mach_params | 5 + mach/m68020/ncg/README | 11 + mach/m68020/ncg/instrmacs.h | 144 + mach/m68020/ncg/mach.c | 288 + mach/m68020/ncg/mach.h | 49 + mach/m68020/ncg/table | 6942 +++++++++++++++ mach/m68020/ncg/whichone.h | 9 + mach/m68020/top/table | 361 + mach/m68k2/Action | 24 + mach/m68k2/README | 59 + mach/m68k2/Unisoft_bug | 27 + mach/m68k2/as/mach0.c | 31 + mach/m68k2/as/mach1.c | 78 + mach/m68k2/as/mach2.c | 65 + mach/m68k2/as/mach3.c | 389 + mach/m68k2/as/mach4.c | 425 + mach/m68k2/as/mach5.c | 541 ++ mach/m68k2/cv/cv.c | 391 + mach/m68k2/cv/proto.make | 34 + mach/m68k2/dl/dl.c | 126 + mach/m68k2/dl/proto.make | 33 + mach/m68k2/libem/LIST | 35 + mach/m68k2/libem/READ_ME | 4 + mach/m68k2/libem/libem_s.a | Bin 0 -> 30014 bytes mach/m68k2/libend/LIST | 5 + mach/m68k2/libend/end_s.a | Bin 0 -> 742 bytes mach/m68k2/libfp/byte_order.h | 6 + mach/m68k2/libsys/LIST | 80 + mach/m68k2/libsys/head_em.s | 84 + mach/m68k2/libsys/libmon_s.a | Bin 0 -> 16818 bytes mach/m68k2/mach_params | 5 + mach/m68k2/ncg/table_dir | 1 + mach/m68k2/ncg/whichone.h | 9 + mach/m68k2/top/table | 227 + mach/m68k4/Action | 15 + mach/m68k4/libem/LIST | 32 + mach/m68k4/libem/libem_s.a | Bin 0 -> 24432 bytes mach/m68k4/libend/LIST | 5 + mach/m68k4/libend/end_s.a | Bin 0 -> 742 bytes mach/m68k4/libfp/byte_order.h | 6 + mach/m68k4/libsys/LIST | 85 + mach/m68k4/libsys/head_em.s | 81 + mach/m68k4/libsys/libmon_s.a | Bin 0 -> 17990 bytes mach/m68k4/mach_params | 5 + mach/m68k4/ncg/table_dir | 1 + mach/m68k4/ncg/whichone.h | 8 + mach/mantra/Action | 6 + mach/mantra/cv/Xcv.c | 
344 + mach/mantra/cv/cv.c | 323 + mach/mantra/cv/proto.make | 43 + mach/mantra/int/con_float.c | 13 + mach/mantra/int/copyright | 57 + mach/mantra/int/deffile | 90 + mach/mantra/int/em.1 | 91 + mach/mantra/int/em.c | 114 + mach/mantra/int/mloop0 | 562 ++ mach/mantra/int/mloop1 | 497 ++ mach/mantra/int/mloop2 | 494 ++ mach/mantra/int/mloop3 | 300 + mach/mantra/int/mloop4 | 268 + mach/mantra/int/mloop5 | 343 + mach/mantra/int/mloop6 | 369 + mach/mantra/int/mloop7 | 251 + mach/mantra/int/mloop8 | 256 + mach/mantra/int/mloop9 | 332 + mach/mantra/int/mloopa | 483 + mach/mantra/int/mloopb | 145 + mach/mantra/int/mloopc | 529 ++ mach/mantra/int/proto.make | 86 + mach/mantra/libsys/LIST | 92 + mach/mantra/libsys/head_em.s | 81 + mach/mantra/libsys/libmon_s.a | Bin 0 -> 22650 bytes mach/mantra/mach_params | 5 + mach/minix/Action | 3 + mach/minix/libsys/LIST | 127 + mach/minix/libsys/head_em.s | 53 + mach/minix/libsys/libmon_s.a | Bin 0 -> 47480 bytes mach/minix/mach_params | 5 + mach/minixST/Action | 6 + mach/minixST/cv/cv.c | 331 + mach/minixST/cv/proto.make | 34 + mach/minixST/libsys/LIST | 127 + mach/minixST/libsys/head_em.s | 69 + mach/minixST/libsys/libmon_s.a | Bin 0 -> 47578 bytes mach/minixST/mach_params | 5 + mach/ns/Action | 18 + mach/ns/as/mach0.c | 25 + mach/ns/as/mach1.c | 117 + mach/ns/as/mach2.c | 63 + mach/ns/as/mach3.c | 457 + mach/ns/as/mach4.c | 405 + mach/ns/as/mach5.c | 315 + mach/ns/libem/LIST | 26 + mach/ns/libem/libem_s.a | Bin 0 -> 8044 bytes mach/ns/libend/LIST | 5 + mach/ns/libend/end_s.a | Bin 0 -> 742 bytes mach/ns/libmon/LIST | 2 + mach/ns/libmon/head_em.s | 62 + mach/ns/libmon/libmon_s.a | Bin 0 -> 724 bytes mach/ns/mach_params | 5 + mach/ns/ncg/mach.c | 161 + mach/ns/ncg/mach.h | 32 + mach/ns/ncg/table | 2270 +++++ mach/pdp/Action | 25 + mach/pdp/as/mach0.c | 23 + mach/pdp/as/mach1.c | 39 + mach/pdp/as/mach2.c | 40 + mach/pdp/as/mach3.c | 155 + mach/pdp/as/mach4.c | 154 + mach/pdp/as/mach5.c | 124 + mach/pdp/cg/mach.c | 210 + 
mach/pdp/cg/mach.h | 56 + mach/pdp/cg/table | 2863 ++++++ mach/pdp/cv/cv.c | 311 + mach/pdp/cv/proto.make | 34 + mach/pdp/int/README | 18 + mach/pdp/int/c+ | 1 + mach/pdp/int/c- | 1 + mach/pdp/int/em.1 | 76 + mach/pdp/int/em.c | 114 + mach/pdp/int/em_int.s | 3784 ++++++++ mach/pdp/int/eminform.1 | 51 + mach/pdp/int/eminform.s | 634 ++ mach/pdp/int/f+ | 1 + mach/pdp/int/f- | 1 + mach/pdp/int/p+ | 1 + mach/pdp/int/p- | 1 + mach/pdp/int/proto.make | 64 + mach/pdp/int/t+ | 1 + mach/pdp/int/t- | 1 + mach/pdp/libem/LIST | 72 + mach/pdp/libem/libem_s.a | Bin 0 -> 28824 bytes mach/pdp/libend/LIST | 5 + mach/pdp/libend/end_s.a | Bin 0 -> 742 bytes mach/pdp/libsys/LIST | 103 + mach/pdp/libsys/head_em.s | 80 + mach/pdp/libsys/libmon_s.a | Bin 0 -> 21898 bytes mach/pdp/libsys/sys.h | 104 + mach/pdp/mach_params | 5 + mach/pdp/top/table | 107 + mach/pmds/Action | 6 + mach/pmds/cv/cv.c | 222 + mach/pmds/cv/proto.make | 34 + mach/pmds/libsys/LIST | 62 + mach/pmds/libsys/head_em.s | 58 + mach/pmds/libsys/libmon_s.a | Bin 0 -> 19434 bytes mach/pmds/mach_params | 5 + mach/pmds4/Action | 3 + mach/pmds4/libsys/LIST | 77 + mach/pmds4/libsys/head_em.s | 68 + mach/pmds4/libsys/libmon_s.a | Bin 0 -> 16622 bytes mach/pmds4/mach_params | 5 + mach/proto/as/comm0.h | 262 + mach/proto/as/comm1.h | 129 + mach/proto/as/comm2.y | 431 + mach/proto/as/comm3.c | 50 + mach/proto/as/comm4.c | 595 ++ mach/proto/as/comm5.c | 562 ++ mach/proto/as/comm6.c | 371 + mach/proto/as/comm7.c | 440 + mach/proto/as/comm8.c | 14 + mach/proto/as/proto.make | 80 + mach/proto/cg/assert.h | 11 + mach/proto/cg/codegen.c | 692 ++ mach/proto/cg/compute.c | 353 + mach/proto/cg/data.h | 58 + mach/proto/cg/equiv.c | 94 + mach/proto/cg/equiv.h | 12 + mach/proto/cg/extern.h | 53 + mach/proto/cg/fillem.c | 699 ++ mach/proto/cg/gencode.c | 184 + mach/proto/cg/glosym.c | 41 + mach/proto/cg/glosym.h | 13 + mach/proto/cg/main.c | 80 + mach/proto/cg/move.c | 100 + mach/proto/cg/nextem.c | 125 + mach/proto/cg/param.h | 23 + 
mach/proto/cg/proto.make | 190 + mach/proto/cg/reg.c | 172 + mach/proto/cg/regvar.c | 140 + mach/proto/cg/regvar.h | 23 + mach/proto/cg/result.h | 23 + mach/proto/cg/salloc.c | 139 + mach/proto/cg/state.c | 93 + mach/proto/cg/state.h | 28 + mach/proto/cg/subr.c | 536 ++ mach/proto/cg/types.h | 30 + mach/proto/cg/var.c | 30 + mach/proto/fp/FP.script | 41 + mach/proto/fp/FP_bias.h | 28 + mach/proto/fp/FP_shift.h | 49 + mach/proto/fp/FP_trap.h | 22 + mach/proto/fp/FP_types.h | 113 + mach/proto/fp/add_ext.c | 56 + mach/proto/fp/adder.c | 50 + mach/proto/fp/adf4.c | 32 + mach/proto/fp/adf8.c | 32 + mach/proto/fp/cff4.c | 28 + mach/proto/fp/cff8.c | 28 + mach/proto/fp/cfi.c | 52 + mach/proto/fp/cfu.c | 43 + mach/proto/fp/cif4.c | 56 + mach/proto/fp/cif8.c | 55 + mach/proto/fp/cmf4.c | 40 + mach/proto/fp/cmf8.c | 61 + mach/proto/fp/compact.c | 202 + mach/proto/fp/cuf4.c | 57 + mach/proto/fp/cuf8.c | 54 + mach/proto/fp/div_ext.c | 266 + mach/proto/fp/dvf4.c | 26 + mach/proto/fp/dvf8.c | 26 + mach/proto/fp/extend.c | 111 + mach/proto/fp/fef4.c | 33 + mach/proto/fp/fef8.c | 33 + mach/proto/fp/fif4.c | 46 + mach/proto/fp/fif8.c | 48 + mach/proto/fp/fptrp.e | 21 + mach/proto/fp/get_put.h | 41 + mach/proto/fp/mlf4.c | 25 + mach/proto/fp/mlf8.c | 25 + mach/proto/fp/mul_ext.c | 98 + mach/proto/fp/ngf4.c | 27 + mach/proto/fp/ngf8.c | 28 + mach/proto/fp/nrm_ext.c | 50 + mach/proto/fp/sbf4.c | 27 + mach/proto/fp/sbf8.c | 27 + mach/proto/fp/sft_ext.c | 39 + mach/proto/fp/shifter.c | 75 + mach/proto/fp/sub_ext.c | 53 + mach/proto/fp/zrf4.c | 19 + mach/proto/fp/zrf8.c | 21 + mach/proto/fp/zrf_ext.c | 22 + mach/proto/grind/READ_ME | 17 + mach/proto/grind/atlin.c | 508 ++ mach/proto/grind/lib.e | 58 + mach/proto/grind/par_misc.e | 146 + mach/proto/libg/barrier.c | 6 + mach/proto/libg/compmodule | 6 + mach/proto/libg/proto.libbc | 43 + mach/proto/libg/proto.libcc | 63 + mach/proto/libg/proto.libcc.ansi | 63 + mach/proto/libg/proto.libdb | 49 + mach/proto/libg/proto.libem | 33 + 
mach/proto/libg/proto.libend | 35 + mach/proto/libg/proto.libf77 | 51 + mach/proto/libg/proto.libfp | 281 + mach/proto/libg/proto.libm2 | 43 + mach/proto/libg/proto.libmon | 39 + mach/proto/libg/proto.liboc | 42 + mach/proto/libg/proto.libpc | 43 + mach/proto/libg/proto.libsys | 39 + mach/proto/libg/proto.make | 48 + mach/proto/libg/proto.sysmon | 42 + mach/proto/ncg/assert.h | 11 + mach/proto/ncg/codegen.c | 973 ++ mach/proto/ncg/compute.c | 417 + mach/proto/ncg/data.h | 71 + mach/proto/ncg/equiv.c | 97 + mach/proto/ncg/equiv.h | 12 + mach/proto/ncg/extern.h | 54 + mach/proto/ncg/fillem.c | 740 ++ mach/proto/ncg/gencode.c | 152 + mach/proto/ncg/glosym.c | 42 + mach/proto/ncg/glosym.h | 13 + mach/proto/ncg/label.c | 47 + mach/proto/ncg/label.h | 17 + mach/proto/ncg/main.c | 94 + mach/proto/ncg/move.c | 140 + mach/proto/ncg/nextem.c | 135 + mach/proto/ncg/param.h | 26 + mach/proto/ncg/proto.make | 204 + mach/proto/ncg/reg.c | 214 + mach/proto/ncg/regvar.c | 150 + mach/proto/ncg/regvar.h | 23 + mach/proto/ncg/result.h | 21 + mach/proto/ncg/salloc.c | 139 + mach/proto/ncg/state.c | 67 + mach/proto/ncg/state.h | 22 + mach/proto/ncg/subr.c | 709 ++ mach/proto/ncg/types.h | 30 + mach/proto/ncg/var.c | 30 + mach/proto/top/proto.make | 50 + mach/proto/top/queue.c | 77 + mach/proto/top/queue.h | 16 + mach/proto/top/top.c | 663 ++ mach/proto/top/top.h | 93 + mach/s2650/Action | 3 + mach/s2650/as/mach0.c | 13 + mach/s2650/as/mach1.c | 24 + mach/s2650/as/mach2.c | 27 + mach/s2650/as/mach3.c | 94 + mach/s2650/as/mach4.c | 128 + mach/s2650/as/mach5.c | 90 + mach/sparc/Action | 28 + mach/sparc/ce/EM_table | 7880 +++++++++++++++++ mach/sparc/ce/EM_table.x | 4718 ++++++++++ mach/sparc/ce/Makefile | 14 + mach/sparc/ce/back.src/Makefile | 103 + mach/sparc/ce/back.src/back.h | 84 + mach/sparc/ce/back.src/con_str.c | 8 + mach/sparc/ce/back.src/const.h | 2 + mach/sparc/ce/back.src/do_open.c | 24 + mach/sparc/ce/back.src/gen_str.c | 27 + mach/sparc/ce/back.src/header.h | 4 + 
mach/sparc/ce/back.src/rom_str.c | 8 + mach/sparc/ce/back.src/symboldef.c | 9 + mach/sparc/ce/cache.c | 1424 +++ mach/sparc/ce/cache.c.x | 1304 +++ mach/sparc/ce/ce.src/C_con_scon.c | 15 + mach/sparc/ce/ce.src/C_cst.c | 8 + mach/sparc/ce/ce.src/C_dlb.c | 4 + mach/sparc/ce/ce.src/C_dnam.c | 4 + mach/sparc/ce/ce.src/C_exa_dnam.c | 14 + mach/sparc/ce/ce.src/C_ilb.c | 4 + mach/sparc/ce/ce.src/C_mes_begin.c | 8 + mach/sparc/ce/ce.src/C_mes_end.c | 7 + mach/sparc/ce/ce.src/C_pnam.c | 4 + mach/sparc/ce/ce.src/C_pro.c | 28 + mach/sparc/ce/ce.src/C_rom_scon.c | 15 + mach/sparc/ce/ce.src/C_scon.c | 4 + mach/sparc/ce/ce.src/misc.c | 52 + mach/sparc/ce/ce.src/ms_reg.c | 550 ++ mach/sparc/ce/cegpp | 30 + mach/sparc/ce/mach.c | 22 + mach/sparc/ce/mach.h | 108 + mach/sparc/ce/mach_em.h | 86 + mach/sparc/ce/misc.h | 14 + mach/sparc/ce/ms_reg.h | 16 + mach/sparc/ce/proto.make | 37 + mach/sparc/ce/push_pop.h | 76 + mach/sparc/ce_cg/convert.c | 116 + mach/sparc/ce_cg/proto.make | 43 + mach/sparc/libdb/machdep.s | 64 + mach/sparc/libem/LIST | 12 + mach/sparc/libem/libem_s.a | Bin 0 -> 10296 bytes mach/sparc/libend/LIST | 2 + mach/sparc/libend/end_s.a | Bin 0 -> 60 bytes mach/sparc/libsys/LIST | 179 + mach/sparc/libsys/SYS.h | 149 + mach/sparc/libsys/head_em.s | 76 + mach/sparc/libsys/libmon_s.a | Bin 0 -> 20630 bytes mach/sparc/libsys/syscall.h | 139 + mach/sparc/mach_params | 5 + mach/sparc/top/table | 138 + mach/sparc_solaris/Action | 24 + mach/sparc_solaris/ce/proto.make | 37 + mach/sparc_solaris/ce_cg/proto.make | 43 + mach/sparc_solaris/libdb/machdep.s | 64 + mach/sparc_solaris/libem/LIST | 12 + mach/sparc_solaris/libem/Makefile | 5 + mach/sparc_solaris/libem/READ_ME | 2 + mach/sparc_solaris/libem/libem_s.a | Bin 0 -> 10296 bytes mach/sparc_solaris/libend/LIST | 5 + mach/sparc_solaris/libend/end_s.a | Bin 0 -> 260 bytes mach/sparc_solaris/libsys/LIST | 142 + mach/sparc_solaris/libsys/SYS.h | 119 + mach/sparc_solaris/libsys/head_em.s | 84 + mach/sparc_solaris/libsys/libmon_s.a | 
Bin 0 -> 11256 bytes mach/sparc_solaris/mach_params | 5 + mach/sun2/Action | 6 + mach/sun2/cv/proto.make | 34 + mach/sun2/libsys/LIST | 186 + mach/sun2/libsys/head_em.s | 69 + mach/sun2/libsys/libmon_s.a | Bin 0 -> 33466 bytes mach/sun2/libsys/syscall.h | 132 + mach/sun2/mach_params | 5 + mach/sun3/Action | 14 + mach/sun3/ce/EM_table | 820 ++ mach/sun3/ce/Make.back | 107 + mach/sun3/ce/as.c | 376 + mach/sun3/ce/as.h | 33 + mach/sun3/ce/as_table | 178 + mach/sun3/ce/do_close.c | 9 + mach/sun3/ce/do_open.c | 14 + mach/sun3/ce/end_back.c | 37 + mach/sun3/ce/mach.c | 70 + mach/sun3/ce/mach.h | 47 + mach/sun3/ce/misc.c | 45 + mach/sun3/ce/output.c | 298 + mach/sun3/ce/proto.make | 49 + mach/sun3/ce/relocation.c | 46 + mach/sun3/cv/Xcv.c | 208 + mach/sun3/cv/cv.c | 624 ++ mach/sun3/cv/proto.make | 40 + mach/sun3/libce/adf4.s | 11 + mach/sun3/libce/adf8.s | 15 + mach/sun3/libce/cff.s | 26 + mach/sun3/libce/cfi.s | 21 + mach/sun3/libce/cfu.s | 21 + mach/sun3/libce/cif.s | 23 + mach/sun3/libce/cmf4.s | 20 + mach/sun3/libce/cmf8.s | 26 + mach/sun3/libce/cuf.s | 34 + mach/sun3/libce/dvf4.s | 11 + mach/sun3/libce/dvf8.s | 15 + mach/sun3/libce/fef4.s | 19 + mach/sun3/libce/fef8.s | 19 + mach/sun3/libce/fif4.s | 23 + mach/sun3/libce/fif8.s | 17 + mach/sun3/libce/head_em.s | 71 + mach/sun3/libce/mlf4.s | 11 + mach/sun3/libce/mlf8.s | 15 + mach/sun3/libce/proto.make | 46 + mach/sun3/libce/sbf4.s | 11 + mach/sun3/libce/sbf8.s | 15 + mach/sun3/libce/sys.s | 7 + mach/sun3/libce/vars.s | 20 + mach/sun3/libsys/LIST | 194 + mach/sun3/libsys/head_em.s | 71 + mach/sun3/libsys/libmon_s.a | Bin 0 -> 36174 bytes mach/sun3/libsys/syscall.h | 132 + mach/sun3/mach_params | 5 + mach/vax4/Action | 32 + mach/vax4/as/mach0.c | 25 + mach/vax4/as/mach1.c | 68 + mach/vax4/as/mach2.c | 65 + mach/vax4/as/mach3.c | 437 + mach/vax4/as/mach4.c | 325 + mach/vax4/as/mach5.c | 350 + mach/vax4/ce/EM_table | 781 ++ mach/vax4/ce/Make.back | 107 + mach/vax4/ce/as.c | 275 + mach/vax4/ce/as.h | 30 + 
mach/vax4/ce/as_table | 292 + mach/vax4/ce/do_close.c | 9 + mach/vax4/ce/do_open.c | 14 + mach/vax4/ce/end_back.c | 37 + mach/vax4/ce/mach.c | 93 + mach/vax4/ce/mach.h | 44 + mach/vax4/ce/output.c | 298 + mach/vax4/ce/proto.make | 54 + mach/vax4/ce/relocation.c | 46 + mach/vax4/cg/mach.c | 298 + mach/vax4/cg/mach.h | 32 + mach/vax4/cg/table | 4648 ++++++++++ mach/vax4/cv/cv.c | 605 ++ mach/vax4/cv/proto.make | 34 + mach/vax4/libbsd4_1a/LIST | 88 + mach/vax4/libbsd4_1a/head_em.s | 78 + mach/vax4/libbsd4_1a/libmon_s.a | Bin 0 -> 17428 bytes mach/vax4/libbsd4_2/LIST | 164 + mach/vax4/libbsd4_2/head_em.s | 74 + mach/vax4/libbsd4_2/libmon_s.a | Bin 0 -> 17364 bytes mach/vax4/libbsd4_2/syscall.h | 127 + mach/vax4/libce/proto.make | 32 + mach/vax4/libce/sys.s | 7 + mach/vax4/libdb/machdep.s | 35 + mach/vax4/libem/LIST | 71 + mach/vax4/libem/libem_s.a | Bin 0 -> 27776 bytes mach/vax4/libend/LIST | 5 + mach/vax4/libend/end_s.a | Bin 0 -> 742 bytes mach/vax4/libsysV_2/LIST | 94 + mach/vax4/libsysV_2/head_em.s | 76 + mach/vax4/libsysV_2/libmon_s.a | Bin 0 -> 18770 bytes mach/vax4/mach_params | 5 + mach/vax4/top/table | 292 + mach/xenix3/Action | 6 + mach/xenix3/cv/chstack.c | 75 + mach/xenix3/cv/cv.c | 472 + mach/xenix3/cv/proto.make | 43 + mach/xenix3/libsys/LIST | 113 + mach/xenix3/libsys/head_em.s | 87 + mach/xenix3/libsys/libmon_s.a | Bin 0 -> 20150 bytes mach/xenix3/mach_params | 5 + mach/z80/Action | 18 + mach/z80/as/mach0.c | 15 + mach/z80/as/mach1.c | 36 + mach/z80/as/mach2.c | 34 + mach/z80/as/mach3.c | 102 + mach/z80/as/mach4.c | 328 + mach/z80/as/mach5.c | 61 + mach/z80/cg/mach.c | 102 + mach/z80/cg/mach.h | 35 + mach/z80/cg/table | 1450 +++ mach/z80/int/READ_ME | 23 + mach/z80/int/atof.s | 287 + mach/z80/int/cv.c | 24 + mach/z80/int/dl.c | 190 + mach/z80/int/dosort | 9 + mach/z80/int/dvi4.s | 80 + mach/z80/int/dvu4.s | 142 + mach/z80/int/eb.s | 10 + mach/z80/int/em.s | 4678 ++++++++++ mach/z80/int/em22 | 43 + mach/z80/int/fpp.s | 487 + mach/z80/int/mli4.s | 80 + 
mach/z80/int/proto.make | 71 + mach/z80/libem/LIST | 42 + mach/z80/libem/libem_s.a | Bin 0 -> 29380 bytes mach/z80/libend/LIST | 5 + mach/z80/libend/end_s.a | Bin 0 -> 612 bytes mach/z80/libmon/LIST | 3 + mach/z80/libmon/README | 11 + mach/z80/libmon/char.her.s | 36 + mach/z80/libmon/char.nas.s | 49 + mach/z80/libmon/head_em.s | 88 + mach/z80/libmon/libmon_s.a | Bin 0 -> 2300 bytes mach/z80/libmon/mon.cpm.s | 929 ++ mach/z80/libmon/mon.s | 107 + mach/z80/mach_params | 5 + mach/z8000/Action | 18 + mach/z8000/as/README | 2 + mach/z8000/as/mach0.c | 20 + mach/z8000/as/mach1.c | 82 + mach/z8000/as/mach2.c | 38 + mach/z8000/as/mach3.c | 369 + mach/z8000/as/mach4.c | 573 ++ mach/z8000/as/mach5.c | 177 + mach/z8000/cg/mach.c | 97 + mach/z8000/cg/mach.h | 30 + mach/z8000/cg/table | 1857 ++++ mach/z8000/libem/LIST | 34 + mach/z8000/libem/libem_s.a | Bin 0 -> 12356 bytes mach/z8000/libend/LIST | 5 + mach/z8000/libend/end_s.a | Bin 0 -> 612 bytes mach/z8000/libmon/LIST | 2 + mach/z8000/libmon/head_em.s | 64 + mach/z8000/libmon/libmon_s.a | Bin 0 -> 1504 bytes mach/z8000/mach_params | 5 + man/6500_as.6 | 69 + man/6800_as.6 | 54 + man/6805_as.6 | 71 + man/6809_as.6 | 150 + man/8080_as.6 | 38 + man/em_cg.6 | 32 + man/em_ncg.6 | 42 + man/head | 10 + man/i386_as.6 | 118 + man/i86_as.6 | 137 + man/libmon.7 | 102 + man/libpc.7 | 298 + man/m68k2_as.6 | 100 + man/ns_as.6 | 104 + man/pc_prlib.7 | 692 ++ man/pdp_as.6 | 143 + man/proto.make | 22 + man/uni_ass.6 | 262 + man/z8000_as.6 | 166 + man/z80_as.6 | 68 + modules/h/ansi.h | 29 + modules/h/em.h | 8 + modules/h/emO_code.h | 40 + modules/h/em_arith.h | 7 + modules/h/em_code.h | 36 + modules/h/em_codeCE.h | 306 + modules/h/em_codeO.h | 270 + modules/h/em_label.h | 7 + modules/h/em_mesX.h | 5 + modules/h/proto.make | 33 + modules/src/Action | 39 + modules/src/Action.lint | 52 + modules/src/alloc/Malloc.c | 28 + modules/src/alloc/No_Mem.c | 14 + modules/src/alloc/Realloc.c | 33 + modules/src/alloc/Salloc.c | 37 + 
modules/src/alloc/Srealloc.c | 21 + modules/src/alloc/alloc.3 | 107 + modules/src/alloc/alloc.h | 56 + modules/src/alloc/botch.c | 23 + modules/src/alloc/clear.c | 39 + modules/src/alloc/proto.make | 87 + modules/src/alloc/st_alloc.c | 65 + modules/src/alloc/std_alloc.c | 68 + modules/src/assert/BadAssert.c | 41 + modules/src/assert/assert.3 | 61 + modules/src/assert/assert.h | 24 + modules/src/assert/proto.make | 43 + modules/src/em_code/C_out.c | 199 + modules/src/em_code/bhcst.c | 20 + modules/src/em_code/bhdlb.c | 22 + modules/src/em_code/bhdnam.c | 22 + modules/src/em_code/bhfcon.c | 22 + modules/src/em_code/bhicon.c | 22 + modules/src/em_code/bhilb.c | 21 + modules/src/em_code/bhpnam.c | 21 + modules/src/em_code/bhucon.c | 22 + modules/src/em_code/convert.c | 89 + modules/src/em_code/crcst.c | 15 + modules/src/em_code/crdlb.c | 16 + modules/src/em_code/crdnam.c | 16 + modules/src/em_code/crilb.c | 15 + modules/src/em_code/crpnam.c | 15 + modules/src/em_code/crscon.c | 16 + modules/src/em_code/crxcon.c | 16 + modules/src/em_code/cst.c | 11 + modules/src/em_code/dfdlb.c | 13 + modules/src/em_code/dfdnam.c | 13 + modules/src/em_code/dfilb.c | 13 + modules/src/em_code/dlb.c | 12 + modules/src/em_code/dnam.c | 12 + modules/src/em_code/em.c | 485 + modules/src/em_code/em.nogen | 147 + modules/src/em_code/em_code.3X | 496 ++ modules/src/em_code/em_private.h | 88 + modules/src/em_code/end.c | 14 + modules/src/em_code/endarg.c | 13 + modules/src/em_code/exc.c | 14 + modules/src/em_code/failed.c | 13 + modules/src/em_code/fcon.c | 12 + modules/src/em_code/getid.c | 12 + modules/src/em_code/icon.c | 12 + modules/src/em_code/ilb.c | 11 + modules/src/em_code/insert.c | 384 + modules/src/em_code/insert.h | 70 + modules/src/em_code/internerr.c | 13 + modules/src/em_code/make.em.gen | 17 + modules/src/em_code/make.sh | 20 + modules/src/em_code/msend.c | 10 + modules/src/em_code/msstart.c | 13 + modules/src/em_code/op.c | 13 + modules/src/em_code/opcst.c | 15 + 
modules/src/em_code/opdlb.c | 16 + modules/src/em_code/opdnam.c | 16 + modules/src/em_code/opilb.c | 15 + modules/src/em_code/opnarg.c | 14 + modules/src/em_code/oppnam.c | 15 + modules/src/em_code/pnam.c | 11 + modules/src/em_code/pro.c | 17 + modules/src/em_code/pronarg.c | 16 + modules/src/em_code/proto.make | 128 + modules/src/em_code/psdlb.c | 14 + modules/src/em_code/psdnam.c | 14 + modules/src/em_code/pspnam.c | 14 + modules/src/em_code/scon.c | 12 + modules/src/em_code/ucon.c | 12 + modules/src/em_mes/C_ms_com.c | 16 + modules/src/em_mes/C_ms_ego.c | 20 + modules/src/em_mes/C_ms_emx.c | 17 + modules/src/em_mes/C_ms_err.c | 14 + modules/src/em_mes/C_ms_flt.c | 14 + modules/src/em_mes/C_ms_gto.c | 14 + modules/src/em_mes/C_ms_opt.c | 14 + modules/src/em_mes/C_ms_par.c | 16 + modules/src/em_mes/C_ms_reg.c | 20 + modules/src/em_mes/C_ms_src.c | 18 + modules/src/em_mes/C_ms_stb.c | 79 + modules/src/em_mes/C_ms_std.c | 20 + modules/src/em_mes/em_mes.3 | 181 + modules/src/em_mes/proto.make | 110 + modules/src/em_opt/aux.c | 98 + modules/src/em_opt/em_codeO.h | 270 + modules/src/em_opt/em_nopt.6 | 24 + modules/src/em_opt/em_opt.3 | 113 + modules/src/em_opt/findworst.c | 165 + modules/src/em_opt/initlex.c | 64 + modules/src/em_opt/main.c | 114 + modules/src/em_opt/makefuns.awk | 68 + modules/src/em_opt/mkstrct.c | 296 + modules/src/em_opt/nopt.c | 354 + modules/src/em_opt/nopt.h | 105 + modules/src/em_opt/outcalls.c | 111 + modules/src/em_opt/outputdfa.c | 643 ++ modules/src/em_opt/parser.g | 507 ++ modules/src/em_opt/parser.h | 97 + modules/src/em_opt/patterns | 718 ++ modules/src/em_opt/proto.make | 241 + modules/src/em_opt/pseudo.r | 129 + modules/src/em_opt/syntax.l | 65 + modules/src/flt_arith/b64_add.c | 30 + modules/src/flt_arith/b64_sft.c | 48 + modules/src/flt_arith/flt_add.c | 85 + modules/src/flt_arith/flt_ar2flt.c | 41 + modules/src/flt_arith/flt_arith.3 | 263 + modules/src/flt_arith/flt_arith.h | 49 + modules/src/flt_arith/flt_chk.c | 29 + 
modules/src/flt_arith/flt_cmp.c | 20 + modules/src/flt_arith/flt_div.c | 129 + modules/src/flt_arith/flt_flt2ar.c | 62 + modules/src/flt_arith/flt_misc.h | 28 + modules/src/flt_arith/flt_modf.c | 33 + modules/src/flt_arith/flt_mul.c | 79 + modules/src/flt_arith/flt_nrm.c | 38 + modules/src/flt_arith/flt_str2fl.c | 478 + modules/src/flt_arith/flt_umin.c | 18 + modules/src/flt_arith/proto.make | 117 + modules/src/flt_arith/split.c | 17 + modules/src/flt_arith/test.c | 87 + modules/src/flt_arith/ucmp.c | 21 + modules/src/idf/idf.3 | 87 + modules/src/idf/idf_pkg.body | 161 + modules/src/idf/idf_pkg.spec | 45 + modules/src/idf/proto.make | 29 + modules/src/input/AtEoIF.c | 14 + modules/src/input/AtEoIT.c | 14 + modules/src/input/inp_pkg.body | 447 + modules/src/input/inp_pkg.spec | 58 + modules/src/input/input.3 | 154 + modules/src/input/proto.make | 52 + modules/src/malloc/READ_ME | 27 + modules/src/malloc/add_file | 14 + modules/src/malloc/check.c | 306 + modules/src/malloc/check.h | 20 + modules/src/malloc/getsize.c | 27 + modules/src/malloc/global.c | 11 + modules/src/malloc/impl.h | 93 + modules/src/malloc/log.c | 133 + modules/src/malloc/log.h | 25 + modules/src/malloc/mal.c | 361 + modules/src/malloc/param.h | 56 + modules/src/malloc/phys.c | 100 + modules/src/malloc/phys.h | 75 + modules/src/malloc/proto.make | 53 + modules/src/malloc/size_type.h | 15 + modules/src/object/obj.h | 79 + modules/src/object/object.3 | 316 + modules/src/object/object.h | 45 + modules/src/object/proto.make | 95 + modules/src/object/rd.c | 260 + modules/src/object/rd_arhdr.c | 34 + modules/src/object/rd_bytes.c | 35 + modules/src/object/rd_int2.c | 15 + modules/src/object/rd_long.c | 15 + modules/src/object/rd_ranlib.c | 27 + modules/src/object/rd_unsig2.c | 15 + modules/src/object/wr.c | 405 + modules/src/object/wr_arhdr.c | 28 + modules/src/object/wr_bytes.c | 34 + modules/src/object/wr_int2.c | 15 + modules/src/object/wr_long.c | 16 + modules/src/object/wr_putc.c | 17 + 
modules/src/object/wr_ranlib.c | 28 + modules/src/print/doprnt.c | 27 + modules/src/print/format.c | 108 + modules/src/print/fprint.c | 43 + modules/src/print/param.h | 7 + modules/src/print/print.3 | 121 + modules/src/print/print.c | 41 + modules/src/print/print.h | 23 + modules/src/print/proto.make | 62 + modules/src/print/sprint.c | 42 + modules/src/read_em/EM_vars.c | 16 + modules/src/read_em/argtype | 14 + modules/src/read_em/em_comp.h | 82 + modules/src/read_em/m_C_mnem | 91 + modules/src/read_em/m_C_mnem_na | 15 + modules/src/read_em/mkcalls.c | 440 + modules/src/read_em/proto.make | 123 + modules/src/read_em/read_em.3 | 328 + modules/src/read_em/read_em.c | 423 + modules/src/read_em/reade.c | 662 ++ modules/src/read_em/readk.c | 351 + modules/src/string/ack_string.h | 31 + modules/src/string/bts2str.c | 35 + modules/src/string/btscat.c | 23 + modules/src/string/btscmp.c | 23 + modules/src/string/btscpy.c | 21 + modules/src/string/btszero.c | 22 + modules/src/string/long2str.c | 69 + modules/src/string/proto.make | 110 + modules/src/string/str2bts.c | 74 + modules/src/string/str2long.c | 41 + modules/src/string/strcat.c | 24 + modules/src/string/strcmp.c | 20 + modules/src/string/strcpy.c | 21 + modules/src/string/strindex.c | 20 + modules/src/string/string.3 | 257 + modules/src/string/strlen.c | 20 + modules/src/string/strncat.c | 25 + modules/src/string/strncmp.c | 26 + modules/src/string/strncpy.c | 22 + modules/src/string/strrindex.c | 20 + modules/src/string/strzero.c | 17 + modules/src/system/access.c | 15 + modules/src/system/break.c | 21 + modules/src/system/chmode.c | 15 + modules/src/system/close.c | 18 + modules/src/system/create.c | 28 + modules/src/system/filesize.c | 20 + modules/src/system/lock.c | 33 + modules/src/system/modtime.c | 21 + modules/src/system/open.c | 52 + modules/src/system/proto.make | 109 + modules/src/system/read.c | 17 + modules/src/system/remove.c | 14 + modules/src/system/rename.c | 17 + modules/src/system/seek.c | 19 + 
modules/src/system/stop.c | 22 + modules/src/system/system.3 | 324 + modules/src/system/system.c | 24 + modules/src/system/system.h | 67 + modules/src/system/time.c | 15 + modules/src/system/unlock.c | 14 + modules/src/system/write.c | 17 + util/LLgen/COPYING | 19 + util/LLgen/LLgen.1 | 139 + util/LLgen/Makefile | 49 + util/LLgen/READ_ME | 19 + util/LLgen/lib/incl | 68 + util/LLgen/lib/nc_incl | 70 + util/LLgen/lib/nc_rec | 1790 ++++ util/LLgen/lib/rec | 442 + util/LLgen/proto.make | 52 + util/LLgen/src/LLgen.c.dist | 1413 +++ util/LLgen/src/LLgen.g | 663 ++ util/LLgen/src/Lpars.c.dist | 826 ++ util/LLgen/src/Lpars.h.dist | 25 + util/LLgen/src/Makefile | 102 + util/LLgen/src/alloc.c | 92 + util/LLgen/src/cclass.c | 138 + util/LLgen/src/cclass.h | 17 + util/LLgen/src/check.c | 472 + util/LLgen/src/compute.c | 1159 +++ util/LLgen/src/extern.h | 94 + util/LLgen/src/gencode.c | 1360 +++ util/LLgen/src/global.c | 89 + util/LLgen/src/io.h | 43 + util/LLgen/src/machdep.c | 70 + util/LLgen/src/main.c | 376 + util/LLgen/src/name.c | 251 + util/LLgen/src/proto.make | 157 + util/LLgen/src/reach.c | 135 + util/LLgen/src/savegram.c | 385 + util/LLgen/src/sets.c | 218 + util/LLgen/src/sets.h | 33 + util/LLgen/src/tokens.c.dist | 634 ++ util/LLgen/src/tokens.g | 604 ++ util/LLgen/src/types.h | 272 + util/ack/ack.1.X | 442 + util/ack/ack.h | 98 + util/ack/data.c | 20 + util/ack/data.h | 51 + util/ack/dmach.h | 23 + util/ack/files.c | 180 + util/ack/grows.c | 74 + util/ack/grows.h | 27 + util/ack/list.c | 70 + util/ack/list.h | 31 + util/ack/main.c | 452 + util/ack/mktables.c | 121 + util/ack/proto.make | 187 + util/ack/rmach.c | 383 + util/ack/run.c | 150 + util/ack/scan.c | 255 + util/ack/svars.c | 118 + util/ack/trans.c | 666 ++ util/ack/trans.h | 46 + util/ack/util.c | 184 + util/amisc/anm.1 | 74 + util/amisc/anm.c | 339 + util/amisc/ashow.c | 229 + util/amisc/asize.1 | 16 + util/amisc/asize.c | 68 + util/amisc/astrip.1 | 16 + util/amisc/astrip.c | 152 + util/amisc/proto.make | 
48 + util/arch/aal.1 | 90 + util/arch/arch.1 | 94 + util/arch/arch.5 | 55 + util/arch/archiver.c | 781 ++ util/arch/proto.make | 62 + util/ass/asprint.p | 386 + util/ass/ass00.c | 528 ++ util/ass/ass00.h | 253 + util/ass/ass30.c | 368 + util/ass/ass40.c | 49 + util/ass/ass50.c | 184 + util/ass/ass60.c | 204 + util/ass/ass70.c | 337 + util/ass/ass80.c | 413 + util/ass/assci.c | 864 ++ util/ass/asscm.c | 132 + util/ass/assda.c | 138 + util/ass/assex.h | 165 + util/ass/assrl.c | 291 + util/ass/em_ass.6 | 72 + util/ass/maktab.c | 476 + util/ass/proto.make | 174 + util/byacc/ACKNOWLEDGEMENTS | 25 + util/byacc/NEW_FEATURES | 46 + util/byacc/NO_WARRANTY | 3 + util/byacc/README | 23 + util/byacc/closure.c | 265 + util/byacc/defs.h | 285 + util/byacc/error.c | 317 + util/byacc/lalr.c | 638 ++ util/byacc/lr0.c | 598 ++ util/byacc/main.c | 366 + util/byacc/manpage | 100 + util/byacc/mkpar.c | 357 + util/byacc/output.c | 1146 +++ util/byacc/proto.make | 99 + util/byacc/reader.c | 1770 ++++ util/byacc/skeleton.c | 292 + util/byacc/symtab.c | 119 + util/byacc/verbose.c | 329 + util/byacc/warshall.c | 84 + util/ceg/Action | 24 + util/ceg/EM_parser/as_EM_pars/arg_type.h | 4 + util/ceg/EM_parser/as_EM_pars/dist.c | 14 + util/ceg/EM_parser/as_EM_pars/em_decl.h | 66 + util/ceg/EM_parser/as_EM_pars/em_parser.h | 7 + util/ceg/EM_parser/as_EM_pars/error.c | 39 + util/ceg/EM_parser/as_EM_pars/proto.make | 42 + util/ceg/EM_parser/common/C_instr2.c | 462 + util/ceg/EM_parser/common/action.c | 57 + util/ceg/EM_parser/common/arg_type.h | 4 + util/ceg/EM_parser/common/decl.h | 68 + util/ceg/EM_parser/common/default.c | 255 + util/ceg/EM_parser/common/em_parser.h | 7 + util/ceg/EM_parser/common/eval.c | 24 + util/ceg/EM_parser/common/help.c | 180 + util/ceg/EM_parser/common/mylex.c | 298 + util/ceg/EM_parser/common/pars.g | 329 + util/ceg/EM_parser/common/proto.make | 74 + util/ceg/EM_parser/common/scan.c | 107 + util/ceg/EM_parser/obj_EM_pars/arg_type.h | 4 + 
util/ceg/EM_parser/obj_EM_pars/dist.c | 115 + util/ceg/EM_parser/obj_EM_pars/em_parser.h | 7 + util/ceg/EM_parser/obj_EM_pars/proto.make | 38 + util/ceg/as_parser/as_parser.h | 5 + util/ceg/as_parser/const.h | 7 + util/ceg/as_parser/conversion.c | 138 + util/ceg/as_parser/decl.h | 3 + util/ceg/as_parser/eval/eval.c | 345 + util/ceg/as_parser/eval/proto.make | 14 + util/ceg/as_parser/eval/states.h | 11 + util/ceg/as_parser/help.c | 356 + util/ceg/as_parser/pars.g | 215 + util/ceg/as_parser/proto.make | 73 + util/ceg/as_parser/table.l | 54 + util/ceg/assemble/as_assemble/READ_ME | 4 + util/ceg/assemble/as_assemble/assemble.c | 63 + util/ceg/assemble/as_assemble/block_as.c | 15 + util/ceg/assemble/obj_assemble/READ_ME | 5 + util/ceg/assemble/obj_assemble/assemble.c | 265 + util/ceg/assemble/obj_assemble/block_as.c | 15 + util/ceg/assemble/obj_assemble/const.h | 6 + util/ceg/assemble/proto.make | 34 + util/ceg/ce_back/as_back/READ_ME | 5 + util/ceg/ce_back/as_back/back.h | 67 + util/ceg/ce_back/as_back/bottom.c | 119 + util/ceg/ce_back/as_back/bss.c | 16 + util/ceg/ce_back/as_back/con1.c | 7 + util/ceg/ce_back/as_back/con2.c | 7 + util/ceg/ce_back/as_back/con4.c | 7 + util/ceg/ce_back/as_back/dbsym.c | 26 + util/ceg/ce_back/as_back/do_close.c | 6 + util/ceg/ce_back/as_back/do_open.c | 10 + util/ceg/ce_back/as_back/end_back.c | 5 + util/ceg/ce_back/as_back/gen1.c | 17 + util/ceg/ce_back/as_back/gen2.c | 17 + util/ceg/ce_back/as_back/gen4.c | 17 + util/ceg/ce_back/as_back/header.h | 4 + util/ceg/ce_back/as_back/init_back.c | 5 + util/ceg/ce_back/as_back/proto.make | 98 + util/ceg/ce_back/as_back/reloc1.c | 9 + util/ceg/ce_back/as_back/reloc2.c | 9 + util/ceg/ce_back/as_back/reloc4.c | 9 + util/ceg/ce_back/as_back/rom1.c | 7 + util/ceg/ce_back/as_back/rom2.c | 7 + util/ceg/ce_back/as_back/rom4.c | 7 + util/ceg/ce_back/as_back/set_global.c | 7 + util/ceg/ce_back/as_back/set_local.c | 7 + util/ceg/ce_back/as_back/switchseg.c | 25 + util/ceg/ce_back/as_back/symboldef.c | 7 + 
util/ceg/ce_back/as_back/text1.c | 7 + util/ceg/ce_back/as_back/text2.c | 7 + util/ceg/ce_back/as_back/text4.c | 7 + util/ceg/ce_back/obj_back/READ_ME | 5 + util/ceg/ce_back/obj_back/back.h | 78 + util/ceg/ce_back/obj_back/common.c | 23 + util/ceg/ce_back/obj_back/con2.c | 16 + util/ceg/ce_back/obj_back/con4.c | 23 + util/ceg/ce_back/obj_back/data.c | 90 + util/ceg/ce_back/obj_back/data.h | 58 + util/ceg/ce_back/obj_back/dbsym.c | 43 + util/ceg/ce_back/obj_back/do_close.c | 11 + util/ceg/ce_back/obj_back/do_open.c | 14 + util/ceg/ce_back/obj_back/end_back.c | 55 + util/ceg/ce_back/obj_back/extnd.c | 151 + util/ceg/ce_back/obj_back/gen1.c | 21 + util/ceg/ce_back/obj_back/gen2.c | 35 + util/ceg/ce_back/obj_back/gen4.c | 44 + util/ceg/ce_back/obj_back/hash.h | 8 + util/ceg/ce_back/obj_back/header.h | 8 + util/ceg/ce_back/obj_back/init_back.c | 33 + util/ceg/ce_back/obj_back/label.c | 30 + util/ceg/ce_back/obj_back/memory.c | 72 + util/ceg/ce_back/obj_back/misc.c | 45 + util/ceg/ce_back/obj_back/output.c | 188 + util/ceg/ce_back/obj_back/proto.make | 111 + util/ceg/ce_back/obj_back/reloc1.c | 42 + util/ceg/ce_back/obj_back/reloc2.c | 42 + util/ceg/ce_back/obj_back/reloc4.c | 49 + util/ceg/ce_back/obj_back/relocation.c | 76 + util/ceg/ce_back/obj_back/rom2.c | 15 + util/ceg/ce_back/obj_back/rom4.c | 25 + util/ceg/ce_back/obj_back/set_global.c | 14 + util/ceg/ce_back/obj_back/set_local.c | 14 + util/ceg/ce_back/obj_back/switchseg.c | 12 + util/ceg/ce_back/obj_back/symboldef.c | 15 + util/ceg/ce_back/obj_back/symtable.c | 136 + util/ceg/ce_back/obj_back/text2.c | 15 + util/ceg/ce_back/obj_back/text4.c | 23 + util/ceg/ce_back/proto.make | 48 + util/ceg/defaults/C_out_skel.c | 333 + util/ceg/defaults/EM_vars.c | 3 + util/ceg/defaults/READ_ME | 15 + util/ceg/defaults/argtype | 14 + util/ceg/defaults/m_C_mnem | 92 + util/ceg/defaults/m_C_mnem_na | 16 + util/ceg/defaults/message/C_cst.c | 27 + util/ceg/defaults/message/C_dlb.c | 16 + util/ceg/defaults/message/C_dnam.c | 16 + 
util/ceg/defaults/message/C_fcon.c | 9 + util/ceg/defaults/message/C_icon.c | 9 + util/ceg/defaults/message/C_ilb.c | 17 + util/ceg/defaults/message/C_mes_begin.c | 17 + util/ceg/defaults/message/C_mes_end.c | 16 + util/ceg/defaults/message/C_pnam.c | 15 + util/ceg/defaults/message/C_scon.c | 17 + util/ceg/defaults/message/C_ucon.c | 9 + util/ceg/defaults/mk_C_out | 12 + util/ceg/defaults/not_impl/not_impl.c | 9 + util/ceg/defaults/not_impl/not_impl_table | 271 + util/ceg/defaults/proto.make | 69 + util/ceg/defaults/pseudo/C_busy.c | 9 + util/ceg/defaults/pseudo/C_close.c | 15 + util/ceg/defaults/pseudo/C_df_dlb.c | 15 + util/ceg/defaults/pseudo/C_df_dnam.c | 10 + util/ceg/defaults/pseudo/C_df_ilb.c | 16 + util/ceg/defaults/pseudo/C_end.c | 24 + util/ceg/defaults/pseudo/C_end_narg.c | 11 + util/ceg/defaults/pseudo/C_exa_dlb.c | 10 + util/ceg/defaults/pseudo/C_exa_dnam.c | 10 + util/ceg/defaults/pseudo/C_exp.c | 10 + util/ceg/defaults/pseudo/C_ina_dlb.c | 10 + util/ceg/defaults/pseudo/C_ina_dnam.c | 10 + util/ceg/defaults/pseudo/C_init.c | 20 + util/ceg/defaults/pseudo/C_inp.c | 10 + util/ceg/defaults/pseudo/C_insertpart.c | 53 + util/ceg/defaults/pseudo/C_magic.c | 7 + util/ceg/defaults/pseudo/C_open.c | 19 + util/ceg/defaults/pseudo/C_pro.c | 20 + util/ceg/defaults/pseudo/C_pro_narg.c | 27 + util/ceg/defaults/pseudo_vars.c | 3 + util/ceg/defaults/storage/C_bss_cst.c | 47 + util/ceg/defaults/storage/C_bss_dlb.c | 48 + util/ceg/defaults/storage/C_bss_dnam.c | 48 + util/ceg/defaults/storage/C_bss_ilb.c | 50 + util/ceg/defaults/storage/C_bss_pnam.c | 48 + util/ceg/defaults/storage/C_con_cst.c | 41 + util/ceg/defaults/storage/C_con_dlb.c | 41 + util/ceg/defaults/storage/C_con_dnam.c | 41 + util/ceg/defaults/storage/C_con_ilb.c | 42 + util/ceg/defaults/storage/C_con_pnam.c | 40 + util/ceg/defaults/storage/C_con_scon.c | 42 + util/ceg/defaults/storage/C_hol_cst.c | 54 + util/ceg/defaults/storage/C_hol_dlb.c | 55 + util/ceg/defaults/storage/C_hol_dnam.c | 55 + 
util/ceg/defaults/storage/C_hol_ilb.c | 55 + util/ceg/defaults/storage/C_hol_pnam.c | 55 + util/ceg/defaults/storage/C_rom_cst.c | 40 + util/ceg/defaults/storage/C_rom_dlb.c | 41 + util/ceg/defaults/storage/C_rom_dnam.c | 41 + util/ceg/defaults/storage/C_rom_ilb.c | 42 + util/ceg/defaults/storage/C_rom_pnam.c | 40 + util/ceg/defaults/storage/C_rom_scon.c | 42 + util/ceg/util/install_ceg | 30 + util/ceg/util/make_asobj | 41 + util/ceg/util/make_back | 18 + util/ceg/util/make_ce | 34 + util/ceg/util/make_ceg | 54 + util/ceg/util/make_own | 37 + util/ceg/util/proto.make | 29 + util/ceg/util/update_ceg | 40 + util/cgg/bootgram.y | 1125 +++ util/cgg/booth.h | 223 + util/cgg/bootlex.l | 167 + util/cgg/main.c | 1017 +++ util/cgg/proto.make | 45 + util/cmisc/GCIPM.c | 250 + util/cmisc/cclash.1 | 26 + util/cmisc/cclash.c | 304 + util/cmisc/cid.1 | 34 + util/cmisc/cid.c | 207 + util/cmisc/mkdep.1 | 29 + util/cmisc/mkdep.c | 201 + util/cmisc/prid.1 | 13 + util/cmisc/prid.c | 134 + util/cmisc/proto.make | 69 + util/cmisc/tabgen.1 | 110 + util/cmisc/tabgen.c | 339 + util/cpp/LLlex.c | 375 + util/cpp/LLlex.h | 44 + util/cpp/LLmessage.c | 23 + util/cpp/Parameters | 72 + util/cpp/bits.h | 18 + util/cpp/ch7bin.c | 80 + util/cpp/ch7mon.c | 25 + util/cpp/char.tab | 36 + util/cpp/class.h | 44 + util/cpp/cpp.6 | 76 + util/cpp/domacro.c | 718 ++ util/cpp/error.c | 148 + util/cpp/expr.c | 58 + util/cpp/expression.g | 130 + util/cpp/file_info.h | 18 + util/cpp/idf.c | 7 + util/cpp/idf.h | 20 + util/cpp/init.c | 78 + util/cpp/input.c | 64 + util/cpp/input.h | 7 + util/cpp/interface.h | 8 + util/cpp/macro.h | 78 + util/cpp/main.c | 151 + util/cpp/make.hfiles | 35 + util/cpp/make.tokcase | 34 + util/cpp/make.tokfile | 6 + util/cpp/next.c | 12 + util/cpp/options.c | 146 + util/cpp/preprocess.c | 264 + util/cpp/proto.make | 344 + util/cpp/replace.c | 242 + util/cpp/scan.c | 237 + util/cpp/skip.c | 106 + util/cpp/tokenname.c | 55 + util/data/em_flag.c | 136 + util/data/em_mnem.c | 135 + 
util/data/em_pseu.c | 14 + util/data/em_ptyp.c | 19 + util/data/proto.make | 49 + util/ego/Action | 45 + util/ego/bo/bo.c | 329 + util/ego/bo/proto.make | 77 + util/ego/ca/ca.c | 238 + util/ego/ca/ca.h | 20 + util/ego/ca/ca_put.c | 420 + util/ego/ca/ca_put.h | 14 + util/ego/ca/proto.make | 89 + util/ego/cf/cf.c | 556 ++ util/ego/cf/cf.h | 21 + util/ego/cf/cf_idom.c | 143 + util/ego/cf/cf_idom.h | 20 + util/ego/cf/cf_loop.c | 405 + util/ego/cf/cf_loop.h | 19 + util/ego/cf/cf_succ.c | 257 + util/ego/cf/cf_succ.h | 15 + util/ego/cf/proto.make | 117 + util/ego/cj/cj.c | 362 + util/ego/cj/proto.make | 76 + util/ego/cs/cs.c | 84 + util/ego/cs/cs.h | 128 + util/ego/cs/cs_alloc.c | 49 + util/ego/cs/cs_alloc.h | 29 + util/ego/cs/cs_aux.c | 69 + util/ego/cs/cs_aux.h | 30 + util/ego/cs/cs_avail.c | 208 + util/ego/cs/cs_avail.h | 23 + util/ego/cs/cs_debug.c | 152 + util/ego/cs/cs_debug.h | 38 + util/ego/cs/cs_elim.c | 288 + util/ego/cs/cs_elim.h | 10 + util/ego/cs/cs_entity.c | 147 + util/ego/cs/cs_entity.h | 20 + util/ego/cs/cs_getent.c | 230 + util/ego/cs/cs_getent.h | 13 + util/ego/cs/cs_kill.c | 383 + util/ego/cs/cs_kill.h | 29 + util/ego/cs/cs_partit.c | 376 + util/ego/cs/cs_partit.h | 60 + util/ego/cs/cs_profit.c | 224 + util/ego/cs/cs_profit.h | 15 + util/ego/cs/cs_stack.c | 137 + util/ego/cs/cs_stack.h | 23 + util/ego/cs/cs_vnm.c | 356 + util/ego/cs/cs_vnm.h | 9 + util/ego/cs/proto.make | 251 + util/ego/descr/descr.sed | 23 + util/ego/descr/em22.descr | 85 + util/ego/descr/em24.descr | 85 + util/ego/descr/em44.descr | 85 + util/ego/descr/i386.descr | 100 + util/ego/descr/i86.descr | 100 + util/ego/descr/m68020.descr | 109 + util/ego/descr/m68k2.descr | 106 + util/ego/descr/m68k4.descr | 109 + util/ego/descr/pdp.descr | 99 + util/ego/descr/proto.make | 79 + util/ego/descr/sparc.descr | 107 + util/ego/descr/vax4.descr | 120 + util/ego/em_ego/em_ego.c | 421 + util/ego/em_ego/proto.make | 43 + util/ego/ic/ic.c | 572 ++ util/ego/ic/ic.h | 75 + util/ego/ic/ic_aux.c | 465 + 
util/ego/ic/ic_aux.h | 44 + util/ego/ic/ic_io.c | 206 + util/ego/ic/ic_io.h | 39 + util/ego/ic/ic_lib.c | 299 + util/ego/ic/ic_lib.h | 19 + util/ego/ic/ic_lookup.c | 434 + util/ego/ic/ic_lookup.h | 76 + util/ego/ic/proto.make | 137 + util/ego/il/il.c | 339 + util/ego/il/il.h | 167 + util/ego/il/il1_anal.c | 185 + util/ego/il/il1_anal.h | 22 + util/ego/il/il1_aux.c | 212 + util/ego/il/il1_aux.h | 43 + util/ego/il/il1_cal.c | 143 + util/ego/il/il1_cal.h | 36 + util/ego/il/il1_formal.c | 146 + util/ego/il/il1_formal.h | 16 + util/ego/il/il2_aux.c | 731 ++ util/ego/il/il2_aux.h | 43 + util/ego/il/il3_aux.c | 68 + util/ego/il/il3_aux.h | 20 + util/ego/il/il3_change.c | 589 ++ util/ego/il/il3_change.h | 46 + util/ego/il/il3_subst.c | 127 + util/ego/il/il3_subst.h | 22 + util/ego/il/il_aux.c | 387 + util/ego/il/il_aux.h | 58 + util/ego/il/proto.make | 223 + util/ego/lv/lv.c | 614 ++ util/ego/lv/lv.h | 47 + util/ego/lv/proto.make | 84 + util/ego/ra/itemtab.src | 21 + util/ego/ra/makeitems.c | 83 + util/ego/ra/proto.make | 234 + util/ego/ra/ra.c | 556 ++ util/ego/ra/ra.h | 140 + util/ego/ra/ra_allocl.c | 389 + util/ego/ra/ra_allocl.h | 24 + util/ego/ra/ra_aux.c | 45 + util/ego/ra/ra_aux.h | 29 + util/ego/ra/ra_interv.c | 233 + util/ego/ra/ra_interv.h | 40 + util/ego/ra/ra_items.c | 351 + util/ego/ra/ra_items.h | 36 + util/ego/ra/ra_lifet.c | 83 + util/ego/ra/ra_lifet.h | 17 + util/ego/ra/ra_pack.c | 414 + util/ego/ra/ra_pack.h | 16 + util/ego/ra/ra_profits.c | 248 + util/ego/ra/ra_profits.h | 16 + util/ego/ra/ra_xform.c | 580 ++ util/ego/ra/ra_xform.h | 29 + util/ego/share/alloc.c | 243 + util/ego/share/alloc.h | 59 + util/ego/share/aux.c | 251 + util/ego/share/aux.h | 71 + util/ego/share/cldefs.src | 69 + util/ego/share/cset.c | 282 + util/ego/share/cset.h | 26 + util/ego/share/debug.c | 149 + util/ego/share/debug.h | 61 + util/ego/share/def.h | 19 + util/ego/share/files.c | 22 + util/ego/share/files.h | 38 + util/ego/share/get.c | 552 ++ util/ego/share/get.h | 66 + 
util/ego/share/global.c | 26 + util/ego/share/global.h | 52 + util/ego/share/go.c | 163 + util/ego/share/go.h | 39 + util/ego/share/init_glob.c | 62 + util/ego/share/init_glob.h | 15 + util/ego/share/locals.c | 251 + util/ego/share/locals.h | 49 + util/ego/share/lset.c | 214 + util/ego/share/lset.h | 21 + util/ego/share/makecldef.c | 89 + util/ego/share/map.c | 26 + util/ego/share/map.h | 43 + util/ego/share/parser.c | 273 + util/ego/share/parser.h | 18 + util/ego/share/pop_push.awk | 15 + util/ego/share/pop_push.h | 138 + util/ego/share/proto.make | 241 + util/ego/share/put.c | 446 + util/ego/share/put.h | 47 + util/ego/share/show.c | 416 + util/ego/share/stack_chg.c | 108 + util/ego/share/stack_chg.h | 15 + util/ego/share/types.h | 424 + util/ego/sp/proto.make | 75 + util/ego/sp/sp.c | 246 + util/ego/sr/proto.make | 176 + util/ego/sr/sr.c | 239 + util/ego/sr/sr.h | 79 + util/ego/sr/sr_aux.c | 120 + util/ego/sr/sr_aux.h | 25 + util/ego/sr/sr_cand.c | 192 + util/ego/sr/sr_cand.h | 19 + util/ego/sr/sr_expr.c | 204 + util/ego/sr/sr_expr.h | 18 + util/ego/sr/sr_iv.c | 188 + util/ego/sr/sr_iv.h | 12 + util/ego/sr/sr_reduce.c | 768 ++ util/ego/sr/sr_reduce.h | 10 + util/ego/sr/sr_xform.c | 194 + util/ego/sr/sr_xform.h | 24 + util/ego/ud/proto.make | 158 + util/ego/ud/ud.c | 563 ++ util/ego/ud/ud.h | 26 + util/ego/ud/ud_aux.c | 60 + util/ego/ud/ud_aux.h | 22 + util/ego/ud/ud_const.c | 249 + util/ego/ud/ud_const.h | 29 + util/ego/ud/ud_copy.c | 395 + util/ego/ud/ud_copy.h | 46 + util/ego/ud/ud_defs.c | 380 + util/ego/ud/ud_defs.h | 56 + util/flex/COPYING | 38 + util/flex/Changes | 345 + util/flex/Headers | 25 + util/flex/Makefile | 190 + util/flex/README | 78 + util/flex/ccl.c | 175 + util/flex/dfa.c | 1075 +++ util/flex/ecs.c | 349 + util/flex/flex.1 | 781 ++ util/flex/flex.skel | 858 ++ util/flex/flexdef.h | 877 ++ util/flex/flexdoc.1 | 2446 +++++ util/flex/gen.c | 1336 +++ util/flex/initscan.c | 2294 +++++ util/flex/libmain.c | 13 + util/flex/main.c | 769 ++ 
util/flex/misc.c | 836 ++ util/flex/nfa.c | 717 ++ util/flex/parse.y | 702 ++ util/flex/proto.make | 179 + util/flex/scan.l | 533 ++ util/flex/sym.c | 315 + util/flex/tblcmp.c | 932 ++ util/flex/yylex.c | 216 + util/grind/Makefile | 2 + util/grind/PROBLEMS | 6 + util/grind/READ_ME | 3 + util/grind/avl.cc | 245 + util/grind/avl.h | 43 + util/grind/c.c | 604 ++ util/grind/char.ct | 71 + util/grind/class.h | 34 + util/grind/commands.g | 738 ++ util/grind/db_symtab.g | 928 ++ util/grind/do_comm.c | 953 ++ util/grind/expr.c | 1446 +++ util/grind/expr.h | 95 + util/grind/file.hh | 35 + util/grind/grind.1 | 423 + util/grind/idf.c | 8 + util/grind/idf.h | 14 + util/grind/itemlist.cc | 275 + util/grind/langdep.cc | 54 + util/grind/langdep.h | 43 + util/grind/list.c | 134 + util/grind/main.c | 241 + util/grind/make.allocd | 26 + util/grind/make.next | 6 + util/grind/make.ops | 18 + util/grind/make.tokcase | 36 + util/grind/make.tokfile | 6 + util/grind/message.h | 60 + util/grind/misc.h | 7 + util/grind/modula-2.c | 557 ++ util/grind/operator.h | 12 + util/grind/operators.ot | 43 + util/grind/ops.c | 49 + util/grind/ops.h | 73 + util/grind/pascal.c | 480 + util/grind/position.c | 134 + util/grind/position.h | 39 + util/grind/print.c | 393 + util/grind/proto.main | 55 + util/grind/proto.make | 185 + util/grind/rd.c | 640 ++ util/grind/rd.h | 5 + util/grind/run.c | 754 ++ util/grind/scope.cc | 147 + util/grind/scope.h | 60 + util/grind/symbol.c | 358 + util/grind/symbol.hh | 63 + util/grind/token.h | 16 + util/grind/tokenname.c | 97 + util/grind/tokenname.h | 8 + util/grind/tree.c | 517 ++ util/grind/tree.hh | 37 + util/grind/type.c | 433 + util/grind/type.hh | 124 + util/int/M.trap_msg | 21 + util/int/M.warn_h | 23 + util/int/M.warn_msg | 24 + util/int/READ_ME | 42 + util/int/alloc.c | 48 + util/int/alloc.h | 14 + util/int/core.c | 75 + util/int/data.c | 456 + util/int/debug.h | 8 + util/int/disassemble.c | 1776 ++++ util/int/do_array.c | 86 + util/int/do_branch.c | 169 + 
util/int/do_comp.c | 171 + util/int/do_conv.c | 383 + util/int/do_fpar.c | 459 + util/int/do_incdec.c | 155 + util/int/do_intar.c | 264 + util/int/do_load.c | 213 + util/int/do_logic.c | 159 + util/int/do_misc.c | 504 ++ util/int/do_proc.c | 152 + util/int/do_ptrar.c | 85 + util/int/do_sets.c | 99 + util/int/do_store.c | 115 + util/int/do_unsar.c | 176 + util/int/dump.c | 643 ++ util/int/e.out.h | 9 + util/int/fra.c | 55 + util/int/fra.h | 18 + util/int/global.c | 73 + util/int/global.h | 156 + util/int/init.c | 202 + util/int/int.1 | 200 + util/int/io.c | 254 + util/int/linfil.h | 20 + util/int/log.c | 333 + util/int/log.h | 24 + util/int/logging.h | 4 + util/int/m_ioctl.c | 301 + util/int/m_sigtrp.c | 131 + util/int/main.c | 194 + util/int/mem.h | 75 + util/int/memdirect.h | 60 + util/int/moncalls.c | 1128 +++ util/int/monstruct.c | 193 + util/int/monstruct.h | 69 + util/int/nofloat.h | 4 + util/int/opcode.h | 13 + util/int/proctab.c | 74 + util/int/proctab.h | 13 + util/int/proto.make | 593 ++ util/int/read.c | 321 + util/int/read.h | 18 + util/int/rsb.c | 108 + util/int/rsb.h | 31 + util/int/segcheck.h | 11 + util/int/segment.c | 84 + util/int/shadow.h | 102 + util/int/stack.c | 800 ++ util/int/switch.c | 29 + util/int/switch/READ_ME | 98 + util/int/switch/mkiswitch.c | 275 + util/int/switch/mkswitch.c | 288 + util/int/switch/proto.make | 56 + util/int/sysidf.h | 21 + util/int/tally.c | 137 + util/int/test/READ_ME | 16 + util/int/test/args.c | 32 + util/int/test/awa.p | 67 + util/int/test/fork2.c | 40 + util/int/test/ioc0.c | 36 + util/int/test/proto.make | 29 + util/int/test/prtime.c | 39 + util/int/test/set.c | 29 + util/int/test/sig.c | 27 + util/int/text.c | 47 + util/int/text.h | 114 + util/int/trap.c | 126 + util/int/trap.h | 14 + util/int/v7ioctl.h | 5 + util/int/warn.c | 158 + util/led/WRONG | 19 + util/led/ack.out.5 | 317 + util/led/archive.c | 180 + util/led/assert.h | 22 + util/led/const.h | 26 + util/led/debug.h | 19 + util/led/defs.h | 14 + 
util/led/error.c | 77 + util/led/extract.c | 250 + util/led/finish.c | 270 + util/led/led.6 | 137 + util/led/mach.c | 51 + util/led/main.c | 627 ++ util/led/memory.c | 620 ++ util/led/memory.h | 42 + util/led/orig.h | 9 + util/led/output.c | 90 + util/led/proto.make | 175 + util/led/read.c | 14 + util/led/relocate.c | 221 + util/led/save.c | 108 + util/led/scan.c | 619 ++ util/led/scan.h | 17 + util/led/sym.c | 145 + util/led/write.c | 115 + util/misc/convert.c | 91 + util/misc/em_decode.6 | 40 + util/misc/esize.1 | 20 + util/misc/esize.c | 156 + util/misc/proto.make | 84 + util/ncgg/assert.h | 11 + util/ncgg/cgg.y | 1099 +++ util/ncgg/coerc.c | 262 + util/ncgg/cost.h | 12 + util/ncgg/cvtkeywords | 22 + util/ncgg/emlookup.c | 78 + util/ncgg/error.c | 58 + util/ncgg/expr.c | 318 + util/ncgg/expr.h | 20 + util/ncgg/extern.h | 42 + util/ncgg/hall.c | 159 + util/ncgg/instruct.c | 200 + util/ncgg/instruct.h | 41 + util/ncgg/iocc.c | 208 + util/ncgg/iocc.h | 10 + util/ncgg/keywords | 52 + util/ncgg/lookup.c | 69 + util/ncgg/lookup.h | 33 + util/ncgg/main.c | 64 + util/ncgg/ncgg.6 | 45 + util/ncgg/output.c | 919 ++ util/ncgg/param.h | 53 + util/ncgg/property.h | 12 + util/ncgg/proto.make | 255 + util/ncgg/pseudo.h | 14 + util/ncgg/reg.h | 17 + util/ncgg/regvar.h | 14 + util/ncgg/scan.l | 106 + util/ncgg/set.c | 154 + util/ncgg/set.h | 9 + util/ncgg/strlookup.c | 24 + util/ncgg/subr.c | 389 + util/ncgg/token.h | 23 + util/ncgg/var.c | 52 + util/ncgg/varinfo.h | 17 + util/opt/alloc.c | 439 + util/opt/alloc.h | 49 + util/opt/assert.h | 11 + util/opt/backward.c | 190 + util/opt/cleanup.c | 59 + util/opt/em_opt.6 | 42 + util/opt/ext.h | 22 + util/opt/flow.c | 144 + util/opt/getline.c | 548 ++ util/opt/line.h | 92 + util/opt/lookup.c | 86 + util/opt/lookup.h | 30 + util/opt/main.c | 72 + util/opt/mktab.y | 422 + util/opt/optim.h | 16 + util/opt/param.h | 19 + util/opt/pattern.h | 130 + util/opt/patterns | 776 ++ util/opt/peephole.c | 823 ++ util/opt/pop_push.awk | 29 + 
util/opt/pop_push.h | 3 + util/opt/process.c | 204 + util/opt/proinf.h | 44 + util/opt/proto.make | 300 + util/opt/putline.c | 371 + util/opt/reg.c | 112 + util/opt/scan.l | 67 + util/opt/special.c | 24 + util/opt/tes.c | 190 + util/opt/tes.h | 8 + util/opt/testopt | 9 + util/opt/types.h | 25 + util/opt/util.c | 54 + util/opt/var.c | 32 + util/shf/march.sh | 54 + util/shf/proto.make | 23 + util/topgen/LLlex.c | 134 + util/topgen/hash.c | 88 + util/topgen/main.c | 60 + util/topgen/misc.h | 15 + util/topgen/pattern.c | 137 + util/topgen/proto.make | 60 + util/topgen/symtab.c | 53 + util/topgen/symtab.h | 23 + util/topgen/token.h | 15 + util/topgen/topgen.g | 311 + util/topgen/tunable.h | 16 + 3325 files changed, 501682 insertions(+) create mode 100644 Action create mode 100644 Copyright create mode 100644 NEW create mode 100644 README create mode 100755 TakeAction create mode 100755 bin/cc-and-mkdep.ack create mode 100755 bin/cc-and-mkdep.all create mode 100755 bin/cc-and-mkdep.sun create mode 100755 bin/do_deps create mode 100755 bin/do_resolve create mode 100755 bin/lint-lib.ack create mode 100755 bin/lint-lib.unix create mode 100755 bin/mk_manpage create mode 100755 bin/rm_deps create mode 100644 doc/6500.doc create mode 100644 doc/LLgen/LLgen.n create mode 100644 doc/LLgen/LLgen.refs create mode 100644 doc/LLgen/proto.make create mode 100644 doc/Makefile create mode 100644 doc/READ_ME create mode 100644 doc/ack.doc create mode 100755 doc/ansi_C.doc create mode 100644 doc/basic.doc create mode 100644 doc/ceg/ceg.ref create mode 100644 doc/ceg/ceg.tr create mode 100644 doc/ceg/proto.make create mode 100644 doc/cg.doc create mode 100644 doc/crefman.doc create mode 100644 doc/ego/bo/bo1 create mode 100644 doc/ego/ca/ca1 create mode 100644 doc/ego/cf/cf1 create mode 100644 doc/ego/cf/cf2 create mode 100644 doc/ego/cf/cf3 create mode 100644 doc/ego/cf/cf4 create mode 100644 doc/ego/cf/cf5 create mode 100644 doc/ego/cf/cf6 create mode 100644 doc/ego/cj/cj1 create mode 
100644 doc/ego/cs/cs1 create mode 100644 doc/ego/cs/cs2 create mode 100644 doc/ego/cs/cs3 create mode 100644 doc/ego/cs/cs4 create mode 100644 doc/ego/cs/cs5 create mode 100644 doc/ego/ic/ic1 create mode 100644 doc/ego/ic/ic2 create mode 100644 doc/ego/ic/ic3 create mode 100644 doc/ego/ic/ic4 create mode 100644 doc/ego/ic/ic5 create mode 100644 doc/ego/il/il1 create mode 100644 doc/ego/il/il2 create mode 100644 doc/ego/il/il3 create mode 100644 doc/ego/il/il4 create mode 100644 doc/ego/il/il5 create mode 100644 doc/ego/il/il6 create mode 100644 doc/ego/intro/head create mode 100644 doc/ego/intro/intro1 create mode 100644 doc/ego/intro/tail create mode 100644 doc/ego/lv/lv1 create mode 100644 doc/ego/ov/ov1 create mode 100644 doc/ego/proto.make create mode 100644 doc/ego/ra/ra1 create mode 100644 doc/ego/ra/ra2 create mode 100644 doc/ego/ra/ra3 create mode 100644 doc/ego/ra/ra4 create mode 100644 doc/ego/refs.gen create mode 100644 doc/ego/refs.opt create mode 100644 doc/ego/refs.stat create mode 100644 doc/ego/sp/sp1 create mode 100644 doc/ego/sr/sr1 create mode 100644 doc/ego/sr/sr2 create mode 100644 doc/ego/sr/sr3 create mode 100644 doc/ego/sr/sr4 create mode 100644 doc/ego/ud/ud1 create mode 100644 doc/ego/ud/ud2 create mode 100644 doc/ego/ud/ud3 create mode 100644 doc/ego/ud/ud4 create mode 100644 doc/ego/ud/ud5 create mode 100644 doc/em/READ_ME create mode 100644 doc/em/app.codes.nr create mode 100644 doc/em/app.exam.nr create mode 100644 doc/em/assem.nr create mode 100644 doc/em/cont.nr create mode 100644 doc/em/descr.nr create mode 100644 doc/em/dispat1.sed create mode 100644 doc/em/dispat2.sed create mode 100644 doc/em/dispat3.sed create mode 100644 doc/em/dspace.nr create mode 100644 doc/em/em.i create mode 100644 doc/em/env.nr create mode 100644 doc/em/even.c create mode 100644 doc/em/exam.e create mode 100644 doc/em/exam.p create mode 100644 doc/em/int/READ_ME create mode 100644 doc/em/int/em.p create mode 100644 doc/em/int/emdmp.c create mode 100644 
doc/em/int/mktables.c create mode 100644 doc/em/int/proto.make create mode 100644 doc/em/intro.nr create mode 100644 doc/em/ip.awk create mode 100644 doc/em/ispace.nr create mode 100644 doc/em/mach.nr create mode 100644 doc/em/macr.nr create mode 100644 doc/em/mapping.nr create mode 100644 doc/em/mem.nr create mode 100644 doc/em/mkdispatch.c create mode 100644 doc/em/proto.make create mode 100644 doc/em/title.nr create mode 100644 doc/em/traps.nr create mode 100644 doc/em/types.nr create mode 100644 doc/i80.doc create mode 100644 doc/install.doc create mode 100644 doc/install.pr create mode 100755 doc/int/Makefile create mode 100644 doc/int/README create mode 100644 doc/int/appA create mode 100644 doc/int/appB create mode 100644 doc/int/bib create mode 100644 doc/int/cover create mode 100644 doc/int/draw.mac create mode 100755 doc/int/proto.make create mode 100644 doc/int/txt1 create mode 100644 doc/int/txt2 create mode 100644 doc/int/txt3 create mode 100644 doc/lint/abstract create mode 100644 doc/lint/appendix_A create mode 100644 doc/lint/appendix_B create mode 100644 doc/lint/chap1 create mode 100644 doc/lint/chap2 create mode 100644 doc/lint/chap3 create mode 100644 doc/lint/chap4 create mode 100644 doc/lint/chap5 create mode 100644 doc/lint/chap6 create mode 100644 doc/lint/chap7 create mode 100644 doc/lint/chap8 create mode 100644 doc/lint/chap9 create mode 100644 doc/lint/contents create mode 100644 doc/lint/frontpage create mode 100644 doc/lint/proto.make create mode 100644 doc/m2ref.doc create mode 100644 doc/m68020.doc create mode 100644 doc/ncg.doc create mode 100644 doc/nopt.doc create mode 100755 doc/occam/ctot create mode 100644 doc/occam/p0 create mode 100644 doc/occam/p1 create mode 100644 doc/occam/p2 create mode 100644 doc/occam/p3 create mode 100644 doc/occam/p4 create mode 100644 doc/occam/p5 create mode 100644 doc/occam/p6 create mode 100644 doc/occam/p7 create mode 100644 doc/occam/p8 create mode 100644 doc/occam/p9 create mode 100644 
doc/occam/proto.make create mode 100644 doc/pascal/ab+intro.doc create mode 100644 doc/pascal/compar.doc create mode 100644 doc/pascal/conf.doc create mode 100644 doc/pascal/contents.doc create mode 100644 doc/pascal/deviations.doc create mode 100644 doc/pascal/example.doc create mode 100644 doc/pascal/extensions.doc create mode 100644 doc/pascal/hints.doc create mode 100644 doc/pascal/his.doc create mode 100644 doc/pascal/improv.doc create mode 100644 doc/pascal/internal.doc create mode 100644 doc/pascal/options.doc create mode 100644 doc/pascal/proto.make create mode 100644 doc/pascal/reference.doc create mode 100644 doc/pascal/rtl.doc create mode 100644 doc/pascal/syntax.doc create mode 100644 doc/pascal/test.doc create mode 100644 doc/pascal/titlepg.doc create mode 100644 doc/pascal/transpem.doc create mode 100644 doc/pascal/vrk.doc create mode 100644 doc/pcref.doc create mode 100644 doc/peep.doc create mode 100644 doc/proto.make create mode 100644 doc/regadd.doc create mode 100644 doc/sparc/1 create mode 100644 doc/sparc/2 create mode 100644 doc/sparc/3 create mode 100644 doc/sparc/4 create mode 100644 doc/sparc/5 create mode 100644 doc/sparc/A create mode 100644 doc/sparc/B create mode 100644 doc/sparc/init create mode 100644 doc/sparc/intro create mode 100644 doc/sparc/note_on_reg_wins create mode 100644 doc/sparc/pics/EM_stack.orig create mode 100644 doc/sparc/pics/EM_stack.ours create mode 100644 doc/sparc/pics/compile_bars create mode 100644 doc/sparc/pics/mem_config create mode 100644 doc/sparc/pics/perf create mode 100644 doc/sparc/pics/perf.comp create mode 100644 doc/sparc/pics/perf.d create mode 100644 doc/sparc/pics/perf.dhry create mode 100644 doc/sparc/pics/reg_layout create mode 100644 doc/sparc/pics/run-time_bars create mode 100644 doc/sparc/pics/run-time_bars.bup create mode 100644 doc/sparc/pics/signal_stack create mode 100644 doc/sparc/proto.make create mode 100644 doc/sparc/refs create mode 100644 doc/sparc/timing create mode 100644 
doc/sparc/title create mode 100644 doc/toolkit.doc create mode 100644 doc/top/proto.make create mode 100644 doc/top/refs.top create mode 100644 doc/top/top.n create mode 100644 doc/v7bugs.doc create mode 100644 doc/val.doc create mode 100644 doc/z80.doc create mode 100644 emtest/Makefile create mode 100644 emtest/READ_ME create mode 100755 emtest/ok create mode 100644 emtest/select.c create mode 100644 emtest/test.h create mode 100644 emtest/tests create mode 100644 etc/em_table create mode 100644 etc/ip_spec.t create mode 100755 etc/new_table create mode 100644 etc/new_table_done create mode 100644 etc/pop_push create mode 100644 etc/proto.make create mode 100644 etc/traps create mode 100644 fast/Action create mode 100644 fast/driver/afcc.1 create mode 100644 fast/driver/afm2.1 create mode 100644 fast/driver/afpc.1 create mode 100644 fast/driver/driver.c create mode 100644 fast/driver/proto.make create mode 100644 fast/f_c.ansi/Parameters create mode 100644 fast/f_c.ansi/proto.main create mode 100644 fast/f_c.ansi/proto.make create mode 100644 fast/f_c/Parameters create mode 100644 fast/f_c/proto.main create mode 100644 fast/f_c/proto.make create mode 100644 fast/f_m2/Parameters create mode 100644 fast/f_m2/proto.main create mode 100644 fast/f_m2/proto.make create mode 100644 fast/f_pc/Parameters create mode 100644 fast/f_pc/proto.main create mode 100644 fast/f_pc/proto.make create mode 100644 fcc/Action create mode 100644 fcc/cemcom/Parameters.sun3 create mode 100644 fcc/cemcom/Parameters.vax4 create mode 100644 fcc/cemcom/proto.main create mode 100644 fcc/cemcom/proto.make create mode 100644 fcc/driver/fcc.1 create mode 100644 fcc/driver/fcc.c create mode 100644 fcc/driver/proto.make create mode 100755 first/cp_dir create mode 100755 first/create_dir create mode 100644 first/em_path.h.src create mode 100755 first/first create mode 100755 first/get_answer create mode 100755 first/get_makepars create mode 100755 first/get_sys create mode 100755 first/get_sysvax 
create mode 100644 first/install_tail create mode 100644 first/limit_enquire create mode 100755 first/limit_impl create mode 100644 first/lint_params create mode 100644 first/local.h.src create mode 100755 first/mk_config create mode 100755 first/mk_makefile create mode 100755 first/mk_target create mode 100644 first/target_comp create mode 100644 first/util_comp create mode 100644 h/Makefile create mode 100644 h/arch.h create mode 100644 h/as_spec.h create mode 100644 h/bc_io.h create mode 100644 h/bc_string.h create mode 100644 h/cg_pattern.h create mode 100644 h/cgg_cg.h create mode 100644 h/em_abs.h create mode 100644 h/em_ego.h create mode 100644 h/em_flag.h create mode 100644 h/em_mes.h create mode 100644 h/em_mnem.h create mode 100644 h/em_pseu.h create mode 100644 h/em_ptyp.h create mode 100644 h/em_reg.h create mode 100644 h/em_spec.h create mode 100644 h/ip_spec.h create mode 100644 h/m2_traps.h create mode 100644 h/ocm_chan.h create mode 100644 h/ocm_parco.h create mode 100644 h/ocm_proc.h create mode 100644 h/out.h create mode 100644 h/pc_err.h create mode 100644 h/pc_file.h create mode 100644 h/pc_math.h create mode 100644 h/ranlib.h create mode 100644 h/stb.h create mode 100644 include/_tail_cc/assert.h create mode 100644 include/_tail_cc/ctype.h create mode 100644 include/_tail_cc/errno.h create mode 100644 include/_tail_cc/fcntl.h create mode 100644 include/_tail_cc/grp.h create mode 100644 include/_tail_cc/math.h create mode 100644 include/_tail_cc/pwd.h create mode 100644 include/_tail_cc/setjmp.h create mode 100644 include/_tail_cc/sgtty.h create mode 100644 include/_tail_cc/signal.h create mode 100644 include/_tail_cc/stdio.h create mode 100644 include/_tail_cc/sys/dir.h create mode 100644 include/_tail_cc/sys/errno.h create mode 100644 include/_tail_cc/sys/stat.h create mode 100644 include/_tail_cc/sys/stdtypes.h create mode 100644 include/_tail_cc/sys/types.h create mode 100644 include/_tail_cc/time.h create mode 100644 
include/_tail_cc/varargs.h create mode 100644 include/_tail_mon/errno.h create mode 100644 include/_tail_mon/sgtty.h create mode 100644 include/_tail_mon/signal.h create mode 100644 include/_tail_mon/sys/timeb.h create mode 100644 include/_tail_mon/sys/types.h create mode 100644 include/occam/dec.ocm create mode 100644 include/occam/printd.ocm create mode 100644 include/occam/prints.ocm create mode 100644 lang/basic/lib/LIST create mode 100644 lang/basic/lib/tail_bc.a create mode 100644 lang/basic/src/basic.g create mode 100644 lang/basic/src/basic.lex create mode 100644 lang/basic/src/bem.c create mode 100644 lang/basic/src/bem.h create mode 100644 lang/basic/src/compile.c create mode 100644 lang/basic/src/eval.c create mode 100644 lang/basic/src/func.c create mode 100644 lang/basic/src/gencode.c create mode 100644 lang/basic/src/graph.c create mode 100644 lang/basic/src/graph.h create mode 100644 lang/basic/src/initialize.c create mode 100644 lang/basic/src/llmess.c create mode 100755 lang/basic/src/maketokentab create mode 100644 lang/basic/src/parsepar.c create mode 100644 lang/basic/src/proto.main create mode 100644 lang/basic/src/proto.make create mode 100644 lang/basic/src/symbols.c create mode 100644 lang/basic/src/symbols.h create mode 100644 lang/basic/src/util.c create mode 100644 lang/basic/src/yylexp.c create mode 100644 lang/basic/test/Makefile create mode 100644 lang/basic/test/Out.std create mode 100644 lang/basic/test/bull.b create mode 100644 lang/basic/test/bull.b.g create mode 100644 lang/basic/test/buzzword.b create mode 100644 lang/basic/test/buzzword.b.g create mode 100644 lang/basic/test/checker.b create mode 100644 lang/basic/test/checker.b.g create mode 100644 lang/basic/test/creator.b create mode 100644 lang/basic/test/grafiek.b create mode 100644 lang/basic/test/grafiek.b.g create mode 100644 lang/basic/test/gunner.b create mode 100644 lang/basic/test/gunner.b.g create mode 100644 lang/basic/test/learn.b create mode 100644 
lang/basic/test/learn.b.g create mode 100644 lang/basic/test/opg1.b create mode 100644 lang/basic/test/opg1.b.g create mode 100644 lang/basic/test/opg2.b create mode 100644 lang/basic/test/opg2.b.g create mode 100644 lang/basic/test/opg3.b create mode 100644 lang/basic/test/opg3.b.g create mode 100644 lang/basic/test/opg4.b create mode 100644 lang/basic/test/opg4.b.g create mode 100644 lang/basic/test/opg5.b create mode 100644 lang/basic/test/opg5.b.g create mode 100644 lang/basic/test/opg6.b create mode 100644 lang/basic/test/opg6.b.g create mode 100755 lang/basic/test/runcmp create mode 100644 lang/basic/test/som4.b create mode 100644 lang/basic/test/som4.b.g create mode 100644 lang/basic/test/test01.b create mode 100644 lang/basic/test/test01.b.g create mode 100644 lang/basic/test/test02.b create mode 100644 lang/basic/test/test02.b.g create mode 100644 lang/basic/test/test03.b create mode 100644 lang/basic/test/test03.b.g create mode 100644 lang/basic/test/test04.b create mode 100644 lang/basic/test/test04.b.g create mode 100644 lang/basic/test/test05.b create mode 100644 lang/basic/test/test05.b.g create mode 100644 lang/basic/test/test06.b create mode 100644 lang/basic/test/test06.b.g create mode 100644 lang/basic/test/test07.b create mode 100644 lang/basic/test/test07.b.g create mode 100644 lang/basic/test/test08.b create mode 100644 lang/basic/test/test08.b.g create mode 100644 lang/basic/test/test09.b create mode 100644 lang/basic/test/test09.b.g create mode 100644 lang/basic/test/test10.b create mode 100644 lang/basic/test/test10.b.g create mode 100644 lang/basic/test/test11.b create mode 100644 lang/basic/test/test11.b.g create mode 100644 lang/basic/test/test12.b create mode 100644 lang/basic/test/test12.b.g create mode 100644 lang/basic/test/test13.b create mode 100644 lang/basic/test/test13.b.g create mode 100644 lang/basic/test/test14.b create mode 100644 lang/basic/test/test14.b.g create mode 100644 lang/basic/test/test15.b create mode 100644 
lang/basic/test/test15.b.g create mode 100644 lang/basic/test/test16.b create mode 100644 lang/basic/test/test16.b.g create mode 100644 lang/basic/test/test17.b create mode 100644 lang/basic/test/test17.b.g create mode 100644 lang/basic/test/test18.b create mode 100644 lang/basic/test/test18.b.g create mode 100644 lang/basic/test/test19.b create mode 100644 lang/basic/test/test19.b.g create mode 100644 lang/basic/test/test20.b create mode 100644 lang/basic/test/test20.b.g create mode 100644 lang/basic/test/test21.b create mode 100644 lang/basic/test/test21.b.g create mode 100644 lang/basic/test/test22.b create mode 100644 lang/basic/test/test22.b.g create mode 100644 lang/basic/test/test23.b create mode 100644 lang/basic/test/test23.b.g create mode 100644 lang/basic/test/test24.b create mode 100644 lang/basic/test/test24.b.g create mode 100644 lang/basic/test/test25.b create mode 100644 lang/basic/test/test25.b.g create mode 100644 lang/basic/test/test26.b create mode 100644 lang/basic/test/test26.b.g create mode 100644 lang/basic/test/test27.b create mode 100644 lang/basic/test/test27.b.g create mode 100644 lang/basic/test/test28.b create mode 100644 lang/basic/test/test28.b.g create mode 100644 lang/basic/test/test29.b create mode 100644 lang/basic/test/test29.b.g create mode 100644 lang/basic/test/test30.b create mode 100644 lang/basic/test/test30.b.g create mode 100644 lang/basic/test/test31.b create mode 100644 lang/basic/test/test31.b.g create mode 100644 lang/basic/test/test32.b create mode 100644 lang/basic/test/test32.b.g create mode 100644 lang/basic/test/test33.b create mode 100644 lang/basic/test/test33.b.g create mode 100644 lang/basic/test/test34.b create mode 100644 lang/basic/test/test35.b create mode 100644 lang/basic/test/test35.b.g create mode 100644 lang/basic/test/tst/data create mode 100644 lang/basic/test/tst/data1 create mode 100644 lang/cem/cemcom.ansi/BigPars create mode 100644 lang/cem/cemcom.ansi/LLlex.c create mode 100644 
lang/cem/cemcom.ansi/LLlex.h create mode 100644 lang/cem/cemcom.ansi/LLmessage.c create mode 100644 lang/cem/cemcom.ansi/SmallPars create mode 100644 lang/cem/cemcom.ansi/align.h create mode 100644 lang/cem/cemcom.ansi/arith.c create mode 100644 lang/cem/cemcom.ansi/arith.h create mode 100644 lang/cem/cemcom.ansi/assert.h create mode 100644 lang/cem/cemcom.ansi/atw.h create mode 100644 lang/cem/cemcom.ansi/blocks.c create mode 100644 lang/cem/cemcom.ansi/cemcom.ansi.1 create mode 100644 lang/cem/cemcom.ansi/ch3.c create mode 100644 lang/cem/cemcom.ansi/ch3bin.c create mode 100644 lang/cem/cemcom.ansi/ch3mon.c create mode 100644 lang/cem/cemcom.ansi/char.tab create mode 100644 lang/cem/cemcom.ansi/class.h create mode 100644 lang/cem/cemcom.ansi/code.c create mode 100644 lang/cem/cemcom.ansi/code.str create mode 100644 lang/cem/cemcom.ansi/conversion.c create mode 100644 lang/cem/cemcom.ansi/cstoper.c create mode 100644 lang/cem/cemcom.ansi/dataflow.c create mode 100644 lang/cem/cemcom.ansi/declar.g create mode 100644 lang/cem/cemcom.ansi/declar.str create mode 100644 lang/cem/cemcom.ansi/declarator.c create mode 100644 lang/cem/cemcom.ansi/decspecs.c create mode 100644 lang/cem/cemcom.ansi/decspecs.h create mode 100644 lang/cem/cemcom.ansi/def.str create mode 100644 lang/cem/cemcom.ansi/domacro.c create mode 100644 lang/cem/cemcom.ansi/dumpidf.c create mode 100644 lang/cem/cemcom.ansi/error.c create mode 100644 lang/cem/cemcom.ansi/estack.str create mode 100644 lang/cem/cemcom.ansi/eval.c create mode 100644 lang/cem/cemcom.ansi/expr.c create mode 100644 lang/cem/cemcom.ansi/expr.str create mode 100644 lang/cem/cemcom.ansi/expression.g create mode 100644 lang/cem/cemcom.ansi/field.c create mode 100644 lang/cem/cemcom.ansi/field.str create mode 100644 lang/cem/cemcom.ansi/file_info.h create mode 100644 lang/cem/cemcom.ansi/fltcstoper.c create mode 100644 lang/cem/cemcom.ansi/idf.c create mode 100644 lang/cem/cemcom.ansi/idf.str create mode 100644 
lang/cem/cemcom.ansi/init.c create mode 100644 lang/cem/cemcom.ansi/input.c create mode 100644 lang/cem/cemcom.ansi/input.h create mode 100644 lang/cem/cemcom.ansi/interface.h create mode 100644 lang/cem/cemcom.ansi/ival.g create mode 100644 lang/cem/cemcom.ansi/l_brace.str create mode 100644 lang/cem/cemcom.ansi/l_class.h create mode 100644 lang/cem/cemcom.ansi/l_comment.c create mode 100644 lang/cem/cemcom.ansi/l_comment.h create mode 100644 lang/cem/cemcom.ansi/l_em.h create mode 100644 lang/cem/cemcom.ansi/l_ev_ord.c create mode 100644 lang/cem/cemcom.ansi/l_lint.c create mode 100644 lang/cem/cemcom.ansi/l_lint.h create mode 100644 lang/cem/cemcom.ansi/l_misc.c create mode 100644 lang/cem/cemcom.ansi/l_outdef.c create mode 100644 lang/cem/cemcom.ansi/l_outdef.str create mode 100644 lang/cem/cemcom.ansi/l_state.str create mode 100644 lang/cem/cemcom.ansi/l_states.c create mode 100644 lang/cem/cemcom.ansi/label.c create mode 100644 lang/cem/cemcom.ansi/label.h create mode 100644 lang/cem/cemcom.ansi/level.h create mode 100644 lang/cem/cemcom.ansi/macro.str create mode 100644 lang/cem/cemcom.ansi/main.c create mode 100755 lang/cem/cemcom.ansi/make.allocd create mode 100755 lang/cem/cemcom.ansi/make.hfiles create mode 100755 lang/cem/cemcom.ansi/make.next create mode 100755 lang/cem/cemcom.ansi/make.tokcase create mode 100755 lang/cem/cemcom.ansi/make.tokfile create mode 100644 lang/cem/cemcom.ansi/mes.h create mode 100644 lang/cem/cemcom.ansi/options create mode 100644 lang/cem/cemcom.ansi/options.c create mode 100644 lang/cem/cemcom.ansi/pragma.c create mode 100644 lang/cem/cemcom.ansi/program.g create mode 100644 lang/cem/cemcom.ansi/proto.c create mode 100644 lang/cem/cemcom.ansi/proto.main create mode 100644 lang/cem/cemcom.ansi/proto.make create mode 100644 lang/cem/cemcom.ansi/proto.str create mode 100644 lang/cem/cemcom.ansi/replace.c create mode 100644 lang/cem/cemcom.ansi/replace.str create mode 100644 lang/cem/cemcom.ansi/sizes.h create mode 100644 
lang/cem/cemcom.ansi/skip.c create mode 100644 lang/cem/cemcom.ansi/specials.h create mode 100644 lang/cem/cemcom.ansi/stab.c create mode 100644 lang/cem/cemcom.ansi/stack.c create mode 100644 lang/cem/cemcom.ansi/stack.str create mode 100644 lang/cem/cemcom.ansi/statement.g create mode 100644 lang/cem/cemcom.ansi/stb.c create mode 100644 lang/cem/cemcom.ansi/stmt.str create mode 100644 lang/cem/cemcom.ansi/struct.c create mode 100644 lang/cem/cemcom.ansi/struct.str create mode 100644 lang/cem/cemcom.ansi/switch.c create mode 100644 lang/cem/cemcom.ansi/switch.str create mode 100644 lang/cem/cemcom.ansi/tokenname.c create mode 100644 lang/cem/cemcom.ansi/tokenname.h create mode 100644 lang/cem/cemcom.ansi/type.c create mode 100644 lang/cem/cemcom.ansi/type.str create mode 100644 lang/cem/cemcom.ansi/util.c create mode 100644 lang/cem/cemcom.ansi/util.str create mode 100644 lang/cem/cemcom/BigPars create mode 100644 lang/cem/cemcom/LLlex.c create mode 100644 lang/cem/cemcom/LLlex.h create mode 100644 lang/cem/cemcom/LLmessage.c create mode 100644 lang/cem/cemcom/SmallPars create mode 100644 lang/cem/cemcom/align.h create mode 100644 lang/cem/cemcom/arith.c create mode 100644 lang/cem/cemcom/arith.h create mode 100644 lang/cem/cemcom/asm.c create mode 100644 lang/cem/cemcom/assert.h create mode 100644 lang/cem/cemcom/atw.h create mode 100644 lang/cem/cemcom/blocks.c create mode 100644 lang/cem/cemcom/cemcom.1 create mode 100644 lang/cem/cemcom/ch7.c create mode 100644 lang/cem/cemcom/ch7bin.c create mode 100644 lang/cem/cemcom/ch7mon.c create mode 100644 lang/cem/cemcom/char.tab create mode 100644 lang/cem/cemcom/class.h create mode 100644 lang/cem/cemcom/code.c create mode 100644 lang/cem/cemcom/code.str create mode 100644 lang/cem/cemcom/conversion.c create mode 100644 lang/cem/cemcom/cstoper.c create mode 100644 lang/cem/cemcom/dataflow.c create mode 100644 lang/cem/cemcom/declar.g create mode 100644 lang/cem/cemcom/declar.str create mode 100644 
lang/cem/cemcom/declarator.c create mode 100644 lang/cem/cemcom/decspecs.c create mode 100644 lang/cem/cemcom/decspecs.str create mode 100644 lang/cem/cemcom/def.str create mode 100644 lang/cem/cemcom/domacro.c create mode 100644 lang/cem/cemcom/dumpidf.c create mode 100644 lang/cem/cemcom/error.c create mode 100644 lang/cem/cemcom/estack.str create mode 100644 lang/cem/cemcom/eval.c create mode 100644 lang/cem/cemcom/expr.c create mode 100644 lang/cem/cemcom/expr.str create mode 100644 lang/cem/cemcom/expression.g create mode 100644 lang/cem/cemcom/field.c create mode 100644 lang/cem/cemcom/field.str create mode 100644 lang/cem/cemcom/file_info.h create mode 100644 lang/cem/cemcom/idf.c create mode 100644 lang/cem/cemcom/idf.str create mode 100644 lang/cem/cemcom/init.c create mode 100644 lang/cem/cemcom/input.c create mode 100644 lang/cem/cemcom/input.h create mode 100644 lang/cem/cemcom/interface.h create mode 100644 lang/cem/cemcom/ival.g create mode 100644 lang/cem/cemcom/l_brace.str create mode 100644 lang/cem/cemcom/l_class.h create mode 100644 lang/cem/cemcom/l_comment.c create mode 100644 lang/cem/cemcom/l_comment.h create mode 100644 lang/cem/cemcom/l_em.h create mode 100644 lang/cem/cemcom/l_ev_ord.c create mode 100644 lang/cem/cemcom/l_lint.c create mode 100644 lang/cem/cemcom/l_lint.h create mode 100644 lang/cem/cemcom/l_misc.c create mode 100644 lang/cem/cemcom/l_outdef.c create mode 100644 lang/cem/cemcom/l_outdef.str create mode 100644 lang/cem/cemcom/l_state.str create mode 100644 lang/cem/cemcom/l_states.c create mode 100644 lang/cem/cemcom/label.c create mode 100644 lang/cem/cemcom/label.h create mode 100644 lang/cem/cemcom/level.h create mode 100644 lang/cem/cemcom/macro.str create mode 100644 lang/cem/cemcom/main.c create mode 100755 lang/cem/cemcom/make.allocd create mode 100755 lang/cem/cemcom/make.hfiles create mode 100755 lang/cem/cemcom/make.next create mode 100755 lang/cem/cemcom/make.tokcase create mode 100755 
lang/cem/cemcom/make.tokfile create mode 100644 lang/cem/cemcom/mcomm.c create mode 100644 lang/cem/cemcom/mes.h create mode 100644 lang/cem/cemcom/options create mode 100644 lang/cem/cemcom/options.c create mode 100644 lang/cem/cemcom/program.g create mode 100644 lang/cem/cemcom/proto.main create mode 100644 lang/cem/cemcom/proto.make create mode 100644 lang/cem/cemcom/replace.c create mode 100644 lang/cem/cemcom/scan.c create mode 100644 lang/cem/cemcom/sizes.h create mode 100644 lang/cem/cemcom/skip.c create mode 100644 lang/cem/cemcom/specials.h create mode 100644 lang/cem/cemcom/stab.c create mode 100644 lang/cem/cemcom/stack.c create mode 100644 lang/cem/cemcom/stack.str create mode 100644 lang/cem/cemcom/statement.g create mode 100644 lang/cem/cemcom/stb.c create mode 100644 lang/cem/cemcom/stmt.str create mode 100644 lang/cem/cemcom/struct.c create mode 100644 lang/cem/cemcom/struct.str create mode 100644 lang/cem/cemcom/switch.c create mode 100644 lang/cem/cemcom/switch.str create mode 100644 lang/cem/cemcom/tokenname.c create mode 100644 lang/cem/cemcom/tokenname.h create mode 100644 lang/cem/cemcom/type.c create mode 100644 lang/cem/cemcom/type.str create mode 100644 lang/cem/cemcom/util.c create mode 100644 lang/cem/cemcom/util.str create mode 100644 lang/cem/cpp.ansi/LLlex.c create mode 100644 lang/cem/cpp.ansi/LLlex.h create mode 100644 lang/cem/cpp.ansi/LLmessage.c create mode 100644 lang/cem/cpp.ansi/Parameters create mode 100644 lang/cem/cpp.ansi/arith.h create mode 100644 lang/cem/cpp.ansi/bits.h create mode 100644 lang/cem/cpp.ansi/ch3bin.c create mode 100644 lang/cem/cpp.ansi/ch3mon.c create mode 100644 lang/cem/cpp.ansi/char.tab create mode 100644 lang/cem/cpp.ansi/class.h create mode 100644 lang/cem/cpp.ansi/domacro.c create mode 100644 lang/cem/cpp.ansi/error.c create mode 100644 lang/cem/cpp.ansi/expr.c create mode 100644 lang/cem/cpp.ansi/expression.g create mode 100644 lang/cem/cpp.ansi/file_info.h create mode 100644 
lang/cem/cpp.ansi/idf.c create mode 100644 lang/cem/cpp.ansi/idf.h create mode 100644 lang/cem/cpp.ansi/init.c create mode 100644 lang/cem/cpp.ansi/input.c create mode 100644 lang/cem/cpp.ansi/input.h create mode 100644 lang/cem/cpp.ansi/macro.str create mode 100644 lang/cem/cpp.ansi/main.c create mode 100755 lang/cem/cpp.ansi/make.allocd create mode 100755 lang/cem/cpp.ansi/make.hfiles create mode 100755 lang/cem/cpp.ansi/make.next create mode 100755 lang/cem/cpp.ansi/make.tokcase create mode 100755 lang/cem/cpp.ansi/make.tokfile create mode 100644 lang/cem/cpp.ansi/ncpp.6 create mode 100644 lang/cem/cpp.ansi/options.c create mode 100644 lang/cem/cpp.ansi/preprocess.c create mode 100644 lang/cem/cpp.ansi/proto.main create mode 100644 lang/cem/cpp.ansi/proto.make create mode 100644 lang/cem/cpp.ansi/replace.c create mode 100644 lang/cem/cpp.ansi/replace.str create mode 100644 lang/cem/cpp.ansi/skip.c create mode 100644 lang/cem/cpp.ansi/tokenname.c create mode 100644 lang/cem/ctest/Out2.nf.std create mode 100644 lang/cem/ctest/Out2.std create mode 100644 lang/cem/ctest/Out4.nf.std create mode 100644 lang/cem/ctest/Out4.std create mode 100644 lang/cem/ctest/READ_ME create mode 100644 lang/cem/ctest/ctconv/conv.c create mode 100644 lang/cem/ctest/ctconv/conv.cem.g create mode 100644 lang/cem/ctest/ctconv/run create mode 100644 lang/cem/ctest/ctdecl/decl.c create mode 100644 lang/cem/ctest/ctdecl/decl.cem.g create mode 100644 lang/cem/ctest/ctdecl/run create mode 100644 lang/cem/ctest/ctdivers/ops.c create mode 100644 lang/cem/ctest/ctdivers/ops.cem.g create mode 100644 lang/cem/ctest/ctdivers/run create mode 100644 lang/cem/ctest/cterr/bugs.c create mode 100644 lang/cem/ctest/cterr/bugs.cem.g create mode 100755 lang/cem/ctest/cterr/run create mode 100644 lang/cem/ctest/ctest1/run create mode 100644 lang/cem/ctest/ctest1/test.c create mode 100644 lang/cem/ctest/ctest1/test.cem.g create mode 100644 lang/cem/ctest/ctest2/run create mode 100644 lang/cem/ctest/ctest2/t7.c 
create mode 100644 lang/cem/ctest/ctest2/t7.cem.g create mode 100755 lang/cem/ctest/ctest3/run create mode 100644 lang/cem/ctest/ctest3/test2.c create mode 100644 lang/cem/ctest/ctest3/test2.cem.g create mode 100644 lang/cem/ctest/ctest5/run create mode 100644 lang/cem/ctest/ctest5/test1.c create mode 100644 lang/cem/ctest/ctest5/test1.cem.g create mode 100644 lang/cem/ctest/ctgen/OPS create mode 100644 lang/cem/ctest/ctgen/bf.cem.g create mode 100644 lang/cem/ctest/ctgen/bf.sed create mode 100644 lang/cem/ctest/ctgen/bfu.cem.g create mode 100644 lang/cem/ctest/ctgen/bfu.sed create mode 100644 lang/cem/ctest/ctgen/cel.cem.g create mode 100644 lang/cem/ctest/ctgen/cel.sed create mode 100644 lang/cem/ctest/ctgen/clu.cem.g create mode 100644 lang/cem/ctest/ctgen/clu.sed create mode 100644 lang/cem/ctest/ctgen/ec.cem.g create mode 100644 lang/cem/ctest/ctgen/ec.sed create mode 100644 lang/cem/ctest/ctgen/ef.cem.g create mode 100644 lang/cem/ctest/ctgen/ef.sed create mode 100644 lang/cem/ctest/ctgen/ei.cem.g create mode 100644 lang/cem/ctest/ctgen/ei.sed create mode 100644 lang/cem/ctest/ctgen/el.cem.g create mode 100644 lang/cem/ctest/ctgen/el.sed create mode 100644 lang/cem/ctest/ctgen/eu.cem.g create mode 100644 lang/cem/ctest/ctgen/eu.sed create mode 100644 lang/cem/ctest/ctgen/id.cem.g create mode 100644 lang/cem/ctest/ctgen/id.sed create mode 100644 lang/cem/ctest/ctgen/lc.cem.g create mode 100644 lang/cem/ctest/ctgen/lc.sed create mode 100644 lang/cem/ctest/ctgen/ld.cem.g create mode 100644 lang/cem/ctest/ctgen/ld.sed create mode 100644 lang/cem/ctest/ctgen/lf.cem.g create mode 100644 lang/cem/ctest/ctgen/lf.sed create mode 100644 lang/cem/ctest/ctgen/li.cem.g create mode 100644 lang/cem/ctest/ctgen/li.sed create mode 100644 lang/cem/ctest/ctgen/ll.cem.g create mode 100644 lang/cem/ctest/ctgen/ll.sed create mode 100644 lang/cem/ctest/ctgen/lu.cem.g create mode 100644 lang/cem/ctest/ctgen/lu.sed create mode 100644 lang/cem/ctest/ctgen/makefile create mode 100755 
lang/cem/ctest/ctgen/mkc create mode 100755 lang/cem/ctest/ctgen/run create mode 100755 lang/cem/ctest/ctgen/run1 create mode 100644 lang/cem/ctest/ctill/noarg.c create mode 100644 lang/cem/ctest/ctill/noarg.cem.g create mode 100755 lang/cem/ctest/ctill/run create mode 100644 lang/cem/ctest/ctinit/init.c create mode 100644 lang/cem/ctest/ctinit/init.cem.g create mode 100644 lang/cem/ctest/ctinit/run create mode 100644 lang/cem/ctest/ctmargt/margt.c create mode 100644 lang/cem/ctest/ctmargt/margt.cem.g create mode 100644 lang/cem/ctest/ctmargt/run create mode 100644 lang/cem/ctest/ctprof/makefile create mode 100644 lang/cem/ctest/ctprof/procentry.c create mode 100644 lang/cem/ctest/ctprof/run create mode 100644 lang/cem/ctest/ctprof/tp.c create mode 100644 lang/cem/ctest/ctprof/tp.cem.g create mode 100755 lang/cem/ctest/ctsetjmp/run create mode 100644 lang/cem/ctest/ctsetjmp/stjmp.c create mode 100644 lang/cem/ctest/ctsetjmp/stjmp.cem.g create mode 100644 lang/cem/ctest/ctstruct/run create mode 100644 lang/cem/ctest/ctstruct/str.c create mode 100644 lang/cem/ctest/ctstruct/str.cem.g create mode 100644 lang/cem/ctest/ctsys/run create mode 100644 lang/cem/ctest/ctsys/signal.c create mode 100644 lang/cem/ctest/ctsys/tfork.c create mode 100644 lang/cem/ctest/ctsys/tfork.cem.g create mode 100644 lang/cem/ctest/local.h create mode 100644 lang/cem/ctest/makefile.std create mode 100755 lang/cem/ctest/run create mode 100644 lang/cem/libcc.ansi/LIST create mode 100644 lang/cem/libcc.ansi/assert/LIST create mode 100644 lang/cem/libcc.ansi/assert/Makefile create mode 100644 lang/cem/libcc.ansi/assert/assert.c create mode 100644 lang/cem/libcc.ansi/ctype/LIST create mode 100644 lang/cem/libcc.ansi/ctype/Makefile create mode 100644 lang/cem/libcc.ansi/ctype/char.tab create mode 100644 lang/cem/libcc.ansi/ctype/genfiles create mode 100644 lang/cem/libcc.ansi/ctype/tolower.c create mode 100644 lang/cem/libcc.ansi/ctype/toupper.c create mode 100644 lang/cem/libcc.ansi/errno/LIST 
create mode 100644 lang/cem/libcc.ansi/errno/Makefile create mode 100644 lang/cem/libcc.ansi/errno/errlist.c create mode 100644 lang/cem/libcc.ansi/head_ac.e create mode 100644 lang/cem/libcc.ansi/headers/LIST create mode 100644 lang/cem/libcc.ansi/headers/Makefile create mode 100644 lang/cem/libcc.ansi/headers/assert.h create mode 100644 lang/cem/libcc.ansi/headers/ctype.h create mode 100644 lang/cem/libcc.ansi/headers/dirent.h create mode 100644 lang/cem/libcc.ansi/headers/errno.h create mode 100644 lang/cem/libcc.ansi/headers/float.h create mode 100644 lang/cem/libcc.ansi/headers/grp.h create mode 100644 lang/cem/libcc.ansi/headers/limits.h create mode 100644 lang/cem/libcc.ansi/headers/locale.h create mode 100644 lang/cem/libcc.ansi/headers/math.h create mode 100644 lang/cem/libcc.ansi/headers/mathconst.h create mode 100644 lang/cem/libcc.ansi/headers/setjmp.h create mode 100644 lang/cem/libcc.ansi/headers/signal.h create mode 100644 lang/cem/libcc.ansi/headers/stdarg.h create mode 100644 lang/cem/libcc.ansi/headers/stddef.h create mode 100644 lang/cem/libcc.ansi/headers/stdio.h create mode 100644 lang/cem/libcc.ansi/headers/stdlib.h create mode 100644 lang/cem/libcc.ansi/headers/string.h create mode 100644 lang/cem/libcc.ansi/headers/sys/dirent.h create mode 100644 lang/cem/libcc.ansi/headers/sys/errno.h create mode 100644 lang/cem/libcc.ansi/headers/time.h create mode 100644 lang/cem/libcc.ansi/headers/varargs.h create mode 100644 lang/cem/libcc.ansi/locale/LIST create mode 100644 lang/cem/libcc.ansi/locale/Makefile create mode 100644 lang/cem/libcc.ansi/locale/localeconv.c create mode 100644 lang/cem/libcc.ansi/locale/setlocale.c create mode 100644 lang/cem/libcc.ansi/math/LIST create mode 100644 lang/cem/libcc.ansi/math/Makefile create mode 100644 lang/cem/libcc.ansi/math/asin.c create mode 100644 lang/cem/libcc.ansi/math/atan.c create mode 100644 lang/cem/libcc.ansi/math/atan2.c create mode 100644 lang/cem/libcc.ansi/math/ceil.c create mode 100644 
lang/cem/libcc.ansi/math/exp.c create mode 100644 lang/cem/libcc.ansi/math/fabs.c create mode 100644 lang/cem/libcc.ansi/math/floor.c create mode 100644 lang/cem/libcc.ansi/math/fmod.c create mode 100644 lang/cem/libcc.ansi/math/frexp.e create mode 100644 lang/cem/libcc.ansi/math/hugeval.c create mode 100644 lang/cem/libcc.ansi/math/isnan.c create mode 100644 lang/cem/libcc.ansi/math/ldexp.c create mode 100644 lang/cem/libcc.ansi/math/localmath.h create mode 100644 lang/cem/libcc.ansi/math/log.c create mode 100644 lang/cem/libcc.ansi/math/log10.c create mode 100644 lang/cem/libcc.ansi/math/modf.e create mode 100644 lang/cem/libcc.ansi/math/pow.c create mode 100644 lang/cem/libcc.ansi/math/sin.c create mode 100644 lang/cem/libcc.ansi/math/sinh.c create mode 100644 lang/cem/libcc.ansi/math/sqrt.c create mode 100644 lang/cem/libcc.ansi/math/tan.c create mode 100644 lang/cem/libcc.ansi/math/tanh.c create mode 100644 lang/cem/libcc.ansi/misc/LIST create mode 100644 lang/cem/libcc.ansi/misc/Makefile create mode 100644 lang/cem/libcc.ansi/misc/closedir.c create mode 100644 lang/cem/libcc.ansi/misc/environ.c create mode 100644 lang/cem/libcc.ansi/misc/fdopen.c create mode 100644 lang/cem/libcc.ansi/misc/getdents.c create mode 100644 lang/cem/libcc.ansi/misc/getgrent.c create mode 100644 lang/cem/libcc.ansi/misc/getopt.c create mode 100644 lang/cem/libcc.ansi/misc/getpass.c create mode 100644 lang/cem/libcc.ansi/misc/getpw.c create mode 100644 lang/cem/libcc.ansi/misc/getw.c create mode 100644 lang/cem/libcc.ansi/misc/hypot.c create mode 100644 lang/cem/libcc.ansi/misc/isatty.c create mode 100644 lang/cem/libcc.ansi/misc/mktemp.c create mode 100644 lang/cem/libcc.ansi/misc/opendir.c create mode 100644 lang/cem/libcc.ansi/misc/popen.c create mode 100644 lang/cem/libcc.ansi/misc/putenv.c create mode 100644 lang/cem/libcc.ansi/misc/putw.c create mode 100644 lang/cem/libcc.ansi/misc/readdir.c create mode 100644 lang/cem/libcc.ansi/misc/rewinddir.c create mode 100644 
lang/cem/libcc.ansi/misc/seekdir.c create mode 100644 lang/cem/libcc.ansi/misc/sleep.c create mode 100644 lang/cem/libcc.ansi/misc/telldir.c create mode 100644 lang/cem/libcc.ansi/misc/termcap.c create mode 100644 lang/cem/libcc.ansi/proto.make create mode 100644 lang/cem/libcc.ansi/setjmp/LIST create mode 100644 lang/cem/libcc.ansi/setjmp/Makefile create mode 100644 lang/cem/libcc.ansi/setjmp/setjmp.e create mode 100644 lang/cem/libcc.ansi/setjmp/sigmisc.c create mode 100644 lang/cem/libcc.ansi/signal/LIST create mode 100644 lang/cem/libcc.ansi/signal/Makefile create mode 100644 lang/cem/libcc.ansi/signal/raise.c create mode 100644 lang/cem/libcc.ansi/stdio/LIST create mode 100644 lang/cem/libcc.ansi/stdio/Makefile create mode 100644 lang/cem/libcc.ansi/stdio/clearerr.c create mode 100644 lang/cem/libcc.ansi/stdio/data.c create mode 100644 lang/cem/libcc.ansi/stdio/doprnt.c create mode 100644 lang/cem/libcc.ansi/stdio/doscan.c create mode 100644 lang/cem/libcc.ansi/stdio/ecvt.c create mode 100644 lang/cem/libcc.ansi/stdio/fclose.c create mode 100644 lang/cem/libcc.ansi/stdio/feof.c create mode 100644 lang/cem/libcc.ansi/stdio/ferror.c create mode 100644 lang/cem/libcc.ansi/stdio/fflush.c create mode 100644 lang/cem/libcc.ansi/stdio/fgetc.c create mode 100644 lang/cem/libcc.ansi/stdio/fgetpos.c create mode 100644 lang/cem/libcc.ansi/stdio/fgets.c create mode 100644 lang/cem/libcc.ansi/stdio/fileno.c create mode 100644 lang/cem/libcc.ansi/stdio/fillbuf.c create mode 100644 lang/cem/libcc.ansi/stdio/fltpr.c create mode 100644 lang/cem/libcc.ansi/stdio/flushbuf.c create mode 100644 lang/cem/libcc.ansi/stdio/fopen.c create mode 100644 lang/cem/libcc.ansi/stdio/fprintf.c create mode 100644 lang/cem/libcc.ansi/stdio/fputc.c create mode 100644 lang/cem/libcc.ansi/stdio/fputs.c create mode 100644 lang/cem/libcc.ansi/stdio/fread.c create mode 100644 lang/cem/libcc.ansi/stdio/freopen.c create mode 100644 lang/cem/libcc.ansi/stdio/fscanf.c create mode 100644 
lang/cem/libcc.ansi/stdio/fseek.c create mode 100644 lang/cem/libcc.ansi/stdio/fsetpos.c create mode 100644 lang/cem/libcc.ansi/stdio/ftell.c create mode 100644 lang/cem/libcc.ansi/stdio/fwrite.c create mode 100644 lang/cem/libcc.ansi/stdio/getc.c create mode 100644 lang/cem/libcc.ansi/stdio/getchar.c create mode 100644 lang/cem/libcc.ansi/stdio/gets.c create mode 100644 lang/cem/libcc.ansi/stdio/icompute.c create mode 100644 lang/cem/libcc.ansi/stdio/isatty.c create mode 100644 lang/cem/libcc.ansi/stdio/loc_incl.h create mode 100644 lang/cem/libcc.ansi/stdio/perror.c create mode 100644 lang/cem/libcc.ansi/stdio/printf.c create mode 100644 lang/cem/libcc.ansi/stdio/putc.c create mode 100644 lang/cem/libcc.ansi/stdio/putchar.c create mode 100644 lang/cem/libcc.ansi/stdio/puts.c create mode 100644 lang/cem/libcc.ansi/stdio/remove.c create mode 100644 lang/cem/libcc.ansi/stdio/rename.c create mode 100644 lang/cem/libcc.ansi/stdio/rewind.c create mode 100644 lang/cem/libcc.ansi/stdio/scanf.c create mode 100644 lang/cem/libcc.ansi/stdio/setbuf.c create mode 100644 lang/cem/libcc.ansi/stdio/setvbuf.c create mode 100644 lang/cem/libcc.ansi/stdio/sprintf.c create mode 100644 lang/cem/libcc.ansi/stdio/sscanf.c create mode 100644 lang/cem/libcc.ansi/stdio/tmpfile.c create mode 100644 lang/cem/libcc.ansi/stdio/tmpnam.c create mode 100644 lang/cem/libcc.ansi/stdio/ungetc.c create mode 100644 lang/cem/libcc.ansi/stdio/vfprintf.c create mode 100644 lang/cem/libcc.ansi/stdio/vprintf.c create mode 100644 lang/cem/libcc.ansi/stdio/vsprintf.c create mode 100644 lang/cem/libcc.ansi/stdlib/LIST create mode 100644 lang/cem/libcc.ansi/stdlib/Makefile create mode 100644 lang/cem/libcc.ansi/stdlib/abort.c create mode 100644 lang/cem/libcc.ansi/stdlib/abs.c create mode 100644 lang/cem/libcc.ansi/stdlib/atexit.c create mode 100644 lang/cem/libcc.ansi/stdlib/atof.c create mode 100644 lang/cem/libcc.ansi/stdlib/atoi.c create mode 100644 lang/cem/libcc.ansi/stdlib/atol.c create mode 100644 
lang/cem/libcc.ansi/stdlib/bsearch.c create mode 100644 lang/cem/libcc.ansi/stdlib/div.c create mode 100644 lang/cem/libcc.ansi/stdlib/exit.c create mode 100644 lang/cem/libcc.ansi/stdlib/ext_comp.c create mode 100644 lang/cem/libcc.ansi/stdlib/ext_fmt.h create mode 100644 lang/cem/libcc.ansi/stdlib/getenv.c create mode 100644 lang/cem/libcc.ansi/stdlib/labs.c create mode 100644 lang/cem/libcc.ansi/stdlib/ldiv.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/Makefile create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/READ_ME create mode 100755 lang/cem/libcc.ansi/stdlib/malloc/add_file create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/check.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/check.h create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/global.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/impl.h create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/log.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/log.h create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/mal.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/param.h create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/phys.c create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/phys.h create mode 100644 lang/cem/libcc.ansi/stdlib/malloc/size_type.h create mode 100644 lang/cem/libcc.ansi/stdlib/mblen.c create mode 100644 lang/cem/libcc.ansi/stdlib/mbstowcs.c create mode 100644 lang/cem/libcc.ansi/stdlib/mbtowc.c create mode 100644 lang/cem/libcc.ansi/stdlib/qsort.c create mode 100644 lang/cem/libcc.ansi/stdlib/rand.c create mode 100644 lang/cem/libcc.ansi/stdlib/strtod.c create mode 100644 lang/cem/libcc.ansi/stdlib/strtol.c create mode 100644 lang/cem/libcc.ansi/stdlib/system.c create mode 100644 lang/cem/libcc.ansi/stdlib/wcstombs.c create mode 100644 lang/cem/libcc.ansi/stdlib/wctomb.c create mode 100644 lang/cem/libcc.ansi/string/LIST create mode 100644 lang/cem/libcc.ansi/string/Makefile create mode 100644 
lang/cem/libcc.ansi/string/memchr.c create mode 100644 lang/cem/libcc.ansi/string/memcmp.c create mode 100644 lang/cem/libcc.ansi/string/memcpy.c create mode 100644 lang/cem/libcc.ansi/string/memmove.c create mode 100644 lang/cem/libcc.ansi/string/memset.c create mode 100644 lang/cem/libcc.ansi/string/strcat.c create mode 100644 lang/cem/libcc.ansi/string/strchr.c create mode 100644 lang/cem/libcc.ansi/string/strcmp.c create mode 100644 lang/cem/libcc.ansi/string/strcoll.c create mode 100644 lang/cem/libcc.ansi/string/strcpy.c create mode 100644 lang/cem/libcc.ansi/string/strcspn.c create mode 100644 lang/cem/libcc.ansi/string/strerror.c create mode 100644 lang/cem/libcc.ansi/string/strlen.c create mode 100644 lang/cem/libcc.ansi/string/strncat.c create mode 100644 lang/cem/libcc.ansi/string/strncmp.c create mode 100644 lang/cem/libcc.ansi/string/strncpy.c create mode 100644 lang/cem/libcc.ansi/string/strpbrk.c create mode 100644 lang/cem/libcc.ansi/string/strrchr.c create mode 100644 lang/cem/libcc.ansi/string/strspn.c create mode 100644 lang/cem/libcc.ansi/string/strstr.c create mode 100644 lang/cem/libcc.ansi/string/strtok.c create mode 100644 lang/cem/libcc.ansi/string/strxfrm.c create mode 100644 lang/cem/libcc.ansi/time/LIST create mode 100644 lang/cem/libcc.ansi/time/Makefile create mode 100644 lang/cem/libcc.ansi/time/asctime.c create mode 100644 lang/cem/libcc.ansi/time/clock.c create mode 100644 lang/cem/libcc.ansi/time/ctime.c create mode 100644 lang/cem/libcc.ansi/time/difftime.c create mode 100644 lang/cem/libcc.ansi/time/gmtime.c create mode 100644 lang/cem/libcc.ansi/time/loc_time.h create mode 100644 lang/cem/libcc.ansi/time/localtime.c create mode 100644 lang/cem/libcc.ansi/time/misc.c create mode 100644 lang/cem/libcc.ansi/time/mktime.c create mode 100644 lang/cem/libcc.ansi/time/strftime.c create mode 100644 lang/cem/libcc.ansi/time/time.c create mode 100644 lang/cem/libcc.ansi/time/tzset.c create mode 100644 lang/cem/libcc/gen/LIST create mode 
100644 lang/cem/libcc/gen/head_cc.e create mode 100644 lang/cem/libcc/gen/tail_cc.2g.a create mode 100644 lang/cem/libcc/math/LIST create mode 100644 lang/cem/libcc/math/tail_m.a create mode 100644 lang/cem/libcc/math/test.c create mode 100644 lang/cem/libcc/mon/LIST create mode 100644 lang/cem/libcc/mon/tail_mon.a create mode 100644 lang/cem/libcc/mon/types create mode 100644 lang/cem/libcc/stdio/LIST create mode 100644 lang/cem/libcc/stdio/tail_cc.1s.a create mode 100644 lang/cem/lint/README create mode 100644 lang/cem/lint/llib/ChangeLog create mode 100644 lang/cem/lint/llib/README create mode 100644 lang/cem/lint/llib/ansi_c.llb create mode 100644 lang/cem/lint/llib/c.llb create mode 100644 lang/cem/lint/llib/curses.llb create mode 100644 lang/cem/lint/llib/m.llb create mode 100644 lang/cem/lint/llib/proto.make create mode 100644 lang/cem/lint/llib/termlib.llb create mode 100644 lang/cem/lint/llib/unix7.c create mode 100644 lang/cem/lint/lpass1.ansi/Parameters create mode 100644 lang/cem/lint/lpass1.ansi/proto.main create mode 100644 lang/cem/lint/lpass1.ansi/proto.make create mode 100644 lang/cem/lint/lpass1/Parameters create mode 100644 lang/cem/lint/lpass1/proto.main create mode 100644 lang/cem/lint/lpass1/proto.make create mode 100644 lang/cem/lint/lpass2/ChangeLog create mode 100644 lang/cem/lint/lpass2/checkargs.c create mode 100644 lang/cem/lint/lpass2/class.c create mode 100644 lang/cem/lint/lpass2/class.h create mode 100644 lang/cem/lint/lpass2/inpdef.str create mode 100644 lang/cem/lint/lpass2/l_print3ack.c create mode 100755 lang/cem/lint/lpass2/lint create mode 100644 lang/cem/lint/lpass2/lint.1 create mode 100644 lang/cem/lint/lpass2/lpass2.c create mode 100644 lang/cem/lint/lpass2/private.h create mode 100644 lang/cem/lint/lpass2/proto.make create mode 100644 lang/cem/lint/lpass2/read.c create mode 100644 lang/cem/lint/lpass2/report.c create mode 100644 lang/cem/lint/proto.make create mode 100644 lang/fortran/changes create mode 100644 
lang/fortran/comp/Notice create mode 100644 lang/fortran/comp/README create mode 100644 lang/fortran/comp/cds.c create mode 100644 lang/fortran/comp/data.c create mode 100644 lang/fortran/comp/defines.h create mode 100644 lang/fortran/comp/defs.h create mode 100644 lang/fortran/comp/equiv.c create mode 100644 lang/fortran/comp/error.c create mode 100644 lang/fortran/comp/exec.c create mode 100644 lang/fortran/comp/expr.c create mode 100644 lang/fortran/comp/f2c.1 create mode 100644 lang/fortran/comp/f2c.1t create mode 100644 lang/fortran/comp/f2c.6 create mode 100644 lang/fortran/comp/f2c.h create mode 100644 lang/fortran/comp/format.c create mode 100644 lang/fortran/comp/format.h create mode 100644 lang/fortran/comp/formatdata.c create mode 100644 lang/fortran/comp/ftypes.h create mode 100644 lang/fortran/comp/gram.dcl create mode 100644 lang/fortran/comp/gram.exec create mode 100644 lang/fortran/comp/gram.expr create mode 100644 lang/fortran/comp/gram.head create mode 100644 lang/fortran/comp/gram.io create mode 100644 lang/fortran/comp/init.c create mode 100644 lang/fortran/comp/intr.c create mode 100644 lang/fortran/comp/io.c create mode 100644 lang/fortran/comp/iob.h create mode 100644 lang/fortran/comp/lex.c create mode 100644 lang/fortran/comp/machdefs.h create mode 100644 lang/fortran/comp/main.c create mode 100644 lang/fortran/comp/makefile create mode 100644 lang/fortran/comp/malloc.c create mode 100644 lang/fortran/comp/mem.c create mode 100644 lang/fortran/comp/memset.c create mode 100644 lang/fortran/comp/misc.c create mode 100755 lang/fortran/comp/mk_tokdefs create mode 100644 lang/fortran/comp/names.c create mode 100644 lang/fortran/comp/names.h create mode 100644 lang/fortran/comp/niceprintf.c create mode 100644 lang/fortran/comp/niceprintf.h create mode 100644 lang/fortran/comp/output.c create mode 100644 lang/fortran/comp/output.h create mode 100644 lang/fortran/comp/p1defs.h create mode 100644 lang/fortran/comp/p1output.c create mode 100644 
lang/fortran/comp/parse.h create mode 100644 lang/fortran/comp/parse_args.c create mode 100644 lang/fortran/comp/pccdefs.h create mode 100644 lang/fortran/comp/pread.c create mode 100644 lang/fortran/comp/proc.c create mode 100644 lang/fortran/comp/proto.make create mode 100644 lang/fortran/comp/put.c create mode 100644 lang/fortran/comp/putpcc.c create mode 100644 lang/fortran/comp/string.h create mode 100644 lang/fortran/comp/sysdep.c create mode 100644 lang/fortran/comp/sysdep.h create mode 100644 lang/fortran/comp/tokens create mode 100644 lang/fortran/comp/usignal.h create mode 100644 lang/fortran/comp/vax.c create mode 100644 lang/fortran/comp/version.c create mode 100644 lang/fortran/comp/xsum.c create mode 100644 lang/fortran/comp/xsum0.out create mode 100644 lang/fortran/disclaimer create mode 100644 lang/fortran/fc create mode 100644 lang/fortran/fixes create mode 100644 lang/fortran/index create mode 100644 lang/fortran/lib/LIST create mode 100644 lang/fortran/lib/libF77/LIST create mode 100644 lang/fortran/lib/libF77/Notice create mode 100644 lang/fortran/lib/libF77/README create mode 100644 lang/fortran/lib/libF77/Version.c create mode 100644 lang/fortran/lib/libF77/abort_.c create mode 100644 lang/fortran/lib/libF77/c_abs.c create mode 100644 lang/fortran/lib/libF77/c_cos.c create mode 100644 lang/fortran/lib/libF77/c_div.c create mode 100644 lang/fortran/lib/libF77/c_exp.c create mode 100644 lang/fortran/lib/libF77/c_log.c create mode 100644 lang/fortran/lib/libF77/c_sin.c create mode 100644 lang/fortran/lib/libF77/c_sqrt.c create mode 100644 lang/fortran/lib/libF77/cabs.c create mode 100644 lang/fortran/lib/libF77/d_abs.c create mode 100644 lang/fortran/lib/libF77/d_acos.c create mode 100644 lang/fortran/lib/libF77/d_asin.c create mode 100644 lang/fortran/lib/libF77/d_atan.c create mode 100644 lang/fortran/lib/libF77/d_atn2.c create mode 100644 lang/fortran/lib/libF77/d_cnjg.c create mode 100644 lang/fortran/lib/libF77/d_cos.c create mode 100644 
lang/fortran/lib/libF77/d_cosh.c create mode 100644 lang/fortran/lib/libF77/d_dim.c create mode 100644 lang/fortran/lib/libF77/d_exp.c create mode 100644 lang/fortran/lib/libF77/d_imag.c create mode 100644 lang/fortran/lib/libF77/d_int.c create mode 100644 lang/fortran/lib/libF77/d_lg10.c create mode 100644 lang/fortran/lib/libF77/d_log.c create mode 100644 lang/fortran/lib/libF77/d_mod.c create mode 100644 lang/fortran/lib/libF77/d_nint.c create mode 100644 lang/fortran/lib/libF77/d_prod.c create mode 100644 lang/fortran/lib/libF77/d_sign.c create mode 100644 lang/fortran/lib/libF77/d_sin.c create mode 100644 lang/fortran/lib/libF77/d_sinh.c create mode 100644 lang/fortran/lib/libF77/d_sqrt.c create mode 100644 lang/fortran/lib/libF77/d_tan.c create mode 100644 lang/fortran/lib/libF77/d_tanh.c create mode 100644 lang/fortran/lib/libF77/derf_.c create mode 100644 lang/fortran/lib/libF77/derfc_.c create mode 100644 lang/fortran/lib/libF77/ef1asc_.c create mode 100644 lang/fortran/lib/libF77/ef1cmc_.c create mode 100644 lang/fortran/lib/libF77/erf_.c create mode 100644 lang/fortran/lib/libF77/erfc_.c create mode 100644 lang/fortran/lib/libF77/getarg_.c create mode 100644 lang/fortran/lib/libF77/getenv_.c create mode 100644 lang/fortran/lib/libF77/h_abs.c create mode 100644 lang/fortran/lib/libF77/h_dim.c create mode 100644 lang/fortran/lib/libF77/h_dnnt.c create mode 100644 lang/fortran/lib/libF77/h_indx.c create mode 100644 lang/fortran/lib/libF77/h_len.c create mode 100644 lang/fortran/lib/libF77/h_mod.c create mode 100644 lang/fortran/lib/libF77/h_nint.c create mode 100644 lang/fortran/lib/libF77/h_sign.c create mode 100644 lang/fortran/lib/libF77/hl_ge.c create mode 100644 lang/fortran/lib/libF77/hl_gt.c create mode 100644 lang/fortran/lib/libF77/hl_le.c create mode 100644 lang/fortran/lib/libF77/hl_lt.c create mode 100644 lang/fortran/lib/libF77/i_abs.c create mode 100644 lang/fortran/lib/libF77/i_dim.c create mode 100644 lang/fortran/lib/libF77/i_dnnt.c create 
mode 100644 lang/fortran/lib/libF77/i_indx.c create mode 100644 lang/fortran/lib/libF77/i_len.c create mode 100644 lang/fortran/lib/libF77/i_mod.c create mode 100644 lang/fortran/lib/libF77/i_nint.c create mode 100644 lang/fortran/lib/libF77/i_sign.c create mode 100644 lang/fortran/lib/libF77/iargc_.c create mode 100644 lang/fortran/lib/libF77/l_ge.c create mode 100644 lang/fortran/lib/libF77/l_gt.c create mode 100644 lang/fortran/lib/libF77/l_le.c create mode 100644 lang/fortran/lib/libF77/l_lt.c create mode 100644 lang/fortran/lib/libF77/libF77.xsum create mode 100644 lang/fortran/lib/libF77/main.c create mode 100644 lang/fortran/lib/libF77/makefile create mode 100644 lang/fortran/lib/libF77/pow_ci.c create mode 100644 lang/fortran/lib/libF77/pow_dd.c create mode 100644 lang/fortran/lib/libF77/pow_di.c create mode 100644 lang/fortran/lib/libF77/pow_hh.c create mode 100644 lang/fortran/lib/libF77/pow_ii.c create mode 100644 lang/fortran/lib/libF77/pow_ri.c create mode 100644 lang/fortran/lib/libF77/pow_zi.c create mode 100644 lang/fortran/lib/libF77/pow_zz.c create mode 100644 lang/fortran/lib/libF77/r_abs.c create mode 100644 lang/fortran/lib/libF77/r_acos.c create mode 100644 lang/fortran/lib/libF77/r_asin.c create mode 100644 lang/fortran/lib/libF77/r_atan.c create mode 100644 lang/fortran/lib/libF77/r_atn2.c create mode 100644 lang/fortran/lib/libF77/r_cnjg.c create mode 100644 lang/fortran/lib/libF77/r_cos.c create mode 100644 lang/fortran/lib/libF77/r_cosh.c create mode 100644 lang/fortran/lib/libF77/r_dim.c create mode 100644 lang/fortran/lib/libF77/r_exp.c create mode 100644 lang/fortran/lib/libF77/r_imag.c create mode 100644 lang/fortran/lib/libF77/r_int.c create mode 100644 lang/fortran/lib/libF77/r_lg10.c create mode 100644 lang/fortran/lib/libF77/r_log.c create mode 100644 lang/fortran/lib/libF77/r_mod.c create mode 100644 lang/fortran/lib/libF77/r_nint.c create mode 100644 lang/fortran/lib/libF77/r_sign.c create mode 100644 
lang/fortran/lib/libF77/r_sin.c create mode 100644 lang/fortran/lib/libF77/r_sinh.c create mode 100644 lang/fortran/lib/libF77/r_sqrt.c create mode 100644 lang/fortran/lib/libF77/r_tan.c create mode 100644 lang/fortran/lib/libF77/r_tanh.c create mode 100644 lang/fortran/lib/libF77/s_cat.c create mode 100644 lang/fortran/lib/libF77/s_cmp.c create mode 100644 lang/fortran/lib/libF77/s_copy.c create mode 100644 lang/fortran/lib/libF77/s_paus.c create mode 100644 lang/fortran/lib/libF77/s_rnge.c create mode 100644 lang/fortran/lib/libF77/s_stop.c create mode 100644 lang/fortran/lib/libF77/sig_die.c create mode 100644 lang/fortran/lib/libF77/signal_.c create mode 100644 lang/fortran/lib/libF77/system_.c create mode 100644 lang/fortran/lib/libF77/z_abs.c create mode 100644 lang/fortran/lib/libF77/z_cos.c create mode 100644 lang/fortran/lib/libF77/z_div.c create mode 100644 lang/fortran/lib/libF77/z_exp.c create mode 100644 lang/fortran/lib/libF77/z_log.c create mode 100644 lang/fortran/lib/libF77/z_sin.c create mode 100644 lang/fortran/lib/libF77/z_sqrt.c create mode 100644 lang/fortran/lib/libI77/LIST create mode 100644 lang/fortran/lib/libI77/Notice create mode 100644 lang/fortran/lib/libI77/README create mode 100644 lang/fortran/lib/libI77/Version.c create mode 100644 lang/fortran/lib/libI77/backspace.c create mode 100644 lang/fortran/lib/libI77/close.c create mode 100644 lang/fortran/lib/libI77/dfe.c create mode 100644 lang/fortran/lib/libI77/dolio.c create mode 100644 lang/fortran/lib/libI77/due.c create mode 100644 lang/fortran/lib/libI77/endfile.c create mode 100644 lang/fortran/lib/libI77/err.c create mode 100644 lang/fortran/lib/libI77/fio.h create mode 100644 lang/fortran/lib/libI77/fmt.c create mode 100644 lang/fortran/lib/libI77/fmt.h create mode 100644 lang/fortran/lib/libI77/fmtlib.c create mode 100644 lang/fortran/lib/libI77/fp.h create mode 100644 lang/fortran/lib/libI77/iio.c create mode 100644 lang/fortran/lib/libI77/ilnw.c create mode 100644 
lang/fortran/lib/libI77/inquire.c create mode 100644 lang/fortran/lib/libI77/libI77.xsum create mode 100644 lang/fortran/lib/libI77/lio.h create mode 100644 lang/fortran/lib/libI77/local.h create mode 100644 lang/fortran/lib/libI77/lread.c create mode 100644 lang/fortran/lib/libI77/lwrite.c create mode 100644 lang/fortran/lib/libI77/makefile create mode 100644 lang/fortran/lib/libI77/open.c create mode 100644 lang/fortran/lib/libI77/rdfmt.c create mode 100644 lang/fortran/lib/libI77/rewind.c create mode 100644 lang/fortran/lib/libI77/rsfe.c create mode 100644 lang/fortran/lib/libI77/rsli.c create mode 100644 lang/fortran/lib/libI77/rsne.c create mode 100644 lang/fortran/lib/libI77/sfe.c create mode 100644 lang/fortran/lib/libI77/sue.c create mode 100644 lang/fortran/lib/libI77/typesize.c create mode 100644 lang/fortran/lib/libI77/uio.c create mode 100644 lang/fortran/lib/libI77/util.c create mode 100644 lang/fortran/lib/libI77/wref.c create mode 100644 lang/fortran/lib/libI77/wrtfmt.c create mode 100644 lang/fortran/lib/libI77/wsfe.c create mode 100644 lang/fortran/lib/libI77/wsle.c create mode 100644 lang/fortran/lib/libI77/wsne.c create mode 100644 lang/fortran/lib/libI77/xwsne.c create mode 100644 lang/m2/comp/BigPars create mode 100644 lang/m2/comp/LLlex.c create mode 100644 lang/m2/comp/LLlex.h create mode 100644 lang/m2/comp/LLmessage.c create mode 100644 lang/m2/comp/SYSTEM.h create mode 100644 lang/m2/comp/SmallPars create mode 100644 lang/m2/comp/casestat.C create mode 100644 lang/m2/comp/char.tab create mode 100644 lang/m2/comp/chk_expr.c create mode 100644 lang/m2/comp/chk_expr.h create mode 100644 lang/m2/comp/class.h create mode 100644 lang/m2/comp/code.c create mode 100644 lang/m2/comp/cstoper.c create mode 100644 lang/m2/comp/debug.h create mode 100644 lang/m2/comp/declar.g create mode 100644 lang/m2/comp/def.H create mode 100644 lang/m2/comp/def.c create mode 100644 lang/m2/comp/defmodule.c create mode 100644 lang/m2/comp/desig.c create mode 100644 
lang/m2/comp/desig.h create mode 100644 lang/m2/comp/em_m2.6 create mode 100644 lang/m2/comp/enter.c create mode 100644 lang/m2/comp/error.c create mode 100644 lang/m2/comp/expression.g create mode 100644 lang/m2/comp/f_info.h create mode 100644 lang/m2/comp/idf.c create mode 100644 lang/m2/comp/idf.h create mode 100644 lang/m2/comp/input.c create mode 100644 lang/m2/comp/input.h create mode 100644 lang/m2/comp/lookup.c create mode 100644 lang/m2/comp/main.c create mode 100644 lang/m2/comp/main.h create mode 100755 lang/m2/comp/make.allocd create mode 100755 lang/m2/comp/make.hfiles create mode 100755 lang/m2/comp/make.next create mode 100755 lang/m2/comp/make.tokcase create mode 100755 lang/m2/comp/make.tokfile create mode 100644 lang/m2/comp/misc.c create mode 100644 lang/m2/comp/misc.h create mode 100644 lang/m2/comp/modula-2.1 create mode 100644 lang/m2/comp/node.H create mode 100644 lang/m2/comp/node.c create mode 100644 lang/m2/comp/options create mode 100644 lang/m2/comp/options.c create mode 100644 lang/m2/comp/program.g create mode 100644 lang/m2/comp/proto.main create mode 100644 lang/m2/comp/proto.make create mode 100644 lang/m2/comp/real.H create mode 100644 lang/m2/comp/scope.C create mode 100644 lang/m2/comp/scope.h create mode 100644 lang/m2/comp/stab.c create mode 100644 lang/m2/comp/standards.h create mode 100644 lang/m2/comp/statement.g create mode 100644 lang/m2/comp/tmpvar.C create mode 100644 lang/m2/comp/tokenname.c create mode 100644 lang/m2/comp/tokenname.h create mode 100644 lang/m2/comp/type.H create mode 100644 lang/m2/comp/type.c create mode 100644 lang/m2/comp/typequiv.c create mode 100644 lang/m2/comp/walk.c create mode 100644 lang/m2/comp/walk.h create mode 100644 lang/m2/comp/warning.h create mode 100644 lang/m2/libm2/ASCII.def create mode 100644 lang/m2/libm2/Arguments.def create mode 100644 lang/m2/libm2/ArraySort.def create mode 100644 lang/m2/libm2/CSP.def create mode 100644 lang/m2/libm2/Conversion.def create mode 100644 
lang/m2/libm2/EM.def create mode 100644 lang/m2/libm2/Epilogue.def create mode 100644 lang/m2/libm2/InOut.def create mode 100644 lang/m2/libm2/LIST create mode 100644 lang/m2/libm2/MathLib0.def create mode 100644 lang/m2/libm2/Mathlib.def create mode 100644 lang/m2/libm2/PascalIO.def create mode 100644 lang/m2/libm2/Processes.def create mode 100644 lang/m2/libm2/RealConver.def create mode 100644 lang/m2/libm2/RealInOut.def create mode 100644 lang/m2/libm2/Semaphores.def create mode 100644 lang/m2/libm2/Storage.def create mode 100644 lang/m2/libm2/Streams.def create mode 100644 lang/m2/libm2/Strings.def create mode 100644 lang/m2/libm2/StripUnix.def create mode 100644 lang/m2/libm2/Termcap.def create mode 100644 lang/m2/libm2/Terminal.def create mode 100644 lang/m2/libm2/Traps.def create mode 100644 lang/m2/libm2/Unix.def create mode 100644 lang/m2/libm2/XXTermcap.def create mode 100644 lang/m2/libm2/head_m2.e create mode 100644 lang/m2/libm2/proto.make create mode 100644 lang/m2/libm2/random.def create mode 100644 lang/m2/libm2/tail_m2.a create mode 100644 lang/m2/m2mm/LLlex.c create mode 100644 lang/m2/m2mm/LLlex.h create mode 100644 lang/m2/m2mm/LLmessage.c create mode 100644 lang/m2/m2mm/char.tab create mode 100644 lang/m2/m2mm/class.h create mode 100644 lang/m2/m2mm/declar.g create mode 100644 lang/m2/m2mm/error.c create mode 100644 lang/m2/m2mm/expression.g create mode 100644 lang/m2/m2mm/f_info.h create mode 100644 lang/m2/m2mm/file_list.h create mode 100644 lang/m2/m2mm/idf.c create mode 100644 lang/m2/m2mm/idf.h create mode 100644 lang/m2/m2mm/idfsize.h create mode 100644 lang/m2/m2mm/input.c create mode 100644 lang/m2/m2mm/input.h create mode 100644 lang/m2/m2mm/inputtype.h create mode 100644 lang/m2/m2mm/lib.c create mode 100644 lang/m2/m2mm/m2mm.1 create mode 100644 lang/m2/m2mm/main.c create mode 100644 lang/m2/m2mm/main.h create mode 100755 lang/m2/m2mm/make.tokcase create mode 100755 lang/m2/m2mm/make.tokfile create mode 100644 lang/m2/m2mm/misc.c 
create mode 100644 lang/m2/m2mm/options.c create mode 100644 lang/m2/m2mm/program.g create mode 100644 lang/m2/m2mm/proto.main create mode 100644 lang/m2/m2mm/proto.make create mode 100644 lang/m2/m2mm/statement.g create mode 100644 lang/m2/m2mm/tokenname.c create mode 100644 lang/m2/m2mm/tokenname.h create mode 100644 lang/m2/test/Thalmann/LifeGame.mod create mode 100644 lang/m2/test/Thalmann/Shoes.mod create mode 100644 lang/m2/test/Thalmann/StoreFetch.mod create mode 100644 lang/m2/test/Thalmann/bold.mod create mode 100644 lang/m2/test/Thalmann/characters.mod create mode 100644 lang/m2/test/Wirth/PowersOf2.mod create mode 100644 lang/m2/test/Wirth/TableHandl.def create mode 100644 lang/m2/test/Wirth/TableHandl.mod create mode 100644 lang/m2/test/Wirth/XREF.mod create mode 100644 lang/m2/test/getenv.mod create mode 100644 lang/m2/test/m2p.mod create mode 100644 lang/m2/test/queens.mod create mode 100644 lang/occam/comp/builtin.c create mode 100644 lang/occam/comp/code.c create mode 100644 lang/occam/comp/code.h create mode 100644 lang/occam/comp/em.c create mode 100644 lang/occam/comp/em.h create mode 100644 lang/occam/comp/expr.c create mode 100644 lang/occam/comp/expr.h create mode 100644 lang/occam/comp/keytab.c create mode 100644 lang/occam/comp/lex.l create mode 100644 lang/occam/comp/occam.g create mode 100644 lang/occam/comp/proto.main create mode 100644 lang/occam/comp/proto.make create mode 100644 lang/occam/comp/report.c create mode 100644 lang/occam/comp/sizes.h create mode 100644 lang/occam/comp/symtab.c create mode 100644 lang/occam/comp/symtab.h create mode 100644 lang/occam/comp/token.h create mode 100644 lang/occam/lib/LIST create mode 100644 lang/occam/lib/tail_ocm.a create mode 100644 lang/occam/test/Huffman.ocm create mode 100644 lang/occam/test/Makefile create mode 100644 lang/occam/test/READ_ME create mode 100644 lang/occam/test/aatob.ocm create mode 100644 lang/occam/test/copy.ocm create mode 100644 lang/occam/test/key.ocm create mode 100644 
lang/occam/test/lifegame.ocm create mode 100644 lang/occam/test/matmul.ocm create mode 100644 lang/occam/test/sort.ocm create mode 100644 lang/occam/test/use_prnt.ocm create mode 100644 lang/occam/test/xxtoy.ocm create mode 100644 lang/pc/comp/LLlex.c create mode 100644 lang/pc/comp/LLlex.h create mode 100644 lang/pc/comp/LLmessage.c create mode 100644 lang/pc/comp/Parameters create mode 100644 lang/pc/comp/body.c create mode 100644 lang/pc/comp/casestat.C create mode 100644 lang/pc/comp/char.tab create mode 100644 lang/pc/comp/chk_expr.c create mode 100644 lang/pc/comp/chk_expr.h create mode 100644 lang/pc/comp/class.h create mode 100644 lang/pc/comp/code.c create mode 100644 lang/pc/comp/const.h create mode 100644 lang/pc/comp/cstoper.c create mode 100644 lang/pc/comp/debug.h create mode 100644 lang/pc/comp/declar.g create mode 100644 lang/pc/comp/def.H create mode 100644 lang/pc/comp/def.c create mode 100644 lang/pc/comp/desig.H create mode 100644 lang/pc/comp/desig.c create mode 100644 lang/pc/comp/em_pc.6 create mode 100644 lang/pc/comp/enter.c create mode 100644 lang/pc/comp/error.c create mode 100644 lang/pc/comp/expression.g create mode 100644 lang/pc/comp/f_info.h create mode 100644 lang/pc/comp/idf.c create mode 100644 lang/pc/comp/idf.h create mode 100644 lang/pc/comp/input.c create mode 100644 lang/pc/comp/input.h create mode 100644 lang/pc/comp/label.c create mode 100644 lang/pc/comp/lookup.c create mode 100644 lang/pc/comp/main.c create mode 100644 lang/pc/comp/main.h create mode 100755 lang/pc/comp/make.allocd create mode 100755 lang/pc/comp/make.hfiles create mode 100755 lang/pc/comp/make.next create mode 100755 lang/pc/comp/make.tokcase create mode 100755 lang/pc/comp/make.tokfile create mode 100644 lang/pc/comp/misc.c create mode 100644 lang/pc/comp/misc.h create mode 100644 lang/pc/comp/node.H create mode 100644 lang/pc/comp/node.c create mode 100644 lang/pc/comp/options create mode 100644 lang/pc/comp/options.c create mode 100644 
lang/pc/comp/program.g create mode 100644 lang/pc/comp/progs.c create mode 100644 lang/pc/comp/proto.main create mode 100644 lang/pc/comp/proto.make create mode 100644 lang/pc/comp/readwrite.c create mode 100644 lang/pc/comp/required.h create mode 100644 lang/pc/comp/scope.H create mode 100644 lang/pc/comp/scope.c create mode 100644 lang/pc/comp/stab.c create mode 100644 lang/pc/comp/statement.g create mode 100644 lang/pc/comp/tmpvar.C create mode 100644 lang/pc/comp/tokenname.c create mode 100644 lang/pc/comp/tokenname.h create mode 100644 lang/pc/comp/type.H create mode 100644 lang/pc/comp/type.c create mode 100644 lang/pc/comp/typequiv.c create mode 100644 lang/pc/libpc/LIST create mode 100644 lang/pc/libpc/Makefile create mode 100644 lang/pc/libpc/READ_ME create mode 100644 lang/pc/libpc/head_pc.e create mode 100644 lang/pc/libpc/tail_pc.a create mode 100644 lang/pc/test/Makefile create mode 100644 lang/pc/test/b.p create mode 100644 lang/pc/test/callc.p create mode 100644 lang/pc/test/cmod.c create mode 100644 lang/pc/test/machar.p create mode 100644 lang/pc/test/t1.p create mode 100644 lang/pc/test/t2.p create mode 100644 lang/pc/test/t3.p create mode 100644 lang/pc/test/t4.p create mode 100644 lang/pc/test/t5.p create mode 100644 lang/pc/test/tstenc.p create mode 100644 lang/pc/test/tstgto.p create mode 100644 lib/6500/descr create mode 100644 lib/6800/descr create mode 100644 lib/6805/descr create mode 100644 lib/6809/descr create mode 100644 lib/arm/descr create mode 100644 lib/descr/cpm create mode 100644 lib/descr/fe create mode 100644 lib/em22/descr create mode 100644 lib/em24/descr create mode 100644 lib/em44/descr create mode 100644 lib/i386/descr create mode 100644 lib/i80/descr create mode 100644 lib/i86/descr create mode 100644 lib/m68020/descr create mode 100644 lib/m68k2/descr create mode 100644 lib/m68k4/descr create mode 100644 lib/mantra/descr create mode 100644 lib/minix/descr create mode 100644 lib/minix/include/ansi.h create mode 100644 
lib/minix/include/errno.h create mode 100644 lib/minix/include/fcntl.h create mode 100644 lib/minix/include/lib.h create mode 100644 lib/minix/include/limits.h create mode 100644 lib/minix/include/minix/callnr.h create mode 100644 lib/minix/include/minix/com.h create mode 100644 lib/minix/include/minix/config.h create mode 100644 lib/minix/include/minix/const.h create mode 100644 lib/minix/include/minix/type.h create mode 100644 lib/minix/include/sgtty.h create mode 100644 lib/minix/include/signal.h create mode 100644 lib/minix/include/string.h create mode 100644 lib/minix/include/sys/errno.h create mode 100644 lib/minix/include/sys/stat.h create mode 100644 lib/minix/include/sys/times.h create mode 100644 lib/minix/include/sys/types.h create mode 100644 lib/minix/include/sys/wait.h create mode 100644 lib/minix/include/time.h create mode 100644 lib/minix/include/unistd.h create mode 100644 lib/minix/include/utime.h create mode 100644 lib/minixST/descr create mode 100644 lib/minixST/include/a.out.h create mode 100644 lib/minixST/include/minix/config.h create mode 100644 lib/ns/descr create mode 100644 lib/pdp/descr create mode 100644 lib/pmds/descr create mode 100644 lib/pmds4/descr create mode 100644 lib/s2650/descr create mode 100644 lib/sparc/descr create mode 100644 lib/sparc_solaris/descr create mode 100644 lib/sun2/descr create mode 100644 lib/sun3/descr create mode 100644 lib/vax4/descr create mode 100644 lib/xenix3/descr create mode 100644 lib/z80/descr create mode 100644 lib/z8000/descr create mode 100644 mach/6500/Action create mode 100644 mach/6500/as/READ_ME create mode 100644 mach/6500/as/mach0.c create mode 100644 mach/6500/as/mach1.c create mode 100644 mach/6500/as/mach2.c create mode 100644 mach/6500/as/mach3.c create mode 100644 mach/6500/as/mach4.c create mode 100644 mach/6500/as/mach5.c create mode 100644 mach/6500/cg/mach.c create mode 100644 mach/6500/cg/mach.h create mode 100644 mach/6500/cg/table create mode 100644 mach/6500/dl/dl.c create 
mode 100644 mach/6500/dl/proto.make create mode 100644 mach/6500/libem/LIST create mode 100644 mach/6500/libem/libem_s.a create mode 100644 mach/6500/libend/LIST create mode 100644 mach/6500/libend/end_s.a create mode 100644 mach/6500/libmon/LIST create mode 100644 mach/6500/libmon/head_em.s create mode 100644 mach/6500/libmon/libmon_s.a create mode 100644 mach/6500/mach_params create mode 100644 mach/6800/Action create mode 100644 mach/6800/as/mach0.c create mode 100644 mach/6800/as/mach1.c create mode 100644 mach/6800/as/mach2.c create mode 100644 mach/6800/as/mach3.c create mode 100644 mach/6800/as/mach4.c create mode 100644 mach/6800/as/mach5.c create mode 100644 mach/6805/Action create mode 100644 mach/6805/as/mach0.c create mode 100644 mach/6805/as/mach1.c create mode 100644 mach/6805/as/mach2.c create mode 100644 mach/6805/as/mach3.c create mode 100644 mach/6805/as/mach4.c create mode 100644 mach/6805/as/mach5.c create mode 100644 mach/6809/Action create mode 100644 mach/6809/as/mach0.c create mode 100644 mach/6809/as/mach1.c create mode 100644 mach/6809/as/mach2.c create mode 100644 mach/6809/as/mach3.c create mode 100644 mach/6809/as/mach4.c create mode 100644 mach/6809/as/mach5.c create mode 100644 mach/arm/Action create mode 100644 mach/arm/READ_ME create mode 100644 mach/arm/as/mach0.c create mode 100644 mach/arm/as/mach1.c create mode 100644 mach/arm/as/mach2.c create mode 100644 mach/arm/as/mach3.c create mode 100644 mach/arm/as/mach4.c create mode 100644 mach/arm/as/mach5.c create mode 100644 mach/arm/cv/arm.h create mode 100644 mach/arm/cv/cv.c create mode 100644 mach/arm/cv/proto.make create mode 100644 mach/arm/libem/LIST create mode 100644 mach/arm/libem/libem_s.a create mode 100644 mach/arm/libend/LIST create mode 100644 mach/arm/libend/end_s.a create mode 100644 mach/arm/libfp/byte_order.h create mode 100644 mach/arm/libmon/LIST create mode 100644 mach/arm/libmon/head_em.s create mode 100644 mach/arm/libmon/libmon_s.a create mode 100644 
mach/arm/mach_params create mode 100644 mach/arm/ncg/mach.c create mode 100644 mach/arm/ncg/mach.h create mode 100644 mach/arm/ncg/table create mode 100644 mach/arm/top/table create mode 100644 mach/con_float create mode 100644 mach/em22/Action create mode 100644 mach/em22/libend/LIST create mode 100644 mach/em22/libend/end_e.a create mode 100644 mach/em22/mach_params create mode 100644 mach/em24/Action create mode 100644 mach/em24/libend/LIST create mode 100644 mach/em24/libend/end_e.a create mode 100644 mach/em24/mach_params create mode 100644 mach/em44/Action create mode 100644 mach/em44/libend/LIST create mode 100644 mach/em44/libend/end_e.a create mode 100644 mach/em44/mach_params create mode 100644 mach/i386/Action create mode 100644 mach/i386/as/mach0.c create mode 100644 mach/i386/as/mach1.c create mode 100644 mach/i386/as/mach2.c create mode 100644 mach/i386/as/mach3.c create mode 100644 mach/i386/as/mach4.c create mode 100644 mach/i386/as/mach5.c create mode 100644 mach/i386/ce/EM_table create mode 100644 mach/i386/ce/as.c create mode 100644 mach/i386/ce/as.h create mode 100644 mach/i386/ce/as_table create mode 100644 mach/i386/ce/mach.c create mode 100644 mach/i386/ce/mach.h create mode 100644 mach/i386/ce/proto.make create mode 100644 mach/i386/cv/cv.c create mode 100644 mach/i386/cv/proto.make create mode 100644 mach/i386/libdb/machdep.s create mode 100644 mach/i386/libem/LIST create mode 100644 mach/i386/libem/libem_s.a create mode 100644 mach/i386/libend/LIST create mode 100644 mach/i386/libend/end_s.a create mode 100644 mach/i386/libfp/byte_order.h create mode 100644 mach/i386/libsys/LIST create mode 100644 mach/i386/libsys/head_em.s create mode 100644 mach/i386/libsys/libmon_s.a create mode 100644 mach/i386/mach_params create mode 100644 mach/i386/ncg/mach.c create mode 100644 mach/i386/ncg/mach.h create mode 100644 mach/i386/ncg/table create mode 100644 mach/i80/Action create mode 100644 mach/i80/as/mach0.c create mode 100644 mach/i80/as/mach1.c 
create mode 100644 mach/i80/as/mach2.c create mode 100644 mach/i80/as/mach3.c create mode 100644 mach/i80/as/mach4.c create mode 100644 mach/i80/as/mach5.c create mode 100644 mach/i80/dl/README create mode 100644 mach/i80/dl/mccpm.c create mode 100644 mach/i80/dl/nascom.c create mode 100644 mach/i80/dl/proto.make create mode 100644 mach/i80/libem/LIST create mode 100644 mach/i80/libem/libem_s.a create mode 100644 mach/i80/libend/LIST create mode 100644 mach/i80/libend/end_s.a create mode 100644 mach/i80/libmon/LIST create mode 100644 mach/i80/libmon/README create mode 100644 mach/i80/libmon/char.nas.s create mode 100644 mach/i80/libmon/head_em.s create mode 100644 mach/i80/libmon/libmon_s.a create mode 100644 mach/i80/mach_params create mode 100644 mach/i80/ncg/mach.c create mode 100644 mach/i80/ncg/mach.h create mode 100644 mach/i80/ncg/table create mode 100644 mach/i86/Action create mode 100644 mach/i86/as/mach0.c create mode 100644 mach/i86/as/mach1.c create mode 100644 mach/i86/as/mach2.c create mode 100644 mach/i86/as/mach3.c create mode 100644 mach/i86/as/mach4.c create mode 100644 mach/i86/as/mach5.c create mode 100644 mach/i86/ce/EM_table create mode 100644 mach/i86/ce/as.c create mode 100644 mach/i86/ce/as.h create mode 100644 mach/i86/ce/as_table create mode 100644 mach/i86/ce/mach.c create mode 100644 mach/i86/ce/mach.h create mode 100644 mach/i86/ce/proto.make create mode 100644 mach/i86/cv/cv.c create mode 100644 mach/i86/cv/proto.make create mode 100644 mach/i86/libem/LIST create mode 100644 mach/i86/libem/libem_s.a create mode 100644 mach/i86/libend/LIST create mode 100644 mach/i86/libend/end_s.a create mode 100644 mach/i86/libfp/byte_order.h create mode 100644 mach/i86/libsys/LIST create mode 100644 mach/i86/libsys/head_em.s create mode 100644 mach/i86/libsys/libmon_s.a create mode 100644 mach/i86/mach_params create mode 100644 mach/i86/ncg/mach.c create mode 100644 mach/i86/ncg/mach.h create mode 100644 mach/i86/ncg/table create mode 100644 
mach/m68020/Action create mode 100644 mach/m68020/as/Expect create mode 100644 mach/m68020/as/mach0.c create mode 100644 mach/m68020/as/mach1.c create mode 100644 mach/m68020/as/mach2.c create mode 100644 mach/m68020/as/mach3.c create mode 100644 mach/m68020/as/mach4.c create mode 100644 mach/m68020/as/mach5.c create mode 100644 mach/m68020/ce/EM_table create mode 100644 mach/m68020/ce/as.c create mode 100644 mach/m68020/ce/as.h create mode 100644 mach/m68020/ce/as_table create mode 100644 mach/m68020/ce/mach.c create mode 100644 mach/m68020/ce/mach.h create mode 100644 mach/m68020/ce/proto.make create mode 100644 mach/m68020/cv/cv.c create mode 100644 mach/m68020/cv/proto.make create mode 100644 mach/m68020/libdb/machdep.s create mode 100644 mach/m68020/libem/LIST create mode 100644 mach/m68020/libem/libem_s.a create mode 100644 mach/m68020/libend/LIST create mode 100644 mach/m68020/libend/end_s.a create mode 100644 mach/m68020/libfp/byte_order.h create mode 100644 mach/m68020/libsys/LIST create mode 100644 mach/m68020/libsys/README create mode 100644 mach/m68020/libsys/head_em.s create mode 100644 mach/m68020/libsys/libmon_s.a create mode 100644 mach/m68020/mach_params create mode 100644 mach/m68020/ncg/README create mode 100644 mach/m68020/ncg/instrmacs.h create mode 100644 mach/m68020/ncg/mach.c create mode 100644 mach/m68020/ncg/mach.h create mode 100644 mach/m68020/ncg/table create mode 100644 mach/m68020/ncg/whichone.h create mode 100644 mach/m68020/top/table create mode 100644 mach/m68k2/Action create mode 100644 mach/m68k2/README create mode 100644 mach/m68k2/Unisoft_bug create mode 100644 mach/m68k2/as/mach0.c create mode 100644 mach/m68k2/as/mach1.c create mode 100644 mach/m68k2/as/mach2.c create mode 100644 mach/m68k2/as/mach3.c create mode 100644 mach/m68k2/as/mach4.c create mode 100644 mach/m68k2/as/mach5.c create mode 100644 mach/m68k2/cv/cv.c create mode 100644 mach/m68k2/cv/proto.make create mode 100644 mach/m68k2/dl/dl.c create mode 100644 
mach/m68k2/dl/proto.make create mode 100644 mach/m68k2/libem/LIST create mode 100644 mach/m68k2/libem/READ_ME create mode 100644 mach/m68k2/libem/libem_s.a create mode 100644 mach/m68k2/libend/LIST create mode 100644 mach/m68k2/libend/end_s.a create mode 100644 mach/m68k2/libfp/byte_order.h create mode 100644 mach/m68k2/libsys/LIST create mode 100644 mach/m68k2/libsys/head_em.s create mode 100644 mach/m68k2/libsys/libmon_s.a create mode 100644 mach/m68k2/mach_params create mode 100644 mach/m68k2/ncg/table_dir create mode 100644 mach/m68k2/ncg/whichone.h create mode 100644 mach/m68k2/top/table create mode 100644 mach/m68k4/Action create mode 100644 mach/m68k4/libem/LIST create mode 100644 mach/m68k4/libem/libem_s.a create mode 100644 mach/m68k4/libend/LIST create mode 100644 mach/m68k4/libend/end_s.a create mode 100644 mach/m68k4/libfp/byte_order.h create mode 100644 mach/m68k4/libsys/LIST create mode 100644 mach/m68k4/libsys/head_em.s create mode 100644 mach/m68k4/libsys/libmon_s.a create mode 100644 mach/m68k4/mach_params create mode 100644 mach/m68k4/ncg/table_dir create mode 100644 mach/m68k4/ncg/whichone.h create mode 100644 mach/mantra/Action create mode 100644 mach/mantra/cv/Xcv.c create mode 100644 mach/mantra/cv/cv.c create mode 100644 mach/mantra/cv/proto.make create mode 100644 mach/mantra/int/con_float.c create mode 100644 mach/mantra/int/copyright create mode 100644 mach/mantra/int/deffile create mode 100644 mach/mantra/int/em.1 create mode 100644 mach/mantra/int/em.c create mode 100644 mach/mantra/int/mloop0 create mode 100644 mach/mantra/int/mloop1 create mode 100644 mach/mantra/int/mloop2 create mode 100644 mach/mantra/int/mloop3 create mode 100644 mach/mantra/int/mloop4 create mode 100644 mach/mantra/int/mloop5 create mode 100644 mach/mantra/int/mloop6 create mode 100644 mach/mantra/int/mloop7 create mode 100644 mach/mantra/int/mloop8 create mode 100644 mach/mantra/int/mloop9 create mode 100644 mach/mantra/int/mloopa create mode 100644 
mach/mantra/int/mloopb create mode 100644 mach/mantra/int/mloopc create mode 100644 mach/mantra/int/proto.make create mode 100644 mach/mantra/libsys/LIST create mode 100644 mach/mantra/libsys/head_em.s create mode 100644 mach/mantra/libsys/libmon_s.a create mode 100644 mach/mantra/mach_params create mode 100644 mach/minix/Action create mode 100644 mach/minix/libsys/LIST create mode 100644 mach/minix/libsys/head_em.s create mode 100644 mach/minix/libsys/libmon_s.a create mode 100644 mach/minix/mach_params create mode 100644 mach/minixST/Action create mode 100644 mach/minixST/cv/cv.c create mode 100644 mach/minixST/cv/proto.make create mode 100644 mach/minixST/libsys/LIST create mode 100644 mach/minixST/libsys/head_em.s create mode 100644 mach/minixST/libsys/libmon_s.a create mode 100644 mach/minixST/mach_params create mode 100644 mach/ns/Action create mode 100644 mach/ns/as/mach0.c create mode 100644 mach/ns/as/mach1.c create mode 100644 mach/ns/as/mach2.c create mode 100644 mach/ns/as/mach3.c create mode 100644 mach/ns/as/mach4.c create mode 100644 mach/ns/as/mach5.c create mode 100644 mach/ns/libem/LIST create mode 100644 mach/ns/libem/libem_s.a create mode 100644 mach/ns/libend/LIST create mode 100644 mach/ns/libend/end_s.a create mode 100644 mach/ns/libmon/LIST create mode 100644 mach/ns/libmon/head_em.s create mode 100644 mach/ns/libmon/libmon_s.a create mode 100644 mach/ns/mach_params create mode 100644 mach/ns/ncg/mach.c create mode 100644 mach/ns/ncg/mach.h create mode 100644 mach/ns/ncg/table create mode 100644 mach/pdp/Action create mode 100644 mach/pdp/as/mach0.c create mode 100644 mach/pdp/as/mach1.c create mode 100644 mach/pdp/as/mach2.c create mode 100644 mach/pdp/as/mach3.c create mode 100644 mach/pdp/as/mach4.c create mode 100644 mach/pdp/as/mach5.c create mode 100644 mach/pdp/cg/mach.c create mode 100644 mach/pdp/cg/mach.h create mode 100644 mach/pdp/cg/table create mode 100644 mach/pdp/cv/cv.c create mode 100644 mach/pdp/cv/proto.make create mode 
100644 mach/pdp/int/README create mode 100644 mach/pdp/int/c+ create mode 100644 mach/pdp/int/c- create mode 100644 mach/pdp/int/em.1 create mode 100644 mach/pdp/int/em.c create mode 100644 mach/pdp/int/em_int.s create mode 100644 mach/pdp/int/eminform.1 create mode 100644 mach/pdp/int/eminform.s create mode 100644 mach/pdp/int/f+ create mode 100644 mach/pdp/int/f- create mode 100644 mach/pdp/int/p+ create mode 100644 mach/pdp/int/p- create mode 100644 mach/pdp/int/proto.make create mode 100644 mach/pdp/int/t+ create mode 100644 mach/pdp/int/t- create mode 100644 mach/pdp/libem/LIST create mode 100644 mach/pdp/libem/libem_s.a create mode 100644 mach/pdp/libend/LIST create mode 100644 mach/pdp/libend/end_s.a create mode 100644 mach/pdp/libsys/LIST create mode 100644 mach/pdp/libsys/head_em.s create mode 100644 mach/pdp/libsys/libmon_s.a create mode 100644 mach/pdp/libsys/sys.h create mode 100644 mach/pdp/mach_params create mode 100644 mach/pdp/top/table create mode 100644 mach/pmds/Action create mode 100644 mach/pmds/cv/cv.c create mode 100644 mach/pmds/cv/proto.make create mode 100644 mach/pmds/libsys/LIST create mode 100644 mach/pmds/libsys/head_em.s create mode 100644 mach/pmds/libsys/libmon_s.a create mode 100644 mach/pmds/mach_params create mode 100644 mach/pmds4/Action create mode 100644 mach/pmds4/libsys/LIST create mode 100644 mach/pmds4/libsys/head_em.s create mode 100644 mach/pmds4/libsys/libmon_s.a create mode 100644 mach/pmds4/mach_params create mode 100644 mach/proto/as/comm0.h create mode 100644 mach/proto/as/comm1.h create mode 100644 mach/proto/as/comm2.y create mode 100644 mach/proto/as/comm3.c create mode 100644 mach/proto/as/comm4.c create mode 100644 mach/proto/as/comm5.c create mode 100644 mach/proto/as/comm6.c create mode 100644 mach/proto/as/comm7.c create mode 100644 mach/proto/as/comm8.c create mode 100644 mach/proto/as/proto.make create mode 100644 mach/proto/cg/assert.h create mode 100644 mach/proto/cg/codegen.c create mode 100644 
mach/proto/cg/compute.c create mode 100644 mach/proto/cg/data.h create mode 100644 mach/proto/cg/equiv.c create mode 100644 mach/proto/cg/equiv.h create mode 100644 mach/proto/cg/extern.h create mode 100644 mach/proto/cg/fillem.c create mode 100644 mach/proto/cg/gencode.c create mode 100644 mach/proto/cg/glosym.c create mode 100644 mach/proto/cg/glosym.h create mode 100644 mach/proto/cg/main.c create mode 100644 mach/proto/cg/move.c create mode 100644 mach/proto/cg/nextem.c create mode 100644 mach/proto/cg/param.h create mode 100644 mach/proto/cg/proto.make create mode 100644 mach/proto/cg/reg.c create mode 100644 mach/proto/cg/regvar.c create mode 100644 mach/proto/cg/regvar.h create mode 100644 mach/proto/cg/result.h create mode 100644 mach/proto/cg/salloc.c create mode 100644 mach/proto/cg/state.c create mode 100644 mach/proto/cg/state.h create mode 100644 mach/proto/cg/subr.c create mode 100644 mach/proto/cg/types.h create mode 100644 mach/proto/cg/var.c create mode 100644 mach/proto/fp/FP.script create mode 100644 mach/proto/fp/FP_bias.h create mode 100644 mach/proto/fp/FP_shift.h create mode 100644 mach/proto/fp/FP_trap.h create mode 100644 mach/proto/fp/FP_types.h create mode 100644 mach/proto/fp/add_ext.c create mode 100644 mach/proto/fp/adder.c create mode 100644 mach/proto/fp/adf4.c create mode 100644 mach/proto/fp/adf8.c create mode 100644 mach/proto/fp/cff4.c create mode 100644 mach/proto/fp/cff8.c create mode 100644 mach/proto/fp/cfi.c create mode 100644 mach/proto/fp/cfu.c create mode 100644 mach/proto/fp/cif4.c create mode 100644 mach/proto/fp/cif8.c create mode 100644 mach/proto/fp/cmf4.c create mode 100644 mach/proto/fp/cmf8.c create mode 100644 mach/proto/fp/compact.c create mode 100644 mach/proto/fp/cuf4.c create mode 100644 mach/proto/fp/cuf8.c create mode 100644 mach/proto/fp/div_ext.c create mode 100644 mach/proto/fp/dvf4.c create mode 100644 mach/proto/fp/dvf8.c create mode 100644 mach/proto/fp/extend.c create mode 100644 mach/proto/fp/fef4.c 
create mode 100644 mach/proto/fp/fef8.c create mode 100644 mach/proto/fp/fif4.c create mode 100644 mach/proto/fp/fif8.c create mode 100644 mach/proto/fp/fptrp.e create mode 100644 mach/proto/fp/get_put.h create mode 100644 mach/proto/fp/mlf4.c create mode 100644 mach/proto/fp/mlf8.c create mode 100644 mach/proto/fp/mul_ext.c create mode 100644 mach/proto/fp/ngf4.c create mode 100644 mach/proto/fp/ngf8.c create mode 100644 mach/proto/fp/nrm_ext.c create mode 100644 mach/proto/fp/sbf4.c create mode 100644 mach/proto/fp/sbf8.c create mode 100644 mach/proto/fp/sft_ext.c create mode 100644 mach/proto/fp/shifter.c create mode 100644 mach/proto/fp/sub_ext.c create mode 100644 mach/proto/fp/zrf4.c create mode 100644 mach/proto/fp/zrf8.c create mode 100644 mach/proto/fp/zrf_ext.c create mode 100644 mach/proto/grind/READ_ME create mode 100644 mach/proto/grind/atlin.c create mode 100644 mach/proto/grind/lib.e create mode 100644 mach/proto/grind/par_misc.e create mode 100644 mach/proto/libg/barrier.c create mode 100755 mach/proto/libg/compmodule create mode 100644 mach/proto/libg/proto.libbc create mode 100644 mach/proto/libg/proto.libcc create mode 100644 mach/proto/libg/proto.libcc.ansi create mode 100644 mach/proto/libg/proto.libdb create mode 100644 mach/proto/libg/proto.libem create mode 100644 mach/proto/libg/proto.libend create mode 100644 mach/proto/libg/proto.libf77 create mode 100644 mach/proto/libg/proto.libfp create mode 100644 mach/proto/libg/proto.libm2 create mode 100644 mach/proto/libg/proto.libmon create mode 100644 mach/proto/libg/proto.liboc create mode 100644 mach/proto/libg/proto.libpc create mode 100644 mach/proto/libg/proto.libsys create mode 100644 mach/proto/libg/proto.make create mode 100644 mach/proto/libg/proto.sysmon create mode 100644 mach/proto/ncg/assert.h create mode 100644 mach/proto/ncg/codegen.c create mode 100644 mach/proto/ncg/compute.c create mode 100644 mach/proto/ncg/data.h create mode 100644 mach/proto/ncg/equiv.c create mode 100644 
mach/proto/ncg/equiv.h create mode 100644 mach/proto/ncg/extern.h create mode 100644 mach/proto/ncg/fillem.c create mode 100644 mach/proto/ncg/gencode.c create mode 100644 mach/proto/ncg/glosym.c create mode 100644 mach/proto/ncg/glosym.h create mode 100644 mach/proto/ncg/label.c create mode 100644 mach/proto/ncg/label.h create mode 100644 mach/proto/ncg/main.c create mode 100644 mach/proto/ncg/move.c create mode 100644 mach/proto/ncg/nextem.c create mode 100644 mach/proto/ncg/param.h create mode 100644 mach/proto/ncg/proto.make create mode 100644 mach/proto/ncg/reg.c create mode 100644 mach/proto/ncg/regvar.c create mode 100644 mach/proto/ncg/regvar.h create mode 100644 mach/proto/ncg/result.h create mode 100644 mach/proto/ncg/salloc.c create mode 100644 mach/proto/ncg/state.c create mode 100644 mach/proto/ncg/state.h create mode 100644 mach/proto/ncg/subr.c create mode 100644 mach/proto/ncg/types.h create mode 100644 mach/proto/ncg/var.c create mode 100644 mach/proto/top/proto.make create mode 100644 mach/proto/top/queue.c create mode 100644 mach/proto/top/queue.h create mode 100644 mach/proto/top/top.c create mode 100644 mach/proto/top/top.h create mode 100644 mach/s2650/Action create mode 100644 mach/s2650/as/mach0.c create mode 100644 mach/s2650/as/mach1.c create mode 100644 mach/s2650/as/mach2.c create mode 100644 mach/s2650/as/mach3.c create mode 100644 mach/s2650/as/mach4.c create mode 100644 mach/s2650/as/mach5.c create mode 100644 mach/sparc/Action create mode 100644 mach/sparc/ce/EM_table create mode 100644 mach/sparc/ce/EM_table.x create mode 100644 mach/sparc/ce/Makefile create mode 100644 mach/sparc/ce/back.src/Makefile create mode 100644 mach/sparc/ce/back.src/back.h create mode 100644 mach/sparc/ce/back.src/con_str.c create mode 100644 mach/sparc/ce/back.src/const.h create mode 100644 mach/sparc/ce/back.src/do_open.c create mode 100644 mach/sparc/ce/back.src/gen_str.c create mode 100644 mach/sparc/ce/back.src/header.h create mode 100644 
mach/sparc/ce/back.src/rom_str.c create mode 100644 mach/sparc/ce/back.src/symboldef.c create mode 100644 mach/sparc/ce/cache.c create mode 100644 mach/sparc/ce/cache.c.x create mode 100644 mach/sparc/ce/ce.src/C_con_scon.c create mode 100644 mach/sparc/ce/ce.src/C_cst.c create mode 100644 mach/sparc/ce/ce.src/C_dlb.c create mode 100644 mach/sparc/ce/ce.src/C_dnam.c create mode 100644 mach/sparc/ce/ce.src/C_exa_dnam.c create mode 100644 mach/sparc/ce/ce.src/C_ilb.c create mode 100644 mach/sparc/ce/ce.src/C_mes_begin.c create mode 100644 mach/sparc/ce/ce.src/C_mes_end.c create mode 100644 mach/sparc/ce/ce.src/C_pnam.c create mode 100644 mach/sparc/ce/ce.src/C_pro.c create mode 100644 mach/sparc/ce/ce.src/C_rom_scon.c create mode 100644 mach/sparc/ce/ce.src/C_scon.c create mode 100644 mach/sparc/ce/ce.src/misc.c create mode 100644 mach/sparc/ce/ce.src/ms_reg.c create mode 100644 mach/sparc/ce/cegpp create mode 100644 mach/sparc/ce/mach.c create mode 100644 mach/sparc/ce/mach.h create mode 100644 mach/sparc/ce/mach_em.h create mode 100644 mach/sparc/ce/misc.h create mode 100644 mach/sparc/ce/ms_reg.h create mode 100644 mach/sparc/ce/proto.make create mode 100644 mach/sparc/ce/push_pop.h create mode 100644 mach/sparc/ce_cg/convert.c create mode 100644 mach/sparc/ce_cg/proto.make create mode 100644 mach/sparc/libdb/machdep.s create mode 100644 mach/sparc/libem/LIST create mode 100644 mach/sparc/libem/libem_s.a create mode 100644 mach/sparc/libend/LIST create mode 100644 mach/sparc/libend/end_s.a create mode 100644 mach/sparc/libsys/LIST create mode 100644 mach/sparc/libsys/SYS.h create mode 100644 mach/sparc/libsys/head_em.s create mode 100644 mach/sparc/libsys/libmon_s.a create mode 100644 mach/sparc/libsys/syscall.h create mode 100644 mach/sparc/mach_params create mode 100644 mach/sparc/top/table create mode 100644 mach/sparc_solaris/Action create mode 100644 mach/sparc_solaris/ce/proto.make create mode 100644 mach/sparc_solaris/ce_cg/proto.make create mode 100644 
mach/sparc_solaris/libdb/machdep.s create mode 100644 mach/sparc_solaris/libem/LIST create mode 100644 mach/sparc_solaris/libem/Makefile create mode 100644 mach/sparc_solaris/libem/READ_ME create mode 100644 mach/sparc_solaris/libem/libem_s.a create mode 100644 mach/sparc_solaris/libend/LIST create mode 100644 mach/sparc_solaris/libend/end_s.a create mode 100644 mach/sparc_solaris/libsys/LIST create mode 100644 mach/sparc_solaris/libsys/SYS.h create mode 100644 mach/sparc_solaris/libsys/head_em.s create mode 100644 mach/sparc_solaris/libsys/libmon_s.a create mode 100644 mach/sparc_solaris/mach_params create mode 100644 mach/sun2/Action create mode 100644 mach/sun2/cv/proto.make create mode 100644 mach/sun2/libsys/LIST create mode 100644 mach/sun2/libsys/head_em.s create mode 100644 mach/sun2/libsys/libmon_s.a create mode 100644 mach/sun2/libsys/syscall.h create mode 100644 mach/sun2/mach_params create mode 100644 mach/sun3/Action create mode 100644 mach/sun3/ce/EM_table create mode 100644 mach/sun3/ce/Make.back create mode 100644 mach/sun3/ce/as.c create mode 100644 mach/sun3/ce/as.h create mode 100644 mach/sun3/ce/as_table create mode 100644 mach/sun3/ce/do_close.c create mode 100644 mach/sun3/ce/do_open.c create mode 100644 mach/sun3/ce/end_back.c create mode 100644 mach/sun3/ce/mach.c create mode 100644 mach/sun3/ce/mach.h create mode 100644 mach/sun3/ce/misc.c create mode 100644 mach/sun3/ce/output.c create mode 100644 mach/sun3/ce/proto.make create mode 100644 mach/sun3/ce/relocation.c create mode 100644 mach/sun3/cv/Xcv.c create mode 100644 mach/sun3/cv/cv.c create mode 100644 mach/sun3/cv/proto.make create mode 100644 mach/sun3/libce/adf4.s create mode 100644 mach/sun3/libce/adf8.s create mode 100644 mach/sun3/libce/cff.s create mode 100644 mach/sun3/libce/cfi.s create mode 100644 mach/sun3/libce/cfu.s create mode 100644 mach/sun3/libce/cif.s create mode 100644 mach/sun3/libce/cmf4.s create mode 100644 mach/sun3/libce/cmf8.s create mode 100644 
mach/sun3/libce/cuf.s create mode 100644 mach/sun3/libce/dvf4.s create mode 100644 mach/sun3/libce/dvf8.s create mode 100644 mach/sun3/libce/fef4.s create mode 100644 mach/sun3/libce/fef8.s create mode 100644 mach/sun3/libce/fif4.s create mode 100644 mach/sun3/libce/fif8.s create mode 100644 mach/sun3/libce/head_em.s create mode 100644 mach/sun3/libce/mlf4.s create mode 100644 mach/sun3/libce/mlf8.s create mode 100644 mach/sun3/libce/proto.make create mode 100644 mach/sun3/libce/sbf4.s create mode 100644 mach/sun3/libce/sbf8.s create mode 100644 mach/sun3/libce/sys.s create mode 100644 mach/sun3/libce/vars.s create mode 100644 mach/sun3/libsys/LIST create mode 100644 mach/sun3/libsys/head_em.s create mode 100644 mach/sun3/libsys/libmon_s.a create mode 100644 mach/sun3/libsys/syscall.h create mode 100644 mach/sun3/mach_params create mode 100644 mach/vax4/Action create mode 100644 mach/vax4/as/mach0.c create mode 100644 mach/vax4/as/mach1.c create mode 100644 mach/vax4/as/mach2.c create mode 100644 mach/vax4/as/mach3.c create mode 100644 mach/vax4/as/mach4.c create mode 100644 mach/vax4/as/mach5.c create mode 100644 mach/vax4/ce/EM_table create mode 100644 mach/vax4/ce/Make.back create mode 100644 mach/vax4/ce/as.c create mode 100644 mach/vax4/ce/as.h create mode 100644 mach/vax4/ce/as_table create mode 100644 mach/vax4/ce/do_close.c create mode 100644 mach/vax4/ce/do_open.c create mode 100644 mach/vax4/ce/end_back.c create mode 100644 mach/vax4/ce/mach.c create mode 100644 mach/vax4/ce/mach.h create mode 100644 mach/vax4/ce/output.c create mode 100644 mach/vax4/ce/proto.make create mode 100644 mach/vax4/ce/relocation.c create mode 100644 mach/vax4/cg/mach.c create mode 100644 mach/vax4/cg/mach.h create mode 100644 mach/vax4/cg/table create mode 100644 mach/vax4/cv/cv.c create mode 100644 mach/vax4/cv/proto.make create mode 100644 mach/vax4/libbsd4_1a/LIST create mode 100644 mach/vax4/libbsd4_1a/head_em.s create mode 100644 mach/vax4/libbsd4_1a/libmon_s.a create mode 
100644 mach/vax4/libbsd4_2/LIST create mode 100644 mach/vax4/libbsd4_2/head_em.s create mode 100644 mach/vax4/libbsd4_2/libmon_s.a create mode 100644 mach/vax4/libbsd4_2/syscall.h create mode 100644 mach/vax4/libce/proto.make create mode 100644 mach/vax4/libce/sys.s create mode 100644 mach/vax4/libdb/machdep.s create mode 100644 mach/vax4/libem/LIST create mode 100644 mach/vax4/libem/libem_s.a create mode 100644 mach/vax4/libend/LIST create mode 100644 mach/vax4/libend/end_s.a create mode 100644 mach/vax4/libsysV_2/LIST create mode 100644 mach/vax4/libsysV_2/head_em.s create mode 100644 mach/vax4/libsysV_2/libmon_s.a create mode 100644 mach/vax4/mach_params create mode 100644 mach/vax4/top/table create mode 100644 mach/xenix3/Action create mode 100644 mach/xenix3/cv/chstack.c create mode 100644 mach/xenix3/cv/cv.c create mode 100644 mach/xenix3/cv/proto.make create mode 100644 mach/xenix3/libsys/LIST create mode 100644 mach/xenix3/libsys/head_em.s create mode 100644 mach/xenix3/libsys/libmon_s.a create mode 100644 mach/xenix3/mach_params create mode 100644 mach/z80/Action create mode 100644 mach/z80/as/mach0.c create mode 100644 mach/z80/as/mach1.c create mode 100644 mach/z80/as/mach2.c create mode 100644 mach/z80/as/mach3.c create mode 100644 mach/z80/as/mach4.c create mode 100644 mach/z80/as/mach5.c create mode 100644 mach/z80/cg/mach.c create mode 100644 mach/z80/cg/mach.h create mode 100644 mach/z80/cg/table create mode 100644 mach/z80/int/READ_ME create mode 100644 mach/z80/int/atof.s create mode 100644 mach/z80/int/cv.c create mode 100644 mach/z80/int/dl.c create mode 100755 mach/z80/int/dosort create mode 100644 mach/z80/int/dvi4.s create mode 100644 mach/z80/int/dvu4.s create mode 100644 mach/z80/int/eb.s create mode 100644 mach/z80/int/em.s create mode 100644 mach/z80/int/em22 create mode 100644 mach/z80/int/fpp.s create mode 100644 mach/z80/int/mli4.s create mode 100644 mach/z80/int/proto.make create mode 100644 mach/z80/libem/LIST create mode 100644 
mach/z80/libem/libem_s.a create mode 100644 mach/z80/libend/LIST create mode 100644 mach/z80/libend/end_s.a create mode 100644 mach/z80/libmon/LIST create mode 100644 mach/z80/libmon/README create mode 100644 mach/z80/libmon/char.her.s create mode 100644 mach/z80/libmon/char.nas.s create mode 100644 mach/z80/libmon/head_em.s create mode 100644 mach/z80/libmon/libmon_s.a create mode 100644 mach/z80/libmon/mon.cpm.s create mode 100644 mach/z80/libmon/mon.s create mode 100644 mach/z80/mach_params create mode 100644 mach/z8000/Action create mode 100644 mach/z8000/as/README create mode 100644 mach/z8000/as/mach0.c create mode 100644 mach/z8000/as/mach1.c create mode 100644 mach/z8000/as/mach2.c create mode 100644 mach/z8000/as/mach3.c create mode 100644 mach/z8000/as/mach4.c create mode 100644 mach/z8000/as/mach5.c create mode 100644 mach/z8000/cg/mach.c create mode 100644 mach/z8000/cg/mach.h create mode 100644 mach/z8000/cg/table create mode 100644 mach/z8000/libem/LIST create mode 100644 mach/z8000/libem/libem_s.a create mode 100644 mach/z8000/libend/LIST create mode 100644 mach/z8000/libend/end_s.a create mode 100644 mach/z8000/libmon/LIST create mode 100644 mach/z8000/libmon/head_em.s create mode 100644 mach/z8000/libmon/libmon_s.a create mode 100644 mach/z8000/mach_params create mode 100644 man/6500_as.6 create mode 100644 man/6800_as.6 create mode 100644 man/6805_as.6 create mode 100644 man/6809_as.6 create mode 100644 man/8080_as.6 create mode 100644 man/em_cg.6 create mode 100644 man/em_ncg.6 create mode 100644 man/head create mode 100644 man/i386_as.6 create mode 100644 man/i86_as.6 create mode 100644 man/libmon.7 create mode 100644 man/libpc.7 create mode 100644 man/m68k2_as.6 create mode 100644 man/ns_as.6 create mode 100644 man/pc_prlib.7 create mode 100644 man/pdp_as.6 create mode 100644 man/proto.make create mode 100644 man/uni_ass.6 create mode 100644 man/z8000_as.6 create mode 100644 man/z80_as.6 create mode 100644 modules/h/ansi.h create mode 100644 
modules/h/em.h create mode 100644 modules/h/emO_code.h create mode 100644 modules/h/em_arith.h create mode 100644 modules/h/em_code.h create mode 100644 modules/h/em_codeCE.h create mode 100644 modules/h/em_codeO.h create mode 100644 modules/h/em_label.h create mode 100644 modules/h/em_mesX.h create mode 100644 modules/h/proto.make create mode 100644 modules/src/Action create mode 100644 modules/src/Action.lint create mode 100644 modules/src/alloc/Malloc.c create mode 100644 modules/src/alloc/No_Mem.c create mode 100644 modules/src/alloc/Realloc.c create mode 100644 modules/src/alloc/Salloc.c create mode 100644 modules/src/alloc/Srealloc.c create mode 100644 modules/src/alloc/alloc.3 create mode 100644 modules/src/alloc/alloc.h create mode 100644 modules/src/alloc/botch.c create mode 100644 modules/src/alloc/clear.c create mode 100644 modules/src/alloc/proto.make create mode 100644 modules/src/alloc/st_alloc.c create mode 100644 modules/src/alloc/std_alloc.c create mode 100644 modules/src/assert/BadAssert.c create mode 100644 modules/src/assert/assert.3 create mode 100644 modules/src/assert/assert.h create mode 100644 modules/src/assert/proto.make create mode 100644 modules/src/em_code/C_out.c create mode 100644 modules/src/em_code/bhcst.c create mode 100644 modules/src/em_code/bhdlb.c create mode 100644 modules/src/em_code/bhdnam.c create mode 100644 modules/src/em_code/bhfcon.c create mode 100644 modules/src/em_code/bhicon.c create mode 100644 modules/src/em_code/bhilb.c create mode 100644 modules/src/em_code/bhpnam.c create mode 100644 modules/src/em_code/bhucon.c create mode 100644 modules/src/em_code/convert.c create mode 100644 modules/src/em_code/crcst.c create mode 100644 modules/src/em_code/crdlb.c create mode 100644 modules/src/em_code/crdnam.c create mode 100644 modules/src/em_code/crilb.c create mode 100644 modules/src/em_code/crpnam.c create mode 100644 modules/src/em_code/crscon.c create mode 100644 modules/src/em_code/crxcon.c create mode 100644 
modules/src/em_code/cst.c create mode 100644 modules/src/em_code/dfdlb.c create mode 100644 modules/src/em_code/dfdnam.c create mode 100644 modules/src/em_code/dfilb.c create mode 100644 modules/src/em_code/dlb.c create mode 100644 modules/src/em_code/dnam.c create mode 100644 modules/src/em_code/em.c create mode 100644 modules/src/em_code/em.nogen create mode 100644 modules/src/em_code/em_code.3X create mode 100644 modules/src/em_code/em_private.h create mode 100644 modules/src/em_code/end.c create mode 100644 modules/src/em_code/endarg.c create mode 100644 modules/src/em_code/exc.c create mode 100644 modules/src/em_code/failed.c create mode 100644 modules/src/em_code/fcon.c create mode 100644 modules/src/em_code/getid.c create mode 100644 modules/src/em_code/icon.c create mode 100644 modules/src/em_code/ilb.c create mode 100644 modules/src/em_code/insert.c create mode 100644 modules/src/em_code/insert.h create mode 100644 modules/src/em_code/internerr.c create mode 100755 modules/src/em_code/make.em.gen create mode 100755 modules/src/em_code/make.sh create mode 100644 modules/src/em_code/msend.c create mode 100644 modules/src/em_code/msstart.c create mode 100644 modules/src/em_code/op.c create mode 100644 modules/src/em_code/opcst.c create mode 100644 modules/src/em_code/opdlb.c create mode 100644 modules/src/em_code/opdnam.c create mode 100644 modules/src/em_code/opilb.c create mode 100644 modules/src/em_code/opnarg.c create mode 100644 modules/src/em_code/oppnam.c create mode 100644 modules/src/em_code/pnam.c create mode 100644 modules/src/em_code/pro.c create mode 100644 modules/src/em_code/pronarg.c create mode 100644 modules/src/em_code/proto.make create mode 100644 modules/src/em_code/psdlb.c create mode 100644 modules/src/em_code/psdnam.c create mode 100644 modules/src/em_code/pspnam.c create mode 100644 modules/src/em_code/scon.c create mode 100644 modules/src/em_code/ucon.c create mode 100644 modules/src/em_mes/C_ms_com.c create mode 100644 
modules/src/em_mes/C_ms_ego.c create mode 100644 modules/src/em_mes/C_ms_emx.c create mode 100644 modules/src/em_mes/C_ms_err.c create mode 100644 modules/src/em_mes/C_ms_flt.c create mode 100644 modules/src/em_mes/C_ms_gto.c create mode 100644 modules/src/em_mes/C_ms_opt.c create mode 100644 modules/src/em_mes/C_ms_par.c create mode 100644 modules/src/em_mes/C_ms_reg.c create mode 100644 modules/src/em_mes/C_ms_src.c create mode 100644 modules/src/em_mes/C_ms_stb.c create mode 100644 modules/src/em_mes/C_ms_std.c create mode 100644 modules/src/em_mes/em_mes.3 create mode 100644 modules/src/em_mes/proto.make create mode 100644 modules/src/em_opt/aux.c create mode 100644 modules/src/em_opt/em_codeO.h create mode 100644 modules/src/em_opt/em_nopt.6 create mode 100644 modules/src/em_opt/em_opt.3 create mode 100644 modules/src/em_opt/findworst.c create mode 100644 modules/src/em_opt/initlex.c create mode 100644 modules/src/em_opt/main.c create mode 100644 modules/src/em_opt/makefuns.awk create mode 100644 modules/src/em_opt/mkstrct.c create mode 100644 modules/src/em_opt/nopt.c create mode 100644 modules/src/em_opt/nopt.h create mode 100644 modules/src/em_opt/outcalls.c create mode 100644 modules/src/em_opt/outputdfa.c create mode 100644 modules/src/em_opt/parser.g create mode 100644 modules/src/em_opt/parser.h create mode 100644 modules/src/em_opt/patterns create mode 100644 modules/src/em_opt/proto.make create mode 100644 modules/src/em_opt/pseudo.r create mode 100644 modules/src/em_opt/syntax.l create mode 100644 modules/src/flt_arith/b64_add.c create mode 100644 modules/src/flt_arith/b64_sft.c create mode 100644 modules/src/flt_arith/flt_add.c create mode 100644 modules/src/flt_arith/flt_ar2flt.c create mode 100644 modules/src/flt_arith/flt_arith.3 create mode 100644 modules/src/flt_arith/flt_arith.h create mode 100644 modules/src/flt_arith/flt_chk.c create mode 100644 modules/src/flt_arith/flt_cmp.c create mode 100644 modules/src/flt_arith/flt_div.c create mode 
100644 modules/src/flt_arith/flt_flt2ar.c create mode 100644 modules/src/flt_arith/flt_misc.h create mode 100644 modules/src/flt_arith/flt_modf.c create mode 100644 modules/src/flt_arith/flt_mul.c create mode 100644 modules/src/flt_arith/flt_nrm.c create mode 100644 modules/src/flt_arith/flt_str2fl.c create mode 100644 modules/src/flt_arith/flt_umin.c create mode 100644 modules/src/flt_arith/proto.make create mode 100644 modules/src/flt_arith/split.c create mode 100644 modules/src/flt_arith/test.c create mode 100644 modules/src/flt_arith/ucmp.c create mode 100644 modules/src/idf/idf.3 create mode 100644 modules/src/idf/idf_pkg.body create mode 100644 modules/src/idf/idf_pkg.spec create mode 100644 modules/src/idf/proto.make create mode 100644 modules/src/input/AtEoIF.c create mode 100644 modules/src/input/AtEoIT.c create mode 100644 modules/src/input/inp_pkg.body create mode 100644 modules/src/input/inp_pkg.spec create mode 100644 modules/src/input/input.3 create mode 100644 modules/src/input/proto.make create mode 100644 modules/src/malloc/READ_ME create mode 100755 modules/src/malloc/add_file create mode 100644 modules/src/malloc/check.c create mode 100644 modules/src/malloc/check.h create mode 100644 modules/src/malloc/getsize.c create mode 100644 modules/src/malloc/global.c create mode 100644 modules/src/malloc/impl.h create mode 100644 modules/src/malloc/log.c create mode 100644 modules/src/malloc/log.h create mode 100644 modules/src/malloc/mal.c create mode 100644 modules/src/malloc/param.h create mode 100644 modules/src/malloc/phys.c create mode 100644 modules/src/malloc/phys.h create mode 100644 modules/src/malloc/proto.make create mode 100644 modules/src/malloc/size_type.h create mode 100644 modules/src/object/obj.h create mode 100644 modules/src/object/object.3 create mode 100644 modules/src/object/object.h create mode 100644 modules/src/object/proto.make create mode 100644 modules/src/object/rd.c create mode 100644 modules/src/object/rd_arhdr.c create 
mode 100644 modules/src/object/rd_bytes.c create mode 100644 modules/src/object/rd_int2.c create mode 100644 modules/src/object/rd_long.c create mode 100644 modules/src/object/rd_ranlib.c create mode 100644 modules/src/object/rd_unsig2.c create mode 100644 modules/src/object/wr.c create mode 100644 modules/src/object/wr_arhdr.c create mode 100644 modules/src/object/wr_bytes.c create mode 100644 modules/src/object/wr_int2.c create mode 100644 modules/src/object/wr_long.c create mode 100644 modules/src/object/wr_putc.c create mode 100644 modules/src/object/wr_ranlib.c create mode 100644 modules/src/print/doprnt.c create mode 100644 modules/src/print/format.c create mode 100644 modules/src/print/fprint.c create mode 100644 modules/src/print/param.h create mode 100644 modules/src/print/print.3 create mode 100644 modules/src/print/print.c create mode 100644 modules/src/print/print.h create mode 100644 modules/src/print/proto.make create mode 100644 modules/src/print/sprint.c create mode 100644 modules/src/read_em/EM_vars.c create mode 100755 modules/src/read_em/argtype create mode 100644 modules/src/read_em/em_comp.h create mode 100755 modules/src/read_em/m_C_mnem create mode 100755 modules/src/read_em/m_C_mnem_na create mode 100644 modules/src/read_em/mkcalls.c create mode 100644 modules/src/read_em/proto.make create mode 100644 modules/src/read_em/read_em.3 create mode 100644 modules/src/read_em/read_em.c create mode 100644 modules/src/read_em/reade.c create mode 100644 modules/src/read_em/readk.c create mode 100644 modules/src/string/ack_string.h create mode 100644 modules/src/string/bts2str.c create mode 100644 modules/src/string/btscat.c create mode 100644 modules/src/string/btscmp.c create mode 100644 modules/src/string/btscpy.c create mode 100644 modules/src/string/btszero.c create mode 100644 modules/src/string/long2str.c create mode 100644 modules/src/string/proto.make create mode 100644 modules/src/string/str2bts.c create mode 100644 
modules/src/string/str2long.c create mode 100644 modules/src/string/strcat.c create mode 100644 modules/src/string/strcmp.c create mode 100644 modules/src/string/strcpy.c create mode 100644 modules/src/string/strindex.c create mode 100644 modules/src/string/string.3 create mode 100644 modules/src/string/strlen.c create mode 100644 modules/src/string/strncat.c create mode 100644 modules/src/string/strncmp.c create mode 100644 modules/src/string/strncpy.c create mode 100644 modules/src/string/strrindex.c create mode 100644 modules/src/string/strzero.c create mode 100644 modules/src/system/access.c create mode 100644 modules/src/system/break.c create mode 100644 modules/src/system/chmode.c create mode 100644 modules/src/system/close.c create mode 100644 modules/src/system/create.c create mode 100644 modules/src/system/filesize.c create mode 100644 modules/src/system/lock.c create mode 100644 modules/src/system/modtime.c create mode 100644 modules/src/system/open.c create mode 100644 modules/src/system/proto.make create mode 100644 modules/src/system/read.c create mode 100644 modules/src/system/remove.c create mode 100644 modules/src/system/rename.c create mode 100644 modules/src/system/seek.c create mode 100644 modules/src/system/stop.c create mode 100644 modules/src/system/system.3 create mode 100644 modules/src/system/system.c create mode 100644 modules/src/system/system.h create mode 100644 modules/src/system/time.c create mode 100644 modules/src/system/unlock.c create mode 100644 modules/src/system/write.c create mode 100644 util/LLgen/COPYING create mode 100644 util/LLgen/LLgen.1 create mode 100644 util/LLgen/Makefile create mode 100644 util/LLgen/READ_ME create mode 100644 util/LLgen/lib/incl create mode 100644 util/LLgen/lib/nc_incl create mode 100644 util/LLgen/lib/nc_rec create mode 100644 util/LLgen/lib/rec create mode 100644 util/LLgen/proto.make create mode 100644 util/LLgen/src/LLgen.c.dist create mode 100644 util/LLgen/src/LLgen.g create mode 100644 
util/LLgen/src/Lpars.c.dist create mode 100644 util/LLgen/src/Lpars.h.dist create mode 100644 util/LLgen/src/Makefile create mode 100644 util/LLgen/src/alloc.c create mode 100644 util/LLgen/src/cclass.c create mode 100644 util/LLgen/src/cclass.h create mode 100644 util/LLgen/src/check.c create mode 100644 util/LLgen/src/compute.c create mode 100644 util/LLgen/src/extern.h create mode 100644 util/LLgen/src/gencode.c create mode 100644 util/LLgen/src/global.c create mode 100644 util/LLgen/src/io.h create mode 100644 util/LLgen/src/machdep.c create mode 100644 util/LLgen/src/main.c create mode 100644 util/LLgen/src/name.c create mode 100644 util/LLgen/src/proto.make create mode 100644 util/LLgen/src/reach.c create mode 100644 util/LLgen/src/savegram.c create mode 100644 util/LLgen/src/sets.c create mode 100644 util/LLgen/src/sets.h create mode 100644 util/LLgen/src/tokens.c.dist create mode 100644 util/LLgen/src/tokens.g create mode 100644 util/LLgen/src/types.h create mode 100644 util/ack/ack.1.X create mode 100644 util/ack/ack.h create mode 100644 util/ack/data.c create mode 100644 util/ack/data.h create mode 100644 util/ack/dmach.h create mode 100644 util/ack/files.c create mode 100644 util/ack/grows.c create mode 100644 util/ack/grows.h create mode 100644 util/ack/list.c create mode 100644 util/ack/list.h create mode 100644 util/ack/main.c create mode 100644 util/ack/mktables.c create mode 100644 util/ack/proto.make create mode 100644 util/ack/rmach.c create mode 100644 util/ack/run.c create mode 100644 util/ack/scan.c create mode 100644 util/ack/svars.c create mode 100644 util/ack/trans.c create mode 100644 util/ack/trans.h create mode 100644 util/ack/util.c create mode 100644 util/amisc/anm.1 create mode 100644 util/amisc/anm.c create mode 100644 util/amisc/ashow.c create mode 100644 util/amisc/asize.1 create mode 100644 util/amisc/asize.c create mode 100644 util/amisc/astrip.1 create mode 100644 util/amisc/astrip.c create mode 100644 util/amisc/proto.make 
create mode 100644 util/arch/aal.1 create mode 100644 util/arch/arch.1 create mode 100644 util/arch/arch.5 create mode 100644 util/arch/archiver.c create mode 100644 util/arch/proto.make create mode 100644 util/ass/asprint.p create mode 100644 util/ass/ass00.c create mode 100644 util/ass/ass00.h create mode 100644 util/ass/ass30.c create mode 100644 util/ass/ass40.c create mode 100644 util/ass/ass50.c create mode 100644 util/ass/ass60.c create mode 100644 util/ass/ass70.c create mode 100644 util/ass/ass80.c create mode 100644 util/ass/assci.c create mode 100644 util/ass/asscm.c create mode 100644 util/ass/assda.c create mode 100644 util/ass/assex.h create mode 100644 util/ass/assrl.c create mode 100644 util/ass/em_ass.6 create mode 100644 util/ass/maktab.c create mode 100644 util/ass/proto.make create mode 100644 util/byacc/ACKNOWLEDGEMENTS create mode 100644 util/byacc/NEW_FEATURES create mode 100644 util/byacc/NO_WARRANTY create mode 100644 util/byacc/README create mode 100644 util/byacc/closure.c create mode 100644 util/byacc/defs.h create mode 100644 util/byacc/error.c create mode 100644 util/byacc/lalr.c create mode 100644 util/byacc/lr0.c create mode 100644 util/byacc/main.c create mode 100644 util/byacc/manpage create mode 100644 util/byacc/mkpar.c create mode 100644 util/byacc/output.c create mode 100644 util/byacc/proto.make create mode 100644 util/byacc/reader.c create mode 100644 util/byacc/skeleton.c create mode 100644 util/byacc/symtab.c create mode 100644 util/byacc/verbose.c create mode 100644 util/byacc/warshall.c create mode 100644 util/ceg/Action create mode 100644 util/ceg/EM_parser/as_EM_pars/arg_type.h create mode 100644 util/ceg/EM_parser/as_EM_pars/dist.c create mode 100644 util/ceg/EM_parser/as_EM_pars/em_decl.h create mode 100644 util/ceg/EM_parser/as_EM_pars/em_parser.h create mode 100644 util/ceg/EM_parser/as_EM_pars/error.c create mode 100644 util/ceg/EM_parser/as_EM_pars/proto.make create mode 100644 util/ceg/EM_parser/common/C_instr2.c 
create mode 100644 util/ceg/EM_parser/common/action.c create mode 100644 util/ceg/EM_parser/common/arg_type.h create mode 100644 util/ceg/EM_parser/common/decl.h create mode 100644 util/ceg/EM_parser/common/default.c create mode 100644 util/ceg/EM_parser/common/em_parser.h create mode 100644 util/ceg/EM_parser/common/eval.c create mode 100644 util/ceg/EM_parser/common/help.c create mode 100644 util/ceg/EM_parser/common/mylex.c create mode 100644 util/ceg/EM_parser/common/pars.g create mode 100644 util/ceg/EM_parser/common/proto.make create mode 100644 util/ceg/EM_parser/common/scan.c create mode 100644 util/ceg/EM_parser/obj_EM_pars/arg_type.h create mode 100644 util/ceg/EM_parser/obj_EM_pars/dist.c create mode 100644 util/ceg/EM_parser/obj_EM_pars/em_parser.h create mode 100644 util/ceg/EM_parser/obj_EM_pars/proto.make create mode 100644 util/ceg/as_parser/as_parser.h create mode 100644 util/ceg/as_parser/const.h create mode 100644 util/ceg/as_parser/conversion.c create mode 100644 util/ceg/as_parser/decl.h create mode 100644 util/ceg/as_parser/eval/eval.c create mode 100644 util/ceg/as_parser/eval/proto.make create mode 100644 util/ceg/as_parser/eval/states.h create mode 100644 util/ceg/as_parser/help.c create mode 100644 util/ceg/as_parser/pars.g create mode 100644 util/ceg/as_parser/proto.make create mode 100644 util/ceg/as_parser/table.l create mode 100644 util/ceg/assemble/as_assemble/READ_ME create mode 100644 util/ceg/assemble/as_assemble/assemble.c create mode 100644 util/ceg/assemble/as_assemble/block_as.c create mode 100644 util/ceg/assemble/obj_assemble/READ_ME create mode 100644 util/ceg/assemble/obj_assemble/assemble.c create mode 100644 util/ceg/assemble/obj_assemble/block_as.c create mode 100644 util/ceg/assemble/obj_assemble/const.h create mode 100644 util/ceg/assemble/proto.make create mode 100644 util/ceg/ce_back/as_back/READ_ME create mode 100644 util/ceg/ce_back/as_back/back.h create mode 100644 util/ceg/ce_back/as_back/bottom.c create mode 
100644 util/ceg/ce_back/as_back/bss.c create mode 100644 util/ceg/ce_back/as_back/con1.c create mode 100644 util/ceg/ce_back/as_back/con2.c create mode 100644 util/ceg/ce_back/as_back/con4.c create mode 100644 util/ceg/ce_back/as_back/dbsym.c create mode 100644 util/ceg/ce_back/as_back/do_close.c create mode 100644 util/ceg/ce_back/as_back/do_open.c create mode 100644 util/ceg/ce_back/as_back/end_back.c create mode 100644 util/ceg/ce_back/as_back/gen1.c create mode 100644 util/ceg/ce_back/as_back/gen2.c create mode 100644 util/ceg/ce_back/as_back/gen4.c create mode 100644 util/ceg/ce_back/as_back/header.h create mode 100644 util/ceg/ce_back/as_back/init_back.c create mode 100644 util/ceg/ce_back/as_back/proto.make create mode 100644 util/ceg/ce_back/as_back/reloc1.c create mode 100644 util/ceg/ce_back/as_back/reloc2.c create mode 100644 util/ceg/ce_back/as_back/reloc4.c create mode 100644 util/ceg/ce_back/as_back/rom1.c create mode 100644 util/ceg/ce_back/as_back/rom2.c create mode 100644 util/ceg/ce_back/as_back/rom4.c create mode 100644 util/ceg/ce_back/as_back/set_global.c create mode 100644 util/ceg/ce_back/as_back/set_local.c create mode 100644 util/ceg/ce_back/as_back/switchseg.c create mode 100644 util/ceg/ce_back/as_back/symboldef.c create mode 100644 util/ceg/ce_back/as_back/text1.c create mode 100644 util/ceg/ce_back/as_back/text2.c create mode 100644 util/ceg/ce_back/as_back/text4.c create mode 100644 util/ceg/ce_back/obj_back/READ_ME create mode 100644 util/ceg/ce_back/obj_back/back.h create mode 100644 util/ceg/ce_back/obj_back/common.c create mode 100644 util/ceg/ce_back/obj_back/con2.c create mode 100644 util/ceg/ce_back/obj_back/con4.c create mode 100644 util/ceg/ce_back/obj_back/data.c create mode 100644 util/ceg/ce_back/obj_back/data.h create mode 100644 util/ceg/ce_back/obj_back/dbsym.c create mode 100644 util/ceg/ce_back/obj_back/do_close.c create mode 100644 util/ceg/ce_back/obj_back/do_open.c create mode 100644 
util/ceg/ce_back/obj_back/end_back.c create mode 100644 util/ceg/ce_back/obj_back/extnd.c create mode 100644 util/ceg/ce_back/obj_back/gen1.c create mode 100644 util/ceg/ce_back/obj_back/gen2.c create mode 100644 util/ceg/ce_back/obj_back/gen4.c create mode 100644 util/ceg/ce_back/obj_back/hash.h create mode 100644 util/ceg/ce_back/obj_back/header.h create mode 100644 util/ceg/ce_back/obj_back/init_back.c create mode 100644 util/ceg/ce_back/obj_back/label.c create mode 100644 util/ceg/ce_back/obj_back/memory.c create mode 100644 util/ceg/ce_back/obj_back/misc.c create mode 100644 util/ceg/ce_back/obj_back/output.c create mode 100644 util/ceg/ce_back/obj_back/proto.make create mode 100644 util/ceg/ce_back/obj_back/reloc1.c create mode 100644 util/ceg/ce_back/obj_back/reloc2.c create mode 100644 util/ceg/ce_back/obj_back/reloc4.c create mode 100644 util/ceg/ce_back/obj_back/relocation.c create mode 100644 util/ceg/ce_back/obj_back/rom2.c create mode 100644 util/ceg/ce_back/obj_back/rom4.c create mode 100644 util/ceg/ce_back/obj_back/set_global.c create mode 100644 util/ceg/ce_back/obj_back/set_local.c create mode 100644 util/ceg/ce_back/obj_back/switchseg.c create mode 100644 util/ceg/ce_back/obj_back/symboldef.c create mode 100644 util/ceg/ce_back/obj_back/symtable.c create mode 100644 util/ceg/ce_back/obj_back/text2.c create mode 100644 util/ceg/ce_back/obj_back/text4.c create mode 100644 util/ceg/ce_back/proto.make create mode 100644 util/ceg/defaults/C_out_skel.c create mode 100644 util/ceg/defaults/EM_vars.c create mode 100644 util/ceg/defaults/READ_ME create mode 100755 util/ceg/defaults/argtype create mode 100755 util/ceg/defaults/m_C_mnem create mode 100755 util/ceg/defaults/m_C_mnem_na create mode 100644 util/ceg/defaults/message/C_cst.c create mode 100644 util/ceg/defaults/message/C_dlb.c create mode 100644 util/ceg/defaults/message/C_dnam.c create mode 100644 util/ceg/defaults/message/C_fcon.c create mode 100644 util/ceg/defaults/message/C_icon.c create 
mode 100644 util/ceg/defaults/message/C_ilb.c create mode 100644 util/ceg/defaults/message/C_mes_begin.c create mode 100644 util/ceg/defaults/message/C_mes_end.c create mode 100644 util/ceg/defaults/message/C_pnam.c create mode 100644 util/ceg/defaults/message/C_scon.c create mode 100644 util/ceg/defaults/message/C_ucon.c create mode 100755 util/ceg/defaults/mk_C_out create mode 100644 util/ceg/defaults/not_impl/not_impl.c create mode 100644 util/ceg/defaults/not_impl/not_impl_table create mode 100644 util/ceg/defaults/proto.make create mode 100644 util/ceg/defaults/pseudo/C_busy.c create mode 100644 util/ceg/defaults/pseudo/C_close.c create mode 100644 util/ceg/defaults/pseudo/C_df_dlb.c create mode 100644 util/ceg/defaults/pseudo/C_df_dnam.c create mode 100644 util/ceg/defaults/pseudo/C_df_ilb.c create mode 100644 util/ceg/defaults/pseudo/C_end.c create mode 100644 util/ceg/defaults/pseudo/C_end_narg.c create mode 100644 util/ceg/defaults/pseudo/C_exa_dlb.c create mode 100644 util/ceg/defaults/pseudo/C_exa_dnam.c create mode 100644 util/ceg/defaults/pseudo/C_exp.c create mode 100644 util/ceg/defaults/pseudo/C_ina_dlb.c create mode 100644 util/ceg/defaults/pseudo/C_ina_dnam.c create mode 100644 util/ceg/defaults/pseudo/C_init.c create mode 100644 util/ceg/defaults/pseudo/C_inp.c create mode 100644 util/ceg/defaults/pseudo/C_insertpart.c create mode 100644 util/ceg/defaults/pseudo/C_magic.c create mode 100644 util/ceg/defaults/pseudo/C_open.c create mode 100644 util/ceg/defaults/pseudo/C_pro.c create mode 100644 util/ceg/defaults/pseudo/C_pro_narg.c create mode 100644 util/ceg/defaults/pseudo_vars.c create mode 100644 util/ceg/defaults/storage/C_bss_cst.c create mode 100644 util/ceg/defaults/storage/C_bss_dlb.c create mode 100644 util/ceg/defaults/storage/C_bss_dnam.c create mode 100644 util/ceg/defaults/storage/C_bss_ilb.c create mode 100644 util/ceg/defaults/storage/C_bss_pnam.c create mode 100644 util/ceg/defaults/storage/C_con_cst.c create mode 100644 
util/ceg/defaults/storage/C_con_dlb.c create mode 100644 util/ceg/defaults/storage/C_con_dnam.c create mode 100644 util/ceg/defaults/storage/C_con_ilb.c create mode 100644 util/ceg/defaults/storage/C_con_pnam.c create mode 100644 util/ceg/defaults/storage/C_con_scon.c create mode 100644 util/ceg/defaults/storage/C_hol_cst.c create mode 100644 util/ceg/defaults/storage/C_hol_dlb.c create mode 100644 util/ceg/defaults/storage/C_hol_dnam.c create mode 100644 util/ceg/defaults/storage/C_hol_ilb.c create mode 100644 util/ceg/defaults/storage/C_hol_pnam.c create mode 100644 util/ceg/defaults/storage/C_rom_cst.c create mode 100644 util/ceg/defaults/storage/C_rom_dlb.c create mode 100644 util/ceg/defaults/storage/C_rom_dnam.c create mode 100644 util/ceg/defaults/storage/C_rom_ilb.c create mode 100644 util/ceg/defaults/storage/C_rom_pnam.c create mode 100644 util/ceg/defaults/storage/C_rom_scon.c create mode 100644 util/ceg/util/install_ceg create mode 100644 util/ceg/util/make_asobj create mode 100644 util/ceg/util/make_back create mode 100644 util/ceg/util/make_ce create mode 100644 util/ceg/util/make_ceg create mode 100644 util/ceg/util/make_own create mode 100644 util/ceg/util/proto.make create mode 100755 util/ceg/util/update_ceg create mode 100644 util/cgg/bootgram.y create mode 100644 util/cgg/booth.h create mode 100644 util/cgg/bootlex.l create mode 100644 util/cgg/main.c create mode 100644 util/cgg/proto.make create mode 100644 util/cmisc/GCIPM.c create mode 100644 util/cmisc/cclash.1 create mode 100644 util/cmisc/cclash.c create mode 100644 util/cmisc/cid.1 create mode 100644 util/cmisc/cid.c create mode 100644 util/cmisc/mkdep.1 create mode 100644 util/cmisc/mkdep.c create mode 100644 util/cmisc/prid.1 create mode 100644 util/cmisc/prid.c create mode 100644 util/cmisc/proto.make create mode 100644 util/cmisc/tabgen.1 create mode 100644 util/cmisc/tabgen.c create mode 100644 util/cpp/LLlex.c create mode 100644 util/cpp/LLlex.h create mode 100644 
util/cpp/LLmessage.c create mode 100644 util/cpp/Parameters create mode 100644 util/cpp/bits.h create mode 100644 util/cpp/ch7bin.c create mode 100644 util/cpp/ch7mon.c create mode 100644 util/cpp/char.tab create mode 100644 util/cpp/class.h create mode 100644 util/cpp/cpp.6 create mode 100644 util/cpp/domacro.c create mode 100644 util/cpp/error.c create mode 100644 util/cpp/expr.c create mode 100644 util/cpp/expression.g create mode 100644 util/cpp/file_info.h create mode 100644 util/cpp/idf.c create mode 100644 util/cpp/idf.h create mode 100644 util/cpp/init.c create mode 100644 util/cpp/input.c create mode 100644 util/cpp/input.h create mode 100644 util/cpp/interface.h create mode 100644 util/cpp/macro.h create mode 100644 util/cpp/main.c create mode 100755 util/cpp/make.hfiles create mode 100755 util/cpp/make.tokcase create mode 100755 util/cpp/make.tokfile create mode 100644 util/cpp/next.c create mode 100644 util/cpp/options.c create mode 100644 util/cpp/preprocess.c create mode 100644 util/cpp/proto.make create mode 100644 util/cpp/replace.c create mode 100644 util/cpp/scan.c create mode 100644 util/cpp/skip.c create mode 100644 util/cpp/tokenname.c create mode 100644 util/data/em_flag.c create mode 100644 util/data/em_mnem.c create mode 100644 util/data/em_pseu.c create mode 100644 util/data/em_ptyp.c create mode 100644 util/data/proto.make create mode 100644 util/ego/Action create mode 100644 util/ego/bo/bo.c create mode 100644 util/ego/bo/proto.make create mode 100644 util/ego/ca/ca.c create mode 100644 util/ego/ca/ca.h create mode 100644 util/ego/ca/ca_put.c create mode 100644 util/ego/ca/ca_put.h create mode 100644 util/ego/ca/proto.make create mode 100644 util/ego/cf/cf.c create mode 100644 util/ego/cf/cf.h create mode 100644 util/ego/cf/cf_idom.c create mode 100644 util/ego/cf/cf_idom.h create mode 100644 util/ego/cf/cf_loop.c create mode 100644 util/ego/cf/cf_loop.h create mode 100644 util/ego/cf/cf_succ.c create mode 100644 util/ego/cf/cf_succ.h 
create mode 100644 util/ego/cf/proto.make create mode 100644 util/ego/cj/cj.c create mode 100644 util/ego/cj/proto.make create mode 100644 util/ego/cs/cs.c create mode 100644 util/ego/cs/cs.h create mode 100644 util/ego/cs/cs_alloc.c create mode 100644 util/ego/cs/cs_alloc.h create mode 100644 util/ego/cs/cs_aux.c create mode 100644 util/ego/cs/cs_aux.h create mode 100644 util/ego/cs/cs_avail.c create mode 100644 util/ego/cs/cs_avail.h create mode 100644 util/ego/cs/cs_debug.c create mode 100644 util/ego/cs/cs_debug.h create mode 100644 util/ego/cs/cs_elim.c create mode 100644 util/ego/cs/cs_elim.h create mode 100644 util/ego/cs/cs_entity.c create mode 100644 util/ego/cs/cs_entity.h create mode 100644 util/ego/cs/cs_getent.c create mode 100644 util/ego/cs/cs_getent.h create mode 100644 util/ego/cs/cs_kill.c create mode 100644 util/ego/cs/cs_kill.h create mode 100644 util/ego/cs/cs_partit.c create mode 100644 util/ego/cs/cs_partit.h create mode 100644 util/ego/cs/cs_profit.c create mode 100644 util/ego/cs/cs_profit.h create mode 100644 util/ego/cs/cs_stack.c create mode 100644 util/ego/cs/cs_stack.h create mode 100644 util/ego/cs/cs_vnm.c create mode 100644 util/ego/cs/cs_vnm.h create mode 100644 util/ego/cs/proto.make create mode 100644 util/ego/descr/descr.sed create mode 100644 util/ego/descr/em22.descr create mode 100644 util/ego/descr/em24.descr create mode 100644 util/ego/descr/em44.descr create mode 100644 util/ego/descr/i386.descr create mode 100644 util/ego/descr/i86.descr create mode 100644 util/ego/descr/m68020.descr create mode 100644 util/ego/descr/m68k2.descr create mode 100644 util/ego/descr/m68k4.descr create mode 100644 util/ego/descr/pdp.descr create mode 100644 util/ego/descr/proto.make create mode 100644 util/ego/descr/sparc.descr create mode 100644 util/ego/descr/vax4.descr create mode 100644 util/ego/em_ego/em_ego.c create mode 100644 util/ego/em_ego/proto.make create mode 100644 util/ego/ic/ic.c create mode 100644 util/ego/ic/ic.h create mode 
100644 util/ego/ic/ic_aux.c create mode 100644 util/ego/ic/ic_aux.h create mode 100644 util/ego/ic/ic_io.c create mode 100644 util/ego/ic/ic_io.h create mode 100644 util/ego/ic/ic_lib.c create mode 100644 util/ego/ic/ic_lib.h create mode 100644 util/ego/ic/ic_lookup.c create mode 100644 util/ego/ic/ic_lookup.h create mode 100644 util/ego/ic/proto.make create mode 100644 util/ego/il/il.c create mode 100644 util/ego/il/il.h create mode 100644 util/ego/il/il1_anal.c create mode 100644 util/ego/il/il1_anal.h create mode 100644 util/ego/il/il1_aux.c create mode 100644 util/ego/il/il1_aux.h create mode 100644 util/ego/il/il1_cal.c create mode 100644 util/ego/il/il1_cal.h create mode 100644 util/ego/il/il1_formal.c create mode 100644 util/ego/il/il1_formal.h create mode 100644 util/ego/il/il2_aux.c create mode 100644 util/ego/il/il2_aux.h create mode 100644 util/ego/il/il3_aux.c create mode 100644 util/ego/il/il3_aux.h create mode 100644 util/ego/il/il3_change.c create mode 100644 util/ego/il/il3_change.h create mode 100644 util/ego/il/il3_subst.c create mode 100644 util/ego/il/il3_subst.h create mode 100644 util/ego/il/il_aux.c create mode 100644 util/ego/il/il_aux.h create mode 100644 util/ego/il/proto.make create mode 100644 util/ego/lv/lv.c create mode 100644 util/ego/lv/lv.h create mode 100644 util/ego/lv/proto.make create mode 100644 util/ego/ra/itemtab.src create mode 100644 util/ego/ra/makeitems.c create mode 100644 util/ego/ra/proto.make create mode 100644 util/ego/ra/ra.c create mode 100644 util/ego/ra/ra.h create mode 100644 util/ego/ra/ra_allocl.c create mode 100644 util/ego/ra/ra_allocl.h create mode 100644 util/ego/ra/ra_aux.c create mode 100644 util/ego/ra/ra_aux.h create mode 100644 util/ego/ra/ra_interv.c create mode 100644 util/ego/ra/ra_interv.h create mode 100644 util/ego/ra/ra_items.c create mode 100644 util/ego/ra/ra_items.h create mode 100644 util/ego/ra/ra_lifet.c create mode 100644 util/ego/ra/ra_lifet.h create mode 100644 util/ego/ra/ra_pack.c 
create mode 100644 util/ego/ra/ra_pack.h create mode 100644 util/ego/ra/ra_profits.c create mode 100644 util/ego/ra/ra_profits.h create mode 100644 util/ego/ra/ra_xform.c create mode 100644 util/ego/ra/ra_xform.h create mode 100644 util/ego/share/alloc.c create mode 100644 util/ego/share/alloc.h create mode 100644 util/ego/share/aux.c create mode 100644 util/ego/share/aux.h create mode 100644 util/ego/share/cldefs.src create mode 100644 util/ego/share/cset.c create mode 100644 util/ego/share/cset.h create mode 100644 util/ego/share/debug.c create mode 100644 util/ego/share/debug.h create mode 100644 util/ego/share/def.h create mode 100644 util/ego/share/files.c create mode 100644 util/ego/share/files.h create mode 100644 util/ego/share/get.c create mode 100644 util/ego/share/get.h create mode 100644 util/ego/share/global.c create mode 100644 util/ego/share/global.h create mode 100644 util/ego/share/go.c create mode 100644 util/ego/share/go.h create mode 100644 util/ego/share/init_glob.c create mode 100644 util/ego/share/init_glob.h create mode 100644 util/ego/share/locals.c create mode 100644 util/ego/share/locals.h create mode 100644 util/ego/share/lset.c create mode 100644 util/ego/share/lset.h create mode 100644 util/ego/share/makecldef.c create mode 100644 util/ego/share/map.c create mode 100644 util/ego/share/map.h create mode 100644 util/ego/share/parser.c create mode 100644 util/ego/share/parser.h create mode 100644 util/ego/share/pop_push.awk create mode 100644 util/ego/share/pop_push.h create mode 100644 util/ego/share/proto.make create mode 100644 util/ego/share/put.c create mode 100644 util/ego/share/put.h create mode 100644 util/ego/share/show.c create mode 100644 util/ego/share/stack_chg.c create mode 100644 util/ego/share/stack_chg.h create mode 100644 util/ego/share/types.h create mode 100644 util/ego/sp/proto.make create mode 100644 util/ego/sp/sp.c create mode 100644 util/ego/sr/proto.make create mode 100644 util/ego/sr/sr.c create mode 100644 
util/ego/sr/sr.h create mode 100644 util/ego/sr/sr_aux.c create mode 100644 util/ego/sr/sr_aux.h create mode 100644 util/ego/sr/sr_cand.c create mode 100644 util/ego/sr/sr_cand.h create mode 100644 util/ego/sr/sr_expr.c create mode 100644 util/ego/sr/sr_expr.h create mode 100644 util/ego/sr/sr_iv.c create mode 100644 util/ego/sr/sr_iv.h create mode 100644 util/ego/sr/sr_reduce.c create mode 100644 util/ego/sr/sr_reduce.h create mode 100644 util/ego/sr/sr_xform.c create mode 100644 util/ego/sr/sr_xform.h create mode 100644 util/ego/ud/proto.make create mode 100644 util/ego/ud/ud.c create mode 100644 util/ego/ud/ud.h create mode 100644 util/ego/ud/ud_aux.c create mode 100644 util/ego/ud/ud_aux.h create mode 100644 util/ego/ud/ud_const.c create mode 100644 util/ego/ud/ud_const.h create mode 100644 util/ego/ud/ud_copy.c create mode 100644 util/ego/ud/ud_copy.h create mode 100644 util/ego/ud/ud_defs.c create mode 100644 util/ego/ud/ud_defs.h create mode 100644 util/flex/COPYING create mode 100644 util/flex/Changes create mode 100644 util/flex/Headers create mode 100644 util/flex/Makefile create mode 100644 util/flex/README create mode 100644 util/flex/ccl.c create mode 100644 util/flex/dfa.c create mode 100644 util/flex/ecs.c create mode 100644 util/flex/flex.1 create mode 100644 util/flex/flex.skel create mode 100644 util/flex/flexdef.h create mode 100644 util/flex/flexdoc.1 create mode 100644 util/flex/gen.c create mode 100644 util/flex/initscan.c create mode 100644 util/flex/libmain.c create mode 100644 util/flex/main.c create mode 100644 util/flex/misc.c create mode 100644 util/flex/nfa.c create mode 100644 util/flex/parse.y create mode 100644 util/flex/proto.make create mode 100644 util/flex/scan.l create mode 100644 util/flex/sym.c create mode 100644 util/flex/tblcmp.c create mode 100644 util/flex/yylex.c create mode 100644 util/grind/Makefile create mode 100644 util/grind/PROBLEMS create mode 100644 util/grind/READ_ME create mode 100644 util/grind/avl.cc create 
mode 100644 util/grind/avl.h create mode 100644 util/grind/c.c create mode 100644 util/grind/char.ct create mode 100644 util/grind/class.h create mode 100644 util/grind/commands.g create mode 100644 util/grind/db_symtab.g create mode 100644 util/grind/do_comm.c create mode 100644 util/grind/expr.c create mode 100644 util/grind/expr.h create mode 100644 util/grind/file.hh create mode 100644 util/grind/grind.1 create mode 100644 util/grind/idf.c create mode 100644 util/grind/idf.h create mode 100644 util/grind/itemlist.cc create mode 100644 util/grind/langdep.cc create mode 100644 util/grind/langdep.h create mode 100644 util/grind/list.c create mode 100644 util/grind/main.c create mode 100755 util/grind/make.allocd create mode 100755 util/grind/make.next create mode 100755 util/grind/make.ops create mode 100755 util/grind/make.tokcase create mode 100755 util/grind/make.tokfile create mode 100644 util/grind/message.h create mode 100644 util/grind/misc.h create mode 100644 util/grind/modula-2.c create mode 100644 util/grind/operator.h create mode 100644 util/grind/operators.ot create mode 100644 util/grind/ops.c create mode 100644 util/grind/ops.h create mode 100644 util/grind/pascal.c create mode 100644 util/grind/position.c create mode 100644 util/grind/position.h create mode 100644 util/grind/print.c create mode 100644 util/grind/proto.main create mode 100644 util/grind/proto.make create mode 100644 util/grind/rd.c create mode 100644 util/grind/rd.h create mode 100644 util/grind/run.c create mode 100644 util/grind/scope.cc create mode 100644 util/grind/scope.h create mode 100644 util/grind/symbol.c create mode 100644 util/grind/symbol.hh create mode 100644 util/grind/token.h create mode 100644 util/grind/tokenname.c create mode 100644 util/grind/tokenname.h create mode 100644 util/grind/tree.c create mode 100644 util/grind/tree.hh create mode 100644 util/grind/type.c create mode 100644 util/grind/type.hh create mode 100755 util/int/M.trap_msg create mode 100755 
util/int/M.warn_h create mode 100755 util/int/M.warn_msg create mode 100644 util/int/READ_ME create mode 100644 util/int/alloc.c create mode 100644 util/int/alloc.h create mode 100644 util/int/core.c create mode 100644 util/int/data.c create mode 100644 util/int/debug.h create mode 100644 util/int/disassemble.c create mode 100644 util/int/do_array.c create mode 100644 util/int/do_branch.c create mode 100644 util/int/do_comp.c create mode 100644 util/int/do_conv.c create mode 100644 util/int/do_fpar.c create mode 100644 util/int/do_incdec.c create mode 100644 util/int/do_intar.c create mode 100644 util/int/do_load.c create mode 100644 util/int/do_logic.c create mode 100644 util/int/do_misc.c create mode 100644 util/int/do_proc.c create mode 100644 util/int/do_ptrar.c create mode 100644 util/int/do_sets.c create mode 100644 util/int/do_store.c create mode 100644 util/int/do_unsar.c create mode 100644 util/int/dump.c create mode 100644 util/int/e.out.h create mode 100644 util/int/fra.c create mode 100644 util/int/fra.h create mode 100644 util/int/global.c create mode 100644 util/int/global.h create mode 100644 util/int/init.c create mode 100644 util/int/int.1 create mode 100644 util/int/io.c create mode 100644 util/int/linfil.h create mode 100644 util/int/log.c create mode 100644 util/int/log.h create mode 100644 util/int/logging.h create mode 100644 util/int/m_ioctl.c create mode 100644 util/int/m_sigtrp.c create mode 100644 util/int/main.c create mode 100644 util/int/mem.h create mode 100644 util/int/memdirect.h create mode 100644 util/int/moncalls.c create mode 100644 util/int/monstruct.c create mode 100644 util/int/monstruct.h create mode 100644 util/int/nofloat.h create mode 100644 util/int/opcode.h create mode 100644 util/int/proctab.c create mode 100644 util/int/proctab.h create mode 100644 util/int/proto.make create mode 100644 util/int/read.c create mode 100644 util/int/read.h create mode 100644 util/int/rsb.c create mode 100644 util/int/rsb.h create mode 
100644 util/int/segcheck.h create mode 100644 util/int/segment.c create mode 100644 util/int/shadow.h create mode 100644 util/int/stack.c create mode 100644 util/int/switch.c create mode 100644 util/int/switch/READ_ME create mode 100644 util/int/switch/mkiswitch.c create mode 100644 util/int/switch/mkswitch.c create mode 100644 util/int/switch/proto.make create mode 100644 util/int/sysidf.h create mode 100644 util/int/tally.c create mode 100644 util/int/test/READ_ME create mode 100644 util/int/test/args.c create mode 100644 util/int/test/awa.p create mode 100644 util/int/test/fork2.c create mode 100644 util/int/test/ioc0.c create mode 100644 util/int/test/proto.make create mode 100644 util/int/test/prtime.c create mode 100644 util/int/test/set.c create mode 100644 util/int/test/sig.c create mode 100644 util/int/text.c create mode 100644 util/int/text.h create mode 100644 util/int/trap.c create mode 100644 util/int/trap.h create mode 100644 util/int/v7ioctl.h create mode 100644 util/int/warn.c create mode 100644 util/led/WRONG create mode 100644 util/led/ack.out.5 create mode 100644 util/led/archive.c create mode 100644 util/led/assert.h create mode 100644 util/led/const.h create mode 100644 util/led/debug.h create mode 100644 util/led/defs.h create mode 100644 util/led/error.c create mode 100644 util/led/extract.c create mode 100644 util/led/finish.c create mode 100644 util/led/led.6 create mode 100644 util/led/mach.c create mode 100644 util/led/main.c create mode 100644 util/led/memory.c create mode 100644 util/led/memory.h create mode 100644 util/led/orig.h create mode 100644 util/led/output.c create mode 100644 util/led/proto.make create mode 100644 util/led/read.c create mode 100644 util/led/relocate.c create mode 100644 util/led/save.c create mode 100644 util/led/scan.c create mode 100644 util/led/scan.h create mode 100644 util/led/sym.c create mode 100644 util/led/write.c create mode 100644 util/misc/convert.c create mode 100644 util/misc/em_decode.6 create 
mode 100644 util/misc/esize.1 create mode 100644 util/misc/esize.c create mode 100644 util/misc/proto.make create mode 100644 util/ncgg/assert.h create mode 100644 util/ncgg/cgg.y create mode 100644 util/ncgg/coerc.c create mode 100644 util/ncgg/cost.h create mode 100755 util/ncgg/cvtkeywords create mode 100644 util/ncgg/emlookup.c create mode 100644 util/ncgg/error.c create mode 100644 util/ncgg/expr.c create mode 100644 util/ncgg/expr.h create mode 100644 util/ncgg/extern.h create mode 100644 util/ncgg/hall.c create mode 100644 util/ncgg/instruct.c create mode 100644 util/ncgg/instruct.h create mode 100644 util/ncgg/iocc.c create mode 100644 util/ncgg/iocc.h create mode 100644 util/ncgg/keywords create mode 100644 util/ncgg/lookup.c create mode 100644 util/ncgg/lookup.h create mode 100644 util/ncgg/main.c create mode 100644 util/ncgg/ncgg.6 create mode 100644 util/ncgg/output.c create mode 100644 util/ncgg/param.h create mode 100644 util/ncgg/property.h create mode 100644 util/ncgg/proto.make create mode 100644 util/ncgg/pseudo.h create mode 100644 util/ncgg/reg.h create mode 100644 util/ncgg/regvar.h create mode 100644 util/ncgg/scan.l create mode 100644 util/ncgg/set.c create mode 100644 util/ncgg/set.h create mode 100644 util/ncgg/strlookup.c create mode 100644 util/ncgg/subr.c create mode 100644 util/ncgg/token.h create mode 100644 util/ncgg/var.c create mode 100644 util/ncgg/varinfo.h create mode 100644 util/opt/alloc.c create mode 100644 util/opt/alloc.h create mode 100644 util/opt/assert.h create mode 100644 util/opt/backward.c create mode 100644 util/opt/cleanup.c create mode 100644 util/opt/em_opt.6 create mode 100644 util/opt/ext.h create mode 100644 util/opt/flow.c create mode 100644 util/opt/getline.c create mode 100644 util/opt/line.h create mode 100644 util/opt/lookup.c create mode 100644 util/opt/lookup.h create mode 100644 util/opt/main.c create mode 100644 util/opt/mktab.y create mode 100644 util/opt/optim.h create mode 100644 util/opt/param.h 
create mode 100644 util/opt/pattern.h create mode 100644 util/opt/patterns create mode 100644 util/opt/peephole.c create mode 100644 util/opt/pop_push.awk create mode 100644 util/opt/pop_push.h create mode 100644 util/opt/process.c create mode 100644 util/opt/proinf.h create mode 100644 util/opt/proto.make create mode 100644 util/opt/putline.c create mode 100644 util/opt/reg.c create mode 100644 util/opt/scan.l create mode 100644 util/opt/special.c create mode 100644 util/opt/tes.c create mode 100644 util/opt/tes.h create mode 100755 util/opt/testopt create mode 100644 util/opt/types.h create mode 100644 util/opt/util.c create mode 100644 util/opt/var.c create mode 100755 util/shf/march.sh create mode 100644 util/shf/proto.make create mode 100644 util/topgen/LLlex.c create mode 100644 util/topgen/hash.c create mode 100644 util/topgen/main.c create mode 100644 util/topgen/misc.h create mode 100644 util/topgen/pattern.c create mode 100644 util/topgen/proto.make create mode 100644 util/topgen/symtab.c create mode 100644 util/topgen/symtab.h create mode 100644 util/topgen/token.h create mode 100644 util/topgen/topgen.g create mode 100644 util/topgen/tunable.h diff --git a/Action b/Action new file mode 100644 index 0000000..54a724d --- /dev/null +++ b/Action @@ -0,0 +1,276 @@ +name "System definition" +dir first +action ack_sys +failure "You have to run the shell script first/first" +fatal +end +name "Manual pages" +dir man +end +! name "EM definition" +! dir etc +! 
end +name "EM definition library" +dir util/data +end +name "C utilities" +dir util/cmisc +end +name "Yacc parser generator" +dir util/byacc +end +name "Flex lexical analyzer generator" +dir util/flex +action "make firstinstall && make clean" +end +name "Include files for modules" +dir modules/h +end +name "Modules" +dir modules/src +indir +end +name "LL(1) Parser generator" +dir util/LLgen +action "make firstinstall && make clean" +end +name "C preprocessor" +dir util/cpp +end +name "Peephole optimizer libraries" +dir modules/src/em_opt +end +name "ACK object utilities" +dir util/amisc +end +name "Encode/Decode" +dir util/misc +end +name "Shell files in bin" +dir util/shf +end +name "EM assembler" +dir util/ass +end +name "EM Peephole optimizer" +dir util/opt +end +name "EM Global optimizer" +dir util/ego +indir +end +name "ACK archiver" +dir util/arch +end +name "Program 'ack'" +dir util/ack +end +name "Bootstrap for backend tables" +dir util/cgg +end +name "Bootstrap for newest form of backend tables" +dir util/ncgg +end +name "Bootstrap for code expanders" +dir util/ceg +indir +end +name "LED link editor" +dir util/led +end +name "TOPGEN target optimizer generator" +dir util/topgen +end +name "C frontend" +dir lang/cem/cemcom +end +name "ANSI-C frontend" +dir lang/cem/cemcom.ansi +end +name "ANSI-C preprocessor" +dir lang/cem/cpp.ansi +end +name "ANSI-C header files" +dir lang/cem/libcc.ansi +end +name "LINT C program checker" +dir lang/cem/lint +end +name "EM definition lint-library" +action "make lintlib" +dir util/data +end +name "Modules lint libraries" +dir modules/src +indir "Action.lint" +end +name "Global optimizer lint libraries" +dir util/ego/share +action "make lintlib" +end +name "Pascal frontend" +dir lang/pc/comp +end +name "Basic frontend" +dir lang/basic/src +end +name "Occam frontend" +dir lang/occam/comp +end +name "Modula-2 frontend" +dir lang/m2/comp +end +name "Modula-2 definition modules" +dir lang/m2/libm2 +end +name "Modula-2 makefile 
generator" +dir lang/m2/m2mm +end +name "Fortran to C compiler" +dir lang/fortran/comp +end +name "EM interpreter in C" +dir util/int +end +name "Symbolic debugger" +dir util/grind +end +name "Intel 8086 support" +dir mach/i86 +indir +end +name "Intel 80286 support for Xenix" +dir mach/xenix3 +indir +end +name "Intel 80386 support for Xenix 386 System V" +dir mach/i386 +indir +end +name "MSC6500 support" +dir mach/6500 +indir +end +name "Motorola 6800 support" +dir mach/6800 +indir +end +name "Motorola 6805 support" +dir mach/6805 +indir +end +name "Motorola 6809 support" +dir mach/6809 +indir +end +name "Intel 8080 support" +dir mach/i80 +indir +end +name "2-2 Interpreter support" +dir mach/em22 +indir +end +name "2-4 Interpreter support" +dir mach/em24 +indir +end +name "4-4 Interpreter support" +dir mach/em44 +indir +end +name "Motorola 68000 2-4 support" +dir mach/m68k2 +indir +end +name "Motorola 68000 4-4 support" +dir mach/m68k4 +indir +end +name "NS16032 support" +dir mach/ns +indir +end +name "PDP 11 support" +dir mach/pdp +indir +end +name "PMDS support" +dir mach/pmds +indir +end +name "PMDS 4/4 support" +dir mach/pmds4 +indir +end +name "Signetics 2650 support" +dir mach/s2650 +indir +end +name "Vax 4-4 support" +dir mach/vax4 +indir +end +name "M68020 System V/68 support" +dir mach/m68020 +indir +end +name "Sun 3 M68020 support" +dir mach/sun3 +indir +end +name "Sun 4 SPARC SunOs 4 support" +dir mach/sparc +system "sparc|sparc_solaris" +indir +end +name "Sun 4 SPARC Solaris support" +dir mach/sparc_solaris +system "sparc_solaris" +indir +end +name "Sun 2 M68000 support" +dir mach/sun2 +indir +end +name "Mantra M68000 System V.0 support" +dir mach/mantra +indir +end +name "PC Minix support" +dir mach/minix +indir +end +name "Atari ST Minix support" +dir mach/minixST +indir +end +name "Z80 support" +dir mach/z80 +indir +end +name "Zilog Z8000 support" +dir mach/z8000 +indir +end +name "Acorn Archimedes support" +dir mach/arm +indir +end +name 
"Documentation" +dir doc +end +name "Motorola 68000 interpreters" +system "m68*|sun*" +dir mach/mantra/int +end +name "Fast compilers" +system "m68020|sun3|i386|vax*" +dir fast +indir +end +name "Fast cc-compatible C compiler" +system "sun3|vax*" +dir fcc +indir +end diff --git a/Copyright b/Copyright new file mode 100644 index 0000000..d62a10a --- /dev/null +++ b/Copyright @@ -0,0 +1,32 @@ +Copyright (c) 1987,1990, 1993 Vrije Universiteit, Amsterdam, The Netherlands. +All rights reserved. + +Redistribution and use of the Amsterdam Compiler Kit in source and +binary forms, with or without modification, are permitted provided +that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Vrije Universiteit nor the names of the + software authors or contributors may be used to endorse or + promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND +CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL VRIJE UNIVERSITEIT OR ANY AUTHORS OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/NEW b/NEW new file mode 100644 index 0000000..d0e1777 --- /dev/null +++ b/NEW @@ -0,0 +1,34 @@ +This is ACK distribution 5.5. + +The only addition with respect to the 5th ACK distribution is the support +for Solaris 2 on SPARCs. It also contains many bug fixes. + +Notes for the 5th ACK distribution: + +It is not wise to mix files created by the previous version of the Kit +with files belonging to this version, although that might sometimes work. +Many problems with the previous distribution have been fixed. +The major additions are: + + - an ANSI C compiler + - a LINT C program checker, both non-ansi and ansi + - an Intel 80386 back-end + - a SPARC code expander + - a source level debugger for Pascal, Modula-2, C, and ANSI C + - an Acorn Archimedes back-end + - code-expanders for VAX, Intel 80386 and Motorola M68020 processors, + and very fast Pascal, Modula-2, ANSI C, and C compilers constructed + using these code expanders + - a cc-compatible very fast C compiler for SUN-3 and VAX. + +Also added, but not part of the Kit proper are + - flex: a lexical analyzer generator + - byacc: yacc-clone by UCB + - f2c: a Fortran to C compiler by AT&T. + +See the ACK installation manual for their copyright notices. + +-- +Ceriel Jacobs, Dept. 
of Mathematics and Computer Science, Vrije Universiteit, +De Boelelaan 1081a, 1081 HV Amsterdam, The Netherlands +Email: ceriel@cs.vu.nl Fax: +31 20 444 7653 diff --git a/README b/README new file mode 100644 index 0000000..649cba0 --- /dev/null +++ b/README @@ -0,0 +1,3 @@ +Before starting installation you should read the file doc/install.pr. + +Note: This is all very old stuff. It has not been tried on recent Unix systems. diff --git a/TakeAction b/TakeAction new file mode 100755 index 0000000..6a25701 --- /dev/null +++ b/TakeAction @@ -0,0 +1,144 @@ +case $# in +0) PAR='make install && make clean' ; CMD=Action ;; +1) PAR="$1" ; CMD=Action ;; +2) PAR="$1" ; CMD="$2" ;; +*) echo Syntax: "$0" [command [file]] ; exit 1 ;; +esac +if test -r "$CMD" +then : +else + case "$CMD" in + Action) echo No Action file present ;; + *) echo No Action file "($CMD)" present ;; + esac +fi +case $0 in +/*) THISFILE=$0 + ;; +*) if [ -f $0 ] + then + THISFILE=`pwd`/$0 + else + THISFILE=$0 + fi + ;; +esac +SYS= +RETC=0 +{ while read LINE +do + eval set $LINE + case x"$1" in + x!*) ;; + xname) SYS="$2" + ACTION='$PAR' + DIR=. 
+ FM=no + FAIL='Failed for $SYS, see $DIR/Out' + SUCC='$SYS -- done' + ATYPE= + FATAL=no + DOIT=yes + ;; + xfatal) FATAL=yes ;; + xaction|xindir) case x$ATYPE in + x) ACTION=$2 ; ATYPE=$1 + case $ATYPE$FM in + indirno) FAIL='Failed for $SYS' ;; + esac + ;; + *) echo Already specified an $ATYPE for this name + RETC=65 ;; + esac ;; + xfailure) FM=yes + FAIL="$2" ;; + xsuccess) SUCC="$2" ;; + xdir) DIR="$2" ;; + xsystem) PAT="$2" + oIFS=$IFS + IFS="|" + eval set $2 + case x`ack_sys` in + x$1|x$2|x$3|x$4|x$5|x$6|x$7) ;; + *) echo "Sorry, $SYS can only be made on $PAT systems" + DOIT=no + ;; + esac + IFS=$oIFS + ;; + xend) case $DOIT in + no) continue ;; + esac + case x$SYS in + x) echo Missing name line; RETC=65 ;; + *) if test -d $DIR + then ( + cd $DIR + X= + case $ATYPE in + indir) + if $THISFILE "$PAR" $ACTION + then eval echo $SUCC + else RETC=2 ; eval echo $FAIL + fi ;; + *) + case "$ACTION" in + '$PAR') + ACTION="$PAR" + ;; + *) ;; + esac + if [ -f No$CMD ] + then + x=`cat No$CMD` + if [ "$ACTION" = "$x" ] + then + ACTION='echo "No actions performed, No$CMD file present"' + SUCC='$SYS -- skipped' + fi + fi + if eval "{ $ACTION ; } >Out 2>&1 No$CMD 2>/dev/null + fi + else RETC=1 ; X=: ; eval echo $FAIL + fi + ;; + esac + (echo ------- `pwd` + cat Out + $X rm -f Out + ) 2>/dev/null 1>&- 1>&3 + exit $RETC + ) + case $? in + 0) ;; + *) case $RETC in + 0) RETC=$? ;; + esac ;; + esac + else + echo Directory $DIR for $SYS is inaccessible + RETC=66 + fi ;; + esac + case $FATAL$RETC in + yes0) ;; + yes*) echo Fatal error, installation stopped. + exit $RETC ;; + esac + SYS= + ;; + *) echo Unknown keyword "$1" + RETC=67 ;; + esac +done +exit $RETC +} <$CMD +RETX=$? 
+case $RETX in +0) exit $RETC ;; +*) exit $RETX ;; +esac diff --git a/bin/cc-and-mkdep.ack b/bin/cc-and-mkdep.ack new file mode 100755 index 0000000..4089057 --- /dev/null +++ b/bin/cc-and-mkdep.ack @@ -0,0 +1,7 @@ +: '$Id: cc-and-mkdep.ack,v 1.3 1994/06/23 13:46:30 ceriel Exp $' + +: Compile and make dependencies. First argument is the file on which the +: dependencies must be produced. This version is for ACK. +n=$1 +shift +exec $CC -Rcem-A$n -Rcem-m $* diff --git a/bin/cc-and-mkdep.all b/bin/cc-and-mkdep.all new file mode 100755 index 0000000..8e463ec --- /dev/null +++ b/bin/cc-and-mkdep.all @@ -0,0 +1,20 @@ +: '$Id: cc-and-mkdep.all,v 1.3 1994/06/23 13:46:35 ceriel Exp $' + +: Compile and make dependencies. First argument is the file on which the +: dependencies must be produced. This version is a generic one that should +: work for all Unix systems. +n=$1 +shift +cpp_args= +for i in $* +do + case $i in + -I*|-D*|-U*) cpp_args="$cpp_args $i" + ;; + -*) ;; + *) cpp_args="$cpp_args $i" + ;; + esac +done +$UTIL_HOME/lib.bin/cpp -d -m $cpp_args > $n +exec $CC $* diff --git a/bin/cc-and-mkdep.sun b/bin/cc-and-mkdep.sun new file mode 100755 index 0000000..c431ccb --- /dev/null +++ b/bin/cc-and-mkdep.sun @@ -0,0 +1,7 @@ +: '$Id: cc-and-mkdep.sun,v 1.2 1994/06/23 13:46:40 ceriel Exp $' + +: Compile and make dependencies. First argument is the file on which the +: dependencies must be produced. This version is for the SUN cc. 
+n=$1 +shift +exec $CC -Qpath $UTIL_HOME/lib.bin -Qoption cpp -d$n -Qoption cpp -m $* diff --git a/bin/do_deps b/bin/do_deps new file mode 100755 index 0000000..708a925 --- /dev/null +++ b/bin/do_deps @@ -0,0 +1,18 @@ +: '$Id: do_deps,v 1.2 1994/06/23 13:46:44 ceriel Exp $' + +: Produce dependencies for all argument files + +for i in $* +do + n=`basename $i .c` + if [ -f $n.dep ] + then + : + else + echo $n.'$(SUF): '$i > $n.dep + echo " head -5 $n.dep > $n.dp1" >> $n.dep + echo ' CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" $(CC_AND_MKDEP) '$n.dp2 '$(CFLAGS)' -c $i >> $n.dep + echo " cat $n.dp1 $n.dp2 > $n.dep" >> $n.dep + echo " rm -f $n.dp1 $n.dp2" >> $n.dep + fi +done diff --git a/bin/do_resolve b/bin/do_resolve new file mode 100755 index 0000000..28553d4 --- /dev/null +++ b/bin/do_resolve @@ -0,0 +1,47 @@ +: '$Id: do_resolve,v 1.2 1994/06/23 13:46:48 ceriel Exp $' + +: Resolve name clashes in the files on the argument list. If these +: files reside in another directory, a copy is made in the current +: directory. If not, it is overwritten. Never do this in a source +: directory! A list of the new files is produced on standard output. + +UTIL_BIN=$UTIL_HOME/bin + +trap "rm -f tmp$$ a.out nmclash.* longnames clashes" 0 1 2 3 15 + +: first find out if we have to resolve problems with identifier significance. + +cat > nmclash.c <<'EOF' +/* Accepted if many characters of long names are significant */ +abcdefghijklmnopr() { } +abcdefghijklmnopq() { } +main() { } +EOF +if $CC nmclash.c +then : no identifier significance problem + for i in $* + do + echo $i + done +else + $UTIL_BIN/prid -l7 $* > longnames + + : remove code generating routines from the clashes list. + : code generating routine names start with C_. + : also remove names starting with flt_. 
+ + sed '/^C_/d' < longnames | sed '/^flt_/d' > tmp$$ + $UTIL_BIN/cclash -c -l7 tmp$$ > clashes + for i in $* + do + $UTIL_BIN/cid -Fclashes < $i > tmp$$ + n=`basename $i .xxx` + if cmp -s $n tmp$$ + then + rm -f tmp$$ + else + mv tmp$$ $n + fi + echo $n + done +fi diff --git a/bin/lint-lib.ack b/bin/lint-lib.ack new file mode 100755 index 0000000..8c0a24a --- /dev/null +++ b/bin/lint-lib.ack @@ -0,0 +1,12 @@ +: '$Id: lint-lib.ack,v 1.2 1994/06/23 13:46:53 ceriel Exp $' + +: Create a lint library file. The name of the library file is constructed +: from the first argument. The second argument indicates the directory where +: the result is to be placed. This version is for ACK lint. + +n=$1 +shift +d=$1 +shift +lint -L$n $* +mv $n.llb $d diff --git a/bin/lint-lib.unix b/bin/lint-lib.unix new file mode 100755 index 0000000..5ed06d2 --- /dev/null +++ b/bin/lint-lib.unix @@ -0,0 +1,12 @@ +: '$Id: lint-lib.unix,v 1.2 1994/06/23 13:46:57 ceriel Exp $' + +: Create a lint library file. The name of the library file is constructed +: from the first argument. The second argument indicates the directory where +: the result is to be placed. This version is for Unix lint. + +n=$1 +shift +d=$1 +shift +/usr/bin/lint -C$n $* +mv llib-l$n.ln $d diff --git a/bin/mk_manpage b/bin/mk_manpage new file mode 100755 index 0000000..6b7d953 --- /dev/null +++ b/bin/mk_manpage @@ -0,0 +1,18 @@ +num=`expr $1 : '.*\.\([1-8]\)'` + +if [ -d $2/man ] ; then : ; else mkdir $2/man ; fi +if [ -f $2/man/head ] ; then : ; else cat > $2/man/head <<'EOF' +.rn TH yy +.de TH +.di zz +.yy "\\$1" "\\$2" "\\$3" "\\$4" +.ds ]W 5th ACK distribution +.ds ]D Amsterdam Compiler Kit +.ds ]L "\\$3 +.di +.rm zz +.. +EOF +fi +if [ -d $2/man/man$num ] ; then : ; else mkdir $2/man/man$num ; fi +cat $2/man/head $1 | sed "s!TARGETHOME!$2!" 
> $2/man/man$num/`expr //$1 : '.*/\([^/]*\)'` diff --git a/bin/rm_deps b/bin/rm_deps new file mode 100755 index 0000000..4a2c910 --- /dev/null +++ b/bin/rm_deps @@ -0,0 +1,8 @@ +: $Id: rm_deps,v 1.2 1994/06/23 13:47:02 ceriel Exp $ + +: remove dependencies from a makefile, write result on standard output. +: we cannot do this directly in a makefile because some make versions +: have # start a comment, always. + +sed -e '/^#DEPENDENCIES/,$d' $1 +echo '#DEPENDENCIES' diff --git a/doc/6500.doc b/doc/6500.doc new file mode 100644 index 0000000..d395e98 --- /dev/null +++ b/doc/6500.doc @@ -0,0 +1,1893 @@ +. \" $Id: 6500.doc,v 2.3 1994/06/24 10:01:32 ceriel Exp $" +.RP +.ND Dec 1984 +.TL +.B +A backend table for the 6500 microprocessor +.R +.AU +Jan van Dalen +.AB +The backend table is part of the Amsterdam Compiler Kit (ACK). +It translates the intermediate language family EM to a machine +code for the MCS6500 microprocessor family. +.AE +.bp +.DS C +.B +THE MCS6500 MICROPROCESSOR. +.R +.DE +.NH 0 +Introduction +.PP +Why a back end table for the MCS6500 microprocessor family. +Although the MCS6500 microprocessor family has an simple +instruction set and internal structure, it is used in a +variety of microcomputers and homecomputers. +This is because of is low cost. +As an example the Apple II, a well known and width spread +microprocessor, uses the MCS6502 CPU. +Also the BBC homecomputer, whose popularity is growing day +by day uses the MCS6502 CPU. +The BBC homecomputer is based on the MCS6502 CPU although +better and stronger microprocessors are available. +The designers of Acorn computer Industries have probably +choosen for the MCS6502 because of the amount of software +available for this CPU. +Since its width spreaded use, a variaty of software +will be needed for it. +One can think of games!!, administration programs, +teaching programs, basic interpreters and other application +programs. 
+Even though it will not be possible to run the total compiler kit +on a MCS6500 based computer, it is possible to write application +programs in a high level language, such as Pascal or C on a +minicomputer. +These application programs can be tested and compiled on that +minicomputer and put in a ROM (Read Only Memory), for example, +so that they can be executed by a MCS6500 CPU. +The strategy of writing testprograms on a minicomputer, +compiling them and then executing them on a MCS6500 based +microprocessor was used in the development of the back end. +The minicomputer used is an M68000 based one, manufactured by +Bleasdale Computer Systems Ltd.. +The micro- or homecomputer used is a BBC microcomputer, +manufactured by Acorn Computer Ltd.. +.NH +The MOS Technology MCS6500 +.PP +The MCS6500 is a family of CPU devices developed by MOS +Technology [1]. +The members of the MCS6500 family are the same chips in a +different housing. +The MCS6502, the big brother in the family, can handle 64k +bytes of memory, while for example the MCS6504 can only handle +8k bytes of memory. +This difference is due to the fact that the MCS6502 is in a +40 pins house and the MCS6504 has a 28 pins house, so fewer +address lines are available. +.bp +.NH +The MCS6500 CPU programmable registers +.PP +The MCS6500 series is based on the same chip so all have the +same programmable registers. +.sp 9 +.NH 2 +The accumulator A. +.PP +The accumulator A is the only register on which the arithmetic +and logical instructions can be used. +For example, the instruction ADC (add with carry) adds the +contents of the accumulator A and a byte from memory or data. +.NH 2 +The index register X. +.PP +As the name suggests this register can be used for some +indirect addressing modes. +The modes are explained below. +.NH 2 +The index register Y. +.PP +This register is, just as the index register X, used for +certain indirect addressing modes. +These addressing modes are different from the modes which +use index register X.
+.NH 2 +The program counter PC +.PP +This is the only 16-bit register available. +It is used to point to the next instruction to be +carried out. +.NH 2 +The stack pointer SP +.PP +The stack pointer is an 8-bit register, so the stack can contain +at most 256 bytes. +The CPU always appends 00000001 as highbyte of any stack address, +which means that memory locations +.B +0100 +.R +through +.B +01FF +.R +are permanently assigned to the stack. +.sp 12 +.NH 2 +The status register +.PP +The status register maintains six status flags and a master +interrupt control bit. +.br +These are the six status flags: + Carry (c) + Zero (z) + Overflow (o) + Sign (n) + Decimal mode (d) + Break (b) + + + + + +The bit (i) is the master interrupt control bit. +.NH +The MCS6500 memory layout. +.PP +In the MCS6500 memory space three areas have special meaning. +These areas are: +.IP 1) +Top page. +.IP 2) +Zero page. +.IP 3) +The stack. +.PP +MCS6500 memory is divided up into pages. +These pages consist of 256 bytes. +So in a memory address the highbyte denotes the page number +and the lowbyte the offset within the page. +.NH 2 +Top page. +.PP +When a MCS6500 is restarted it jumps indirect via memory address +.B +FFFC. +.R +At +.B +FFFC +.R +(lowbyte) and +.B +FFFD +.R +(highbyte) there must be the address of the bootstrap subroutine. +When a break instruction (BRK) occurs or an interrupt takes place, +the MCS6500 jumps indirect through memory address +.B +FFFE. +.R +.B +FFFE +.R +and +.B +FFFF +.R +thus, must contain the address of the interrupt routine. +The former only goes for maskable interrupts. +There also exists a nonmaskable interrupt. +This causes the MCS6500 to jump indirect through memory address +.B +FFFA. +.R +So the top six bytes of memory are used by the operating system +and therefore not available for the back end. +.NH 2 +Zero page. +.PP +This page has a special meaning in the sense that addressing +this page uses special opcodes.
+Since a page consists of 256 bytes, only one byte is needed +for addressing zero page. +So an instruction which uses zero page occupies two bytes. +It also uses fewer clock cycles while carrying out the instruction. +Zero page is also needed when indirect addressing is used. +This means that when indirect addressing is used, the address must +reside in zero page (two consecutive bytes). +In this case (the back end), zero page is used, for example +to hold the local base, the second local base, the stack pointer +etc. +.NH 2 +The stack. +.PP +The stack is described in paragraph 3.5 about the MCS6500 +programmable registers. +.NH +The memory addressing modes +.PP +MCS6500 memory reference instructions use direct addressing, +indexed addressing, and indirect addressing. +.NH 2 +Direct addressing. +.PP +Three-byte instructions use the second and third bytes of the +object code to provide a direct 16-bit address: +therefore, 65.536 bytes of memory can be addressed directly. +The commonly used memory reference instructions also have a two-byte +object code variation, where the second byte directly addresses +one of the first 256 bytes. +.NH 2 +Base page, indexed addressing. +.PP +In this case, the instruction has two bytes of object code. +The contents of either the X or Y index registers are added to the +second object code byte in order to compute a memory address. +This may be illustrated as follows: +.sp 15 +Base page, indexed addressing, as illustrated above, is +wraparound - which means that there is no carry. +If the sum of the index register and second object code byte contents +is more than +.B +FF +.R +, the carry bit will be discarded. +This may be illustrated as follows: +.sp 9 +.NH 2 +Absolute indexed addressing. +.PP +In this case, the contents of either the X or Y register are added +to a 16-bit direct address provided by the second and third bytes +of an instruction's object code. +This may be illustrated as follows: +.sp 10 +.NH 2 +Indirect addressing.
+.PP +Instructions that use simple indirect addressing have three bytes of +object code. +The second and third object code bytes provide a 16-bit address; +therefore, the indirect address can be located anywhere in +memory. +This is straightforward indirect addressing. +.NH 3 +Pre-indexed indirect addressing. +.PP +In this case, the object code consists of two bytes and the +second object code byte provides an 8-bit address. +Instructions that use pre-indexed indirect addressing add the contents +of the X index register and the second object code byte to access +a memory location in the first 256 bytes of memory, where the +indirect address will be found: +.sp 18 +When using pre-indexed indirect addressing, once again wraparound +addition is used, which means that when the X index register contents +are added to the second object code byte, any carry will be discarded. +Note that only the X index register can be used with pre-indexed +addressing. +.NH 3 +Post-indexed indirect addressing. +.PP +In this case, the object code consists of two bytes and the +second object code byte provides an 8-bit address. +Now the second object code byte identifies a location +in the first 256 bytes of memory where an indirect address +will be found. +The contents of the Y index register are added to this indirect +address. +This may be illustrated as follows: +.sp 18 +Note that only the Y index register can be used with post-indexed +indirect addressing. +.bp +.NH +What the CPU has and doesn't have. +.PP +Although the designers of the MCS6500 CPUs family state that +there is nothing very significant about the short stack (only +256 bytes) this stack caused problems for the back end. +The designers say that a 256-byte stack usually is sufficient +for any typical microcomputer, this is only true if the stack +is used only for return addresses of the JSR (jump to +subroutine) instruction.
+But since the EM machine is supposed to be a stack machine and +high level languages need the ability to have parameters and +locals in their procedures and functions, this short stack +is insufficient. +So a software stack is implemented in this back end, requiring two +additional subroutines for stack handling. +These two stack handling subroutines slow down the processing time +of a program since the stack is used heavily. +.PP +Since parameters and locals of EM procedures are offset +from the localbase of that procedure, indirect addressing +is heavily used. +Offsets are positive (for parameters) and negative (for +local variables). +As explained before, among its addressing modes the MCS6500 has a +post indexed indirect addressing mode. +This addressing mode can only handle positive offsets. +This raises a problem for accessing the local variables. +I have chosen the following solution. +A second local base is introduced. +This second local base is the real local base subtracted by +a constant BASE. +In the present situation of the back end the value of BASE +is 240. +This means that there are 240 bytes reserved for local +variables to be indirect addressed and 14 bytes for +the parameters. +.DS C +.B +THE CODE GENERATOR. +.R +.DE +.NH 0 +Description of the machine table. +.PP +The machine description table consists of the following sections: +.IP 1. +The macro definitions. +.IP 2. +Constant definitions. +.IP 3. +Register definitions. +.IP 4. +Token definitions. +.IP 5. +Token expressions. +.IP 6. +Code rules. +.IP 7. +Move definitions. +.IP 8. +Test definitions. +.IP 9. +Stack definitions. +.NH 2 +Macro definitions. +.PP +The macro definitions at the top of the table are expanded +by the preprocessor on occurrence in the rest of the table. +.NH 2 +Constant definitions. +.PP +There are three constants which must be defined first. +They are: +.IP EM_WSIZE: 11 +Number of bytes in a machine word.
+This is the number of bytes a simple +.B +loc +.R +instruction will put on the stack. +.IP EM_PSIZE: +Number of bytes in a pointer. +This is the number of bytes a +.B +lal +.R +instruction will put on the stack. +.IP EM_BSIZE: +Number of bytes in the hole between AB and LB. +The calling sequence only saves LB on the stack so this +constant is equal to the pointer size. +.NH 1 +Register definitions. +.PP +The only important register definition is the definition of +the registerpair AX. +Since the rest of the machine's registers Y, PC, ST serve +special purposes, the code generator cannot use them. +.NH 2 +Token definitions +.PP +There is a fake token. +This token is put in the table, since the code generator generator +complains if it cannot find one. +.NH 2 +Token expression definitions. +.PP +The token expression is also a fake one. +This token expression is put in the table, since the code generator +generator complains if it cannot find one. +.NH 2 +Code rules. +.PP +The code rule section is the largest section in the table. +They specify EM patterns, stack patterns, code to be generated, +etc. +The syntax is: +.IP code rule: +EM pattern '|' stack pattern '|' code '|' +stack replacement '|' EM replacement '|' +.PP +All patterns are optional, however there must be at least one +pattern present. +If the EM pattern is missing the rule becomes a rewriting +rule or a +.B +coercion +.R +to be used when code generation cannot continue because of an +invalid stack pattern. +The code rules are preceeded by the word CODE:. +.NH 3 +The EM pattern. +.PP +The EM pattern consists of a list of EM mnemonics followed by +a boolean expression. 
Examples: +.sp 1 +.br +.B +loe +.R +.sp 1 +will match a single +.B +loe +.R +instruction, +.sp 1 +.br +.B +loc loc cif +.R +$1==2 && $2==8 +.sp 1 +is a pattern that will match +.sp 1 +.br +.B +loc +.R +2 +.br +.B +loc +.R +8 +.br +.B +cif +.R +.sp 1 +and +.sp 1 +.br +.B +lol +inc +stl +.R +$1==$3 +.sp 1 +will match for example +.sp 1 +.br +.B +lol +.R +6 +.br +.B +inc +.R +.br +.B +stl +.R +6 +.sp 1 +A missing boolean expression evaluates to TRUE. +.PP +The code generator will match the longest EM pattern on every occasion, +if two patterns of the same length match the first in the table +will be chosen, while all patterns of length greater than or equal +to three are considered to be of the same length. +.NH 3 +The stack pattern. +.PP +The only stack pattern that can occur is R16, which means that the +registerpair AX contains the word on top of the stack. +If this is not the case a coercion occurs. +This coercion generates a "jsr Pop", which means that the top +of the stack is popped and stored in the registerpair AX. +.NH 3 +The code part. +.PP +The code part consists of three parts, stack cleanup, register +allocation, and code to be generated. +All of these may be omitted. +.NH 4 +Stack cleanup. +.PP +When generating something like a branch instruction it might be +needed to empty the fake stack, that is, remove the AX registerpair. +This is done by the instruction remove(ALL). +.NH 4 +Register allocation. +.PP +If the machine code to be generated uses the registerpair AX, +this is signaled to the code generator by the allocate(R16) +instruction. +If the registerpair AX resides on the fake stack, this will result +in a "jsr Push", which means that the registerpair AX is pushed on +the stack and will be free for further use. +If registerpair AX is not on the fake stack nothing happens. +.NH 4 +Code to be generated. +.PP +Code to be generated is specified as a list of items of the following +kind: +.IP 1) +A string in double quotes("This is a string").
+This is copied to the codefile and a newline ('\n') is appended. +Inside the string all normal C string conventions are allowed, +and substitutions can be made of the following sorts. +.RS +.IP a) +$1, $2 etc. These are the operands of the corresponding EM +instructions and are printed according to their type. +To put a real '$' inside the string it must be doubled ('$$'). +.IP b) +%[1], %[2.reg], %[b.1] etc. These have their obvious meaning. +If they describe a complete token (%[1]) the printformat for +the token is used. +If they stand for a basic term in an expression they will be +printed according to their type. +To put a real '%' inside the string it must be doubled ('%%'). +.IP c) +%( arbitrary expression %). This allows inclusion of arbitrary +expressions inside strings. +Usually not needed very often, so that the awkward notation +is not too bad. +Note that %(%[1]%) is equivalent to %[1]. +.RE +.NH 3 +Stack replacement. +.PP +The stack replacement is a possibly empty list of items to be +pushed on the fake stack. +Three things can occur: +.IP 1) +%[1] is used if the registerpair AX was on the fake stack and is +to be pushed back onto it. +.IP 2) +%[a] is used if the registerpair AX is allocated with allocate(R16) +and is to be pushed onto the fake stack. +.IP 3) +It can also be empty. +.NH 3 +EM replacement. +.PP +In exceptional cases it might be useful to leave part of an EM +pattern undone. +For example, a +.B +sdl +.R +instruction might be split into two +.B +stl +.R +instructions when there is no 4-byte quantity on the stack. +The EM replacement part allows one to express this. +Example: +.sp 1 +.br +.B +stl +.R +$1 +.B +stl +.R +$1+2 +.sp 1 +The instructions are inserted in the stream so they can match +the first part of a pattern in the next step. +Note that since the code generator traverses the EM instructions +in a strict linear fashion, it is impossible to let the EM +replacement match later parts of a pattern.
+So if there is a pattern +.sp 1 +.br +.B +loc +stl +.R +$1==0 +.sp 1 +and the input is +.sp 1 +.br +.B +loc +.R +0 +.B +sdl +.R +4 +.sp 1 +the +.B +loc +.R +0 +will be processed first, then the +.B +sdl +.R +might be split into two +.B +stl +.R +\&'s but the pattern cannot match now. +.NH 3 +Move definitions. +.PP +This definition is a fake. This definition is put in the +table, since the code generator generator complains if it +cannot find one. +.NH 3 +Test definitions. +.PP +Test definitions aren't used by the table. +.NH 3 +Stack definitions. +.PP +When the generator has to push the registerpair AX, it must +know how to do so. +The machine code to be generated is defined here. +.NH 1 +Some remarks. +.PP +The above description of the machine table is +a description of the table for the MCS6500. +It uses only a part of the possibilities which the code generator +generator offers. +For a more precise and detailed description see [2]. +.DS C +.B +THE BACK END TABLE. +.R +.DE +.NH 0 +Introduction. +.PP +The code rules are divided into 15 groups. +These groups are: +.IP 1. +Load instructions. +.IP 2. +Store instructions. +.IP 3. +Integer arithmetic instructions. +.IP 4. +Unsigned arithmetic instructions. +.IP 5. +Floating point arithmetic instructions. +.IP 6. +Pointer arithmetic instructions. +.IP 7. +Increment, decrement and zero instructions. +.IP 8. +Convert instructions. +.IP 9. +Logical instructions. +.IP 10. +Set manipulation instructions. +.IP 11. +Array instructions. +.IP 12. +Compare instructions. +.IP 13. +Branch instructions. +.IP 14. +Procedure call instructions. +.IP 15. +Miscellaneous instructions. +.PP +From all of these groups one or two typical EM patterns will be explained +in the next paragraphs. +Comment is placed between /* and */ (/* This is a comment */). +.NH +The instructions. +.NH 2 +The load instructions. +.PP +In this group a typical instruction is +.B +lol +.R +\&.
+A +.B +lol +.R +instruction pushes the word at local base + offset, where offset +is the instruction's argument, onto the stack. +Since the MCS6500 can only offset by 256 bytes, as explained at the +memory addressing modes, there is a need for two code rules in the +table. +One which can offset directly and one that must explicitly +calculate the address of the local. +.NH 3 +The lol instruction with indirect offsetting. +.PP +In this case an indirect offsetted load from the second local base +is possible. +The table content is: +.sp 1 +.br +.B +lol +.R +IN($1) | | +.br +allocate(R16) /* allocate registerpair AX */ +.br +"ldy #BASE+$1" /* load Y with the offset from the second +.br + local base */ +.br +"lda (LBl),y" /* load indirect the lowbyte of the word */ +.br +"tax" /* move register A to register X */ +.br +"iny" /* increment register Y (offset) */ +.br +"lda (LBl),y" /* load indirect the highbyte of the word */ +.br +| %[a] | | /* push the word onto the fake stack */ +.NH 3 +The lol instruction whose offset is too big. +.PP +In this case, the library subroutine "Lol" is used. +This subroutine expects the offset in registerpair AX, then +calculates the address of the local or parameter, and loads +it into registerpair AX. +The table content is: +.sp 1 +.br +.B +lol +.R +| | +.br +allocate(R16) /* allocate registerpair AX */ +.br +"lda #[$1].h" /* load highbyte of offset into register A */ +.br +"ldx #[$1].l" /* load lowbyte of offset into register X */ +.br +"jsr Lol" /* perform the subroutine */ +.br +| %[a] | | /* push word onto the fake stack */ +.NH 2 +The store instructions. +.PP +In this group a typical instruction is +.B +stl. +.R +A +.B +stl +.R +instruction pops a word from the stack and stores it in the word +at local base + offset, where offset is the instruction's argument. +Here also is the need for two code rules in the table as a result +of the offset limits. +.NH 3 +The stl instruction with indirect offsetting.
+.PP +In this case an indirect offsetted store from the second local +base is possible. +The table content is: +.sp 1 +.br +.B +stl +.R +IN($1) | R16 | /* expect registerpair AX on top of the +.br + fake stack */ +.br +"ldy #BASE+1+$1" /* load Y with the offset from the +.br + second local base */ +.br +"sta (LBl),y" /* store the highbyte of the word from A */ +.br +"txa" /* move register X to register A */ +.br +"dey" /* decrement offset */ +.br +"sta (LBl),y" /* store the lowbyte of the word from A */ +.br +| | | +.NH 3 +The stl instruction whose offset is too big. +.PP +In this case the library subroutine 'Stl' is used. +This subroutine expects the offset in registerpair AX, then +calculates the address, pops the word and stores it in its place. +The table content is: +.sp 1 +.br +.B +stl +.R +| | +.br +allocate(R16) /* allocate registerpair AX */ +.br +"lda #[$1].h" /* load highbyte of offset in register A */ +.br +"ldx #[$1].l" /* load lowbyte of offset in register X */ +.br +"jsr Stl" /* perform the subroutine */ +.br +| | | +.NH 2 +Integer arithmetic instructions. +.PP +In this group typical instructions are +.B +adi +.R +and +.B +mli. +.R +These instructions, in this table, are implemented for 2-byte +and 4-byte integers. +The only arithmetic instructions available on the MCS6500 are +the ADC (add with carry), and SBC (subtract with not(carry)). +Not(carry) here means that in a subtraction, the one's complement +of the carry is taken. +The absence of multiply and division instructions forces the +use of subroutines to handle these cases. +Because there are no registers left to perform on the multiply +and division, zero page is used here. +The 4-byte integer arithmetic is implemented, because in C there +exists the integer type long. +A user is free to use the type long, but will pay in performance. +.NH 3 +The adi instruction.
+.PP +In case of the +.B +adi +.R +2 (and +.B +sbi +.R +2) instruction there are many EM +patterns, so that the instruction can be performed in line in +most cases. +For the worst case there exists a subroutine in the library +which deals with the EM instruction. +In case of a +.B +adi +.R +4 (or +.B +sbi +.R +4) there only is a subroutine to deal with it. +A table content is: +.sp 1 +.br +.B +lol lol adi +.R +(IN($1) && IN($2) && $3==2) | | /* is it in range */ +.br +allocate(R16) /* allocate registerpair AX */ +.br +"ldy #BASE+$1+1" /* load Y with offset for first operand */ +.br +"lda (LBl),y" /* load indirect highbyte first operand */ +.br +"pha" /* save highbyte first operand on hard_stack */ +.br +"dey" /* decrement offset first operand */ +.br +"lda (LBl),y" /* load indirect lowbyte first operand */ +.br +"ldy #BASE+$2" /* load Y with offset for second operand */ +.br +"clc" /* clear carry for addition */ +.br +"adc (LBl),y" /* add the lowbytes of the operands */ +.br +"tax" /* store lowbyte of result in place */ +.br +"iny" /* increment offset second operand */ +.br +"pla" /* get highbyte first operand */ +.br +"adc (LBl),y" /* add the highbytes of the operands */ +.br +| %[a] | | /* push the result onto the fake stack */ +.NH 3 +The mli instruction. +.PP +The +.B +mli +.R +2 instruction uses most the subroutine 'Mlinp'. +This subroutine expects the multiplicand in zero page +at locations ARTH, ARTH+1, while the multiplier is in zero +page locations ARTH+2, ARTH+3. +For a description of the algorithms used for multiplication and +division, see [3]. 
+A table content is: +.sp 1 +.br +.B +lol lol mli +.R +(IN($1) && IN($2) && $3==2) | | +.br +allocate(R16) /* allocate registerpair AX */ +.br +"ldy #BASE+$1" /* load Y with offset of multiplicand */ +.br +"lda (LBl),y" /* load indirect lowbyte of multiplicand */ +.br +"sta ARTH" /* store lowbyte in zero page */ +.br +"iny" /* increment offset of multiplicand */ +.br +"lda (LBl),y" /* load indirect highbyte of multiplicand */ +.br +"sta ARTH+1" /* store highbyte in zero page */ +.br +"ldy #BASE+$2" /* load Y with offset of multiplier */ +.br +"lda (LBl),y" /* load indirect lowbyte of multiplier */ +.br +"sta ARTH+2" /* store lowbyte in zero page */ +.br +"iny" /* increment offset of multiplier */ +.br +"lda (LBl),y" /* load indirect highbyte of multiplier */ +.br +"sta ARTH+3" /* store highbyte in zero page */ +.br +"jsr Mlinp" /* perform the multiply */ +.br +| %[a] | | /* push result onto fake stack */ +.NH 2 +The unsigned arithmetic instructions. +.PP +Since unsigned addition and subtraction are performed in the same way +as signed addition and subtraction, these cases are dealt with by +an EM replacement. +For multiplication and division there are special subroutines. +.NH 3 +Unsigned addition. +.PP +This is an example of the EM replacement strategy. +.sp 1 +.br +.B +lol lol adu +.R + | | | | +.B +lol +.R +$1 +.B +lol +.R +$2 +.B +adi +.R +$3 | +.NH 2 +Floating point arithmetic. +.PP +Floating point arithmetic isn't implemented in this table. +.NH 2 +Pointer arithmetic instructions. +.PP +A typical pointer arithmetic instruction is +.B +adp +.R +2. +This instruction adds an offset and a pointer. +A table content is: +.sp 1 +.br +.B +adp +.R + | | | | +.B +loc +.R +$1 +.B +adi +.R +2 | +.NH 2 +Increment, decrement and zero instructions. +.PP +In this group a typical instruction is +.B +inl +.R +, which increments a local or parameter. +The MCS6500 doesn't have an instruction to increment the +accumulator A, so the 'ADC' instruction must be used.
+A table content is: +.sp 1 +.br +.B +inl +.R +IN($1) | | +.br +allocate(R16) /* allocate registerpair AX */ +.br +"ldy #BASE+$1" /* load Y with offset of the local */ +.br +"clc" /* clear carry for addition */ +.br +"lda (LBl),y" /* load indirect lowbyte of local */ +.br +"adc #1" /* increment lowbyte */ +.br +"sta (LBl),y" /* restore indirect the incremented lowbyte */ +.br +"bcc 1f" /* if carry is clear then ready */ +.br +"iny" /* increment offset of local */ +.br +"lda (LBl),y" /* load indirect highbyte of local */ +.br +"adc #0" /* add carry to highbyte */ +.br +"sta (LBl),y\\n1:" /* restore indirect the highbyte */ +.PP +If the offset of the local or parameter is too big, first the +local or parameter is fetched, then incremented, and then +restored. +.NH 2 +Convert instructions. +.PP +In this case there are two convert instructions +which really do something. +One of them is in line code, and deals with the extension of +a character (1-byte) to an integer. +The other one is a subroutine which handles the conversion +between 2-byte integers and 4-byte integers. +.NH 3 +The in line conversion. +.PP +The table content is: +.sp 1 +.br +.B +loc loc cii +.R +$1==1 && $2==2 | R16 | +.br +"txa" /* see if sign extension is needed */ +.br +"bpl 1f" /* there is no need for sign extension */ +.br +"lda #0FFh" /* sign extension here */ +.br +"bne 2f" /* conversion ready */ +.br +"1: lda #0\\n2:" /* no sign extension here */ +.NH 2 +Logical instructions. +.PP +A typical instruction in this group is the logical +.B +and +.R +on two 2-byte words. +The logical +.B +and +.R +on groups of more than two bytes (max 254) +is also possible and uses a library subroutine. +.NH 3 +The logical and on 2-byte groups.
+.PP +The table content is: +.sp 1 +.br +.B +and +.R +$1==2 | R16 | /* one group must be on the fake stack */ +.br +"sta ARTH+1" /* temporary save of first group highbyte */ +.br +"stx ARTH" /* temporary save of first group lowbyte */ +.br +"jsr Pop" /* pop second group from the stack */ +.br +"and ARTH+1" /* logical and on highbytes */ +.br +"pha" /* temporary save the result's highbyte */ +.br +"txa" /* logical and can only be done in A */ +.br +"and ARTH" /* logical and on lowbytes */ +.br +"tax" /* restore results lowbyte */ +.br +"pla" /* restore results highbyte */ +.br +| %[1] | | /* push result onto fake stack */ +.NH 2 +Set manipulation instructions. +.PP +A typical EM pattern in this group is +.B +loc inn zeq +.R +$1>0 && $1<16 && $2==2. +This EM pattern works on sets of 16 bits. +Sets can be bigger (max 256 bytes = 2048 bits), but than a +library routine is used instead of in line code. +The table content of the above EM pattern is: +.sp 1 +.br +.B +loc inn zeq +.R +$1>0 && $1<16 && $2==2 | R16 | +.br +"ldy #$1+1" /* load Y with bit number */ +.br +"stx ARTH" /* cannot rotate X, so use zero page */ +.br +"1: lsr a" /* right shift A */ +.br +"ror ARTH" /* right rotate zero page location */ +.br +"dey" /* decrement Y */ +.br +"bne 1b" /* shift $1 times */ +.br +"bcc $1" /* no carry, so bit is zero */ +.NH 2 +Array instructions. +.PP +In this group a typical EM pattern is +.B +lae lar +.R +defined(rom(1,3)) | | | | +.B +lae +.R +$1 +.B +aar +.R +$2 +.B +loi +.R +rom(1,3). 
+This pattern uses the +.B +aar +.R +instruction, which is part of a typical EM pattern: +.sp 1 +.br +.B +lae aar +.R +$2==2 && rom(1,3)==2 && rom(1,1)==0 | R16 | /* registerpair AX contains +the index in the array */ +.br +"pha" /* save highbyte of index */ +.br +"txa" /* move lowbyte of index to A */ +.br +"asl a" /* shift left lowbyte == 2 times lowbyte */ +.br +"tax" /* restore lowbyte */ +.br +"pla" /* restore highbyte */ +.br +"rol a" /* rotate left highbyte == 2 times highbyte */ +.br +| %[1] | adi 2 | /* push new index, add to lowerbound array */ +.NH 2 +Compare instructions. +.PP +In this group all EM patterns are performed by calling +a subroutine. +Subroutines are used here because comparison is only +possible byte by byte. +This means a lot of code, and since compares are used frequently +a lot of in line code would be generated, thus reducing +the space left for the software stack. +These subroutines can be found in the library. +.NH 2 +Branch instructions. +.PP +A typical branch instruction is +.B +beq. +.R +The table content for it is: +.sp 1 +.br +.B +beq +.R +| R16 | +.br +"sta BRANCH+1" /* save highbyte second operand in zero page */ +.br +"stx BRANCH" /* save lowbyte second operand in zero page */ +.br +"jsr Pop" /* pop the first operand */ +.br +"cmp BRANCH+1" /* compare the highbytes */ +.br +"bne 1f" /* they're not equal so go on */ +.br +"cpx BRANCH" /* compare the lowbytes */ +.br +"beq $1\\n1:" /* lowbytes are also equal, so branch */ +.PP +Another typical instruction in this group is +.B +zeq. +.R +The table content is: +.sp 1 +.br +.B +zeq +.R +| R16 | +.br +"tay" /* move A to Y for setting testbits */ +.br +"bmi $1" /* highbyte is minus so branch */ +.br +"txa" /* move X to A for setting testbits */ +.br +"beq $1\\n1:" /* lowbyte also zero, thus branch */ +.NH 2 +Procedure call instructions. +.PP +In this group one code generation might seem a little +awkward. +It is the EM instruction +.B +cai +.R +which generates a 'jsr Indir'.
+This is because there is no indirect jump_subroutine in the +MCS6500. +The only solution is to store the address in zero page, and then +do a 'jsr' to a known label. +At this label there must be an indirect jump instruction, which +performs a jump to the address stored in zero page. +In this case the label is Indir, and the address is stored in +zero page at the addresses ADDR, ADDR+1. +The table content is: +.sp 1 +.br +.B +cai +.R +| R16 | +.br +"stx ADDR" /* store lowbyte of address in zero page */ +.br +"sta ADDR+1" /* store highbyte of address in zero page */ +.br +"jsr Indir" /* use the indirect jump */ +.br +| | | +.NH 2 +Miscellaneous instructions. +.PP +In this group, as the name suggests, there is no +typical EM instruction or EM pattern. +Most of the MCS6500 code to be generated uses a library subroutine +or is straightforward. +.DS C +.B +PERFORMANCE. +.R +.DE +.NH 0 +Introduction. +.PP +To measure the performance of the back end table some timing +tests are done. +What to time? +In this case, the execution times of several Pascal statements +are measured. +Statements in C, which have a Pascal equivalent are timed also. +The statements are timed as follows. +A test program has been written, which executes two +nested for_loops from 1 to 1.000. +Within these for_loops the statement, which is to be tested, is placed, +so the statement will be executed 1.000.000 times. +Then the same program is executed without the test statement. +The time difference between the two executions is the time +necessary to execute the test statement 1.000.000 times. +The time to execute the test statement once is thus the +time difference divided by 1.000.000. +.NH 0 +Testing Pascal statements. +.PP +The next statements are tested.
+.IP 1) +int1 := 0; +.IP 2) +int1 := int2 - 1; +.IP 3) +int1 := int1 + 1; +.IP 4) +int1 := icon1 - icon2; +.IP 5) +int1 := icon2 div icon1; +.IP 6) +int1 := int2 * int3; +.IP 7) +bool := (int1 < 0); +.IP 8) +bool := (int1 < 3); +.IP 9) +bool := ((int1 > 3) or (int1 < 3)) +.IP 10) +case int1 of 1: bool := false; 2: bool := true end; +.IP 11) +if int1 = 0 then int2 := 3; +.IP 12) +while int1 > 0 do int1 := int1 - 1; +.IP 13) +m := a[k]; +.IP 14) +let2 := ['a'..'c']; +.IP 15) +P3(x); +.IP 16) +dum := F3(x); +.IP 17) +s.overhead := 5400; +.IP 18) +with s do overhead := 5400; +.PP +These statements were tested in a procedure test. +.sp 1 +.br +procedure test; +.br +var i, j, ... : integer; +.br + bool : boolean; +.br + let2 : set of char; +.br +begin +.br + for i := 1 to 1000 +.br + for j := 1 to 1000 +.br + STATEMENT +.br +end; +.sp 1 +.PP +STATEMENT is one of the statements as shown above, or it is +the empty statement. +The assignment of used variables, if necessary, is done before +the first for_loop. +In case of the statement which uses the procedure call, statement +15, a dummy procedure is declared whose body is empty. +In case of the statement which uses the function, statement 16, +this function returns its argument. +For the timing of C statements a similar test program was +written. +.sp 1 +.br +main() +.br +{ +.br + int i, j, ...; +.br + for (i = 1; i <= 1000; i++) +.br + for (j = 1; j <= 1000; j++) +.br + STATEMENT +.br +} +.sp 1 +.NH +The results. +.PP +Here are tables with the results of the time measurements. +Times are in microseconds (10^-6). +Some statements appear twice in the tables. +In the second case an array of 200 integers was declared +before the variable to be tested, so this variable cannot +be accessed by indirect addressing from the second local base. +This results in a larger execution time of the statement to be +tested. +The column 68000 contains the times measured on a Bleasdale, +M68000 based, computer.
+The times in column pdp are measured on a DEC pdp11/44, where +the times from column 6500 come from a BBC microcomputer. +.bp +.TS +expand; +c s s s +c c c c +lw35 nw7 nw7 nw7. +Pascal timing results +statement 68000 pdp 6500 +_ +T{ +int1 := 0; +T} 4.0 5.8 16.7 + 4.0 4.2 97.8 +_ +T{ +int1 := int2 - 1; +T} 7.2 7.1 27.2 + 6.9 7.1 206.5 +_ +T{ +int1 := int1 + 1; +T} 6.9 6.8 27.2 + 6.4 6.7 106.5 +_ +T{ +int1 := icon1 + icon2; +T} 6.2 6.2 25.6 + 6.2 6.0 106.6 +_ +T{ +int1 := icon2 div icon1; +T} 14.9 14.3 372.6 + 14.9 14.7 453.7 +_ +T{ +int1 := int2 * int3; +T} 11.5 12.0 558.1 + 11.3 11.6 728.6 +_ +T{ +bool := (int1 < 0); +T} 7.2 6.9 122.8 + 7.8 8.1 453.2 +_ +T{ +bool := (int1 < 3); +T} 7.3 7.6 126.0 + 7.2 8.1 232.2 +_ +T{ +bool := ((int1 > 3) or (int1 < 3)) +T} 10.1 12.0 307.8 + 10.2 11.9 440.1 +_ +T{ +case int1 of 1: bool := false; 2: bool := true end; +T} 18.3 17.9 165.7 +_ +T{ +if int1 = 0 then int2 := 3; +T} 9.5 8.5 133.8 +_ +T{ +while int1 > 0 do int1 := int1 - 1; +T} 6.9 6.9 126.0 +_ +T{ +m := a[k]; +T} 7.2 6.8 134.3 +_ +T{ +let2 := ['a'..'c']; +T} 38.4 38.8 447.4 +_ +T{ +P3(x); +T} 18.9 18.8 180.3 +_ +T{ +dum := F3(x); +T} 26.8 27.1 343.3 +_ +T{ +s.overhead := 5400; +T} 4.6 4.1 16.7 +_ +T{ +with s do overhead := 5400; +T} 4.2 4.3 16.7 +.TE +.TS +expand; +c s s s +c c c c +lw35 nw7 nw7 nw7. 
+C timing results +statement 68000time pdptime 6500time +_ +T{ +int1 = 0; +T} 4.1 3.6 17.2 + 4.1 4.1 97.7 +_ +T{ +int1 = int2 - 1; +T} 6.6 6.9 27.2 + 6.1 6.5 206.4 +_ +T{ +int1 = int1 + 1; +T} 6.4 7.3 27.2 + 6.3 6.2 206.4 +_ +T{ +int1 = int2 * int3; +T} 11.4 12.3 522.6 + 9.6 10.1 721.2 +_ +T{ +int1 = (int2 < 0); +T} 7.2 7.6 126.4 + 7.4 7.7 232.5 +_ +T{ +int1 = (int2 < 3); +T} 7.0 7.5 126.0 + 7.8 7.8 232.6 +_ +T{ +int1 = ((int2 > 3) || (int2 < 3)); +T} 11.8 12.2 193.4 + 11.5 13.2 245.6 +_ +T{ +switch (int1) { case 1: int1 = 0; break; case 2: int1 = 1; break; } +T} 28.3 29.2 164.1 +_ +T{ +if (int1 == 0) int2 = 3; +T} 4.8 4.8 19.4 +_ +T{ +while (int2 > 0) int2 = int2 - 1; +T} 5.8 6.0 125.9 +_ +T{ +int2 = a[int2]; +T} 4.8 5.1 192.8 +_ +T{ +P3(int2); +T} 18.8 18.4 180.3 +_ +T{ +int2 = F3(int2); +T} 27.0 27.2 309.4 +_ +T{ +s.overhead = 5400; +T} 5.0 4.1 16.7 +.TE +.NH +Pascal statements which don't have a C equivalent. +.PP +At first, the two statements which perform an operation on constants +are left out. +These are left out because the C front end does constant folding, +while the Pascal front end doesn't. +So in C the statements int1 = icon1 + icon2; and int1 = icon1 / icon2; +will use the same amount of time since the expression is evaluated +by the front end. +The two other statements (let2 := ['a'..'c']; and +.B +with +.R +s +.B +do +.R +overhead := 5400;), aren't included in the C statement timing table, +because these constructs do not exist in C. +Although in C there can be direct bit manipulation, which thus can +be used to implement sets, I have not used it here. +The +.B +with +.R +statement does not exist in C and there is nothing with the slightest +resemblance to it. +.PP +At first sight in the table, it looked as if there is not much difference +in the times for the M68000 and the pdp11/44, in comparison with the +times needed by the MCS6500. +To verify this impression, I calculated the correlation coefficient +between the times of the M68000 and pdp11/44.
+It turned out to be 0.997 for both the Pascal time tests and the C +time tests. +Since the correlation coefficient is near to one and the difference +between the times is small, they can be considered to be the same +as seen from the times of the MCS6500. +Then I have tried to make a graph of the times from the M68000 and +the MCS6500. +Well, there wasn't any correlation to be seen, taking all the times. +The only correlation one could see, with some effort, was in the +times for the first three Pascal statements. +The two first C statements show also a correlation, which two points +always do. +.PP +Also the three Pascal statements +.B +case +.R +, +.B +if +.R +, +and +.B +while +.R +have a correlation coefficient of 0.999. +This is probably because the +.B +case +.R +statement uses a subroutine in both cases and the other two +statements +.B +if +.R +and, +.B +while +.R +generate in line code. +The last two Pascal statements use the same time, since the front +end will generate the same EM code for both. +.PP +The independence between the rest of the test times is because +in these cases the object code for the MCS6500 uses library +subroutines, while the other processors can handle the EM code +with in line code. +.PP +It is clear that the MCS6500 is a slower device, it needs longer +execution times, the need of more library subroutines, but +there is no constant factor between its execution times and those +of other processors. +.PP +The slowing down of the MCS6500 as result of the need of a +library subroutine is illustrated by the multiplication +statement. +The MCS6500 needs a library subroutine, while the other +two processors have a machine instruction to perform the +multiply. +This results in a factor of 48.5, when the operands can be accessed +indirect by the MCS6500. +When the MCS6500 cannot access the operands indirectly the situation +is even worse.
+The slight differences between the MCS6500 execution times for +Pascal statements and C statements is probably the result of the +front end, and thus beyond the scope of this discussion. +.PP +Another timing test is done in C on the statement k = i + j + 1983. +This statement is tested on many UNIX* +.FS +* UNIX is a Trademark of Bell Laboratories. +.FE +systems. +For a complete list see appendix A. +The slowest one is the IBM XT, which runs on an 8088 microprocessor. +The fastest one is the Amdahl computer. +Here is a short table to illustrate the performance of the +MCS6500. +.TS +c c c +c n n. +machine short int +IBM XT 53.4 53.4 +Amdahl 0.5 0.3 +MCS6500 150.2 150.2 +.TE +The MCS6500 is three times slower than the IBM XT, but three hundred +times slower than the Amdahl. +The reason why the times on the IBM XT and the MCS6500 are the +same for short's and int's, is that most C compilers make the types +short and integer the same size on 16-bit machines. +In this project the MCS6500 is regarded as a 16-bit machine. +.NH +Length tests. +.PP +I have also compiled several programs written in Pascal and C to +see if there is a resemblance between the number of bytes generated +in the machine's language. +In the tables: +.IP length: 9 +The number of bytes of the source program. +.IP 68000: +The number of bytes of the a.out file for a M68000. +.IP pdp: +The number of bytes of the a.out file for a pdp11/44. +.IP 6500: +The number of bytes of the a.out file for a MCS6500. +.LP +These are the results: +.TS +c s s s +c c c c +n n n n. +Pascal programs +length 68000 pdp 6500 +_ +19946 14383 16090 26710 +19484 20169 20190 35416 +10849 10469 11464 18949 +273 4221 5106 7944 +1854 5807 6610 10301 +.TE +.TS +c s s s +c c c c +n n n n.
+C programs +length 68000 pdp 6500 +_ +9444 6927 8234 11559 +7655 14353 18240 26251 +4775 11309 15934 19910 +639 6337 9660 12494 +.TE +.PP +In contrast to the execution times of the test statements, the +object code file sizes show a constant factor between them. +After calculating the correlation coefficient, I have calculated +the line fitted between sizes. +.FS +* x is the number of bytes +.FE +.TS +c s s +c c c +l c c. +Pascal programs +processor corr. coef. fitted line +_ +68000-pdp 0.996 +68000-6500 0.999 1.76x + 502* +pdp-6500 0.999 1.80x - 1577 +.TE +.TS +c s s +c c c +l c c. +C programs +processor corr. coef. fitted line +_ +68000-pdp 0.974 +68000-6500 0.992 1.80x + 502* +pdp-6500 0.980 1.40x - 1577 +.TE +.PP +As seen from the tables above the correlation coefficient for +Pascal programs is better than the ones for C programs. +Thus the line fits best for Pascal programs. +With the formula of the best fitted line one can now estimate +the size of the object code, which a program needs, for a MCS6500 +without having the compiler at hand. +One also can see from these formulas that the object code +generated for a MCS6500 is about 1.8 times more than for the other +processors. +Since the number of bytes in the source file heavily depends on the +programmer, how many spaces he or she uses, the size of the indenting +in structured programs, etc., there is no correlation between the +size of the source file and the size of the object file. +Also the use of comments has its influence on the size. +.bp +.DS C +.B +SUMMARY. +.R +.DE +.NH 0 +Summary +.PP +In this chapter some final conclusions are made. +.PP +In spite of its simplicity, the MCS6500 is strong enough to +implement an EM machine. +A serious deficiency of the MCS6500 is the missing of 16-bit +general purpose registers, and especially the missing of a +16-bit stackpointer.
+As pointed out before, one 16-bit register can be simulated +by a pair of 8-bit registers, in fact, the accumulator A to +hold the highbyte, and the index register X to hold the lowbyte +of the word. +By lack of a 16-bit stackpointer, zero page must be used to hold +a stackpointer and there are also two subroutines needed for +manipulating the stack (Push and Pop). +.PP +As seen at the time tests, the simple instruction set of the +MCS6500 forces the use of library subroutines. +These library subroutines increase the execution time of the +programs. +.PP +The sizes of the object code files show a strong correlation +in contrast to the execution times. +With this correlation one can estimate the size of a program +if it is to be used on a MCS6500. +.bp +.NH 0 +.B +REFERENCES. +.R +.IP 1. +Osborn, A., Jacobson, S., and Kane, J. The Mos Technology MCS6500. +.B +An Introduction to Microcomputers , +.R +Volume II, Some Real Products (june 1977) chap. 9. +.RS +.PP +A hardware description of some real existing CPU's, such as +the Intel Z80, MCS6500, etc. is given in this book. +.RE +.IP 2. +van Staveren, H. +The table driven code generator from the Amsterdam Compiler Kit. +Vrije Universiteit, Amsterdam, (July 11, 1983). +.RS +.PP +The defining document for writing a back end table. +.RE +.IP 3. +Tanenbaum, A.S. Structured Computer Organization. +Prentice Hall. (1976). +.RS +.PP +In this book computers are described as a hierarchy of levels, +with each one performing some well-defined function. +.RE diff --git a/doc/LLgen/LLgen.n b/doc/LLgen/LLgen.n new file mode 100644 index 0000000..35d6aec --- /dev/null +++ b/doc/LLgen/LLgen.n @@ -0,0 +1,1077 @@ +.\" $Id: LLgen.n,v 1.16 1994/12/20 12:40:21 ceriel Exp $ +.\" Run this paper off with +.\" refer [options] -p LLgen.refs LLgen.doc | [n]eqn | tbl | (nt)roff -ms +.if '\*(>.'' \{\ +. if '\*(<.'' \{\ +. if n .ds >. . +. if n .ds >, , +. if t .ds <. . +.
if t .ds <, ,\ +\}\ +\} +.cs 5 22u +.ND +.EQ +delim @@ +.EN +.TL +LLgen, an extended LL(1) parser generator +.AU +Ceriel J. H. Jacobs +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.AB +\fILLgen\fR provides a +tool for generating an efficient recursive descent parser +with no backtrack from +an Extended Context Free syntax. +The \fILLgen\fR +user specifies the syntax, together with code +describing actions associated with the parsing process. +\fILLgen\fR +turns this specification into a number of subroutines that handle the +parsing process. +.PP +The grammar may be ambiguous. +\fILLgen\fR contains both static and dynamic facilities +to resolve these ambiguities. +.PP +The specification can be split into several files, for each of +which \fILLgen\fR generates an output file containing the +corresponding part of the parser. +Furthermore, only output files that differ from their previous +version are updated. +Other output files are not affected in any +way. +This allows the user to recompile only those output files that have +changed. +.PP +The subroutine produced by \fILLgen\fR calls a user supplied routine +that must return the next token. This way, the input to the +parser can be split into single characters or higher level +tokens. +.PP +An error recovery mechanism is generated almost completely +automatically. +It is based on so called \fBdefault choices\fR, which are +implicitly or explicitly specified by the user. +.PP +\fILLgen\fR has successfully been used to create recognizers for +Pascal, C, and Modula-2. +.AE +.NH +Introduction +.PP +\fILLgen\fR +provides a tool for generating an efficient recursive +descent parser with no backtrack from an Extended Context Free +syntax. +A parser generated by +\fILLgen\fR +will be called +\fILLparse\fR +for the rest of this document. +It is assumed that the reader has some knowledge of LL(1) grammars and +recursive descent parsers.
+For a survey on the subject, see reference +.[ ( +griffiths +.]). +.PP +Extended LL(1) parsers are an extension of LL(1) parsers. They are +derived from an Extended Context-Free (ECF) syntax instead of a Context-Free +(CF) syntax. +ECF syntax is described in section 2. +Section 3 provides an outline of a +specification as accepted by +\fILLgen\fR and also discusses the lexical conventions of +grammar specification files. +Section 4 provides a description of the way the +\fILLgen\fR +user can associate +actions with the syntax. These actions must be written in the programming +language C, +.[ +kernighan ritchie +.] +which also is the target language of \fILLgen\fR. +The error recovery technique is discussed in section 5. +This section also discusses what the user can do about it. +Section 6 discusses +the facilities \fILLgen\fR offers +to resolve ambiguities and conflicts. +\fILLgen\fR offers facilities to resolve them both at parser +generation time and during the execution of \fILLparse\fR. +Section 7 discusses the +\fILLgen\fR +working environment. +It also discusses the lexical analyzer that must be supplied by the +user. +This lexical analyzer must read the input stream and break it +up into basic input items, called \fBtokens\fR for the rest of +this document. +Appendix A gives a summary of the +\fILLgen\fR +input syntax. +Appendix B gives an example. +It is very instructive to compare this example with the one +given in reference +.[ ( +yacc +.]). +It demonstrates the struggle \fILLparse\fR and other LL(1) +parsers have with expressions. +Appendix C gives an example of the \fILLgen\fR features +allowing the user to recompile only those output files that +have changed, using the \fImake\fR program. +.[ +make +.] +.NH +The Extended Context-Free Syntax +.PP +The extensions of an ECF syntax with respect to an ordinary CF syntax are: +.IP 1. 10 +An ECF syntax contains the repetition operator: "N" (N represents a positive +integer). +.IP 2. 
10 +An ECF syntax contains the closure set operator without and with +upperbound: "*" and "*N". +.IP 3. 10 +An ECF syntax contains the positive closure set operator without and with +upperbound: "+" and "+N". +.IP 4. 10 +An ECF syntax contains the optional operator: "?", which is a +shorthand for "*1". +.IP 5. 10 +An ECF syntax contains parentheses "[" and "]" which can be +used for grouping. +.PP +We can describe the syntax of an ECF syntax with an ECF syntax : +.DS +.ft CW +grammar : rule + + ; +.ft R +.DE +This grammar rule states that a grammar consists of one or more +rules. +.DS +.ft CW +rule : nonterminal ':' productionrule ';' + ; +.ft R +.DE +A rule consists of a left hand side, the nonterminal, +followed by ":", +the \fBproduce symbol\fR, followed by a production rule, followed by a +";", in\%di\%ca\%ting the end of the rule. +.DS +.ft CW +productionrule : production [ '|' production ]* + ; +.ft R +.DE +A production rule consists of one or +more alternative productions separated by "|". This symbol is called the +\fBalternation symbol\fR. +.DS +.ft CW +production : term * + ; +.ft R +.DE +A production consists of a possibly empty list of terms. +So, empty productions are allowed. +.DS +.ft CW +term : element repeats + ; +.ft R +.DE +A term is an element, possibly with a repeat specification. +.DS +.ft CW +element : LITERAL + | IDENTIFIER + | '[' productionrule ']' + ; +.ft R +.DE +An element can be a LITERAL, which basically is a single character +between apostrophes, it can be an IDENTIFIER, which is either a +nonterminal or a token, and it can be a production rule +between square parentheses. +.DS +.ft CW +repeats : '?' + | [ '*' | '+' ] NUMBER ? + | NUMBER ? + ; +.ft R +.DE +These are the repeat specifications discussed above. Notice that +this specification may be empty. +.PP +The class of ECF languages +is identical with the class of CF languages. 
However, in many +cases recursive definitions of language features can now be +replaced by iterative ones. This tends to reduce the number of +nonterminals and gives rise to very efficient recursive descent +parsers. +.NH +Grammar Specifications +.PP +The major part of a +\fILLgen\fR +grammar specification consists of an +ECF syntax specification. +Names in this syntax specification refer to either tokens or nonterminal +symbols. +\fILLgen\fR +requires token names to be declared as such. This way it +can be avoided that a typing error in a nonterminal name causes it to +be accepted as a token name. The token declarations will be +discussed later. +A name will be regarded as a nonterminal symbol, unless it is declared +as a token name. +If there is no production rule for a nonterminal symbol, \fILLgen\fR +will complain. +.PP +A grammar specification may also include some C routines, +for instance the lexical analyzer and an error reporting +routine. +Thus, a grammar specification file can contain declarations, +grammar rules and C-code. +.PP +Blanks, tabs and newlines are ignored, but may not appear in names or +keywords. +Comments may appear wherever a name is legal (which is almost +everywhere). +They are enclosed in +/* ... */, as in C. Comments do not nest. +.PP +Names may be of arbitrary length, and can be made up of letters, underscore +"\_" and non-initial digits. Upper and lower case letters are distinct. +Only the first 50 characters are significant. +Notice however, that the names for the tokens will be used by the +C-preprocessor. +The number of significant characters therefore depends on the +underlying C-implementation. +A safe rule is to make the identifiers distinct in the first six +characters, case ignored. +.PP +There are two kinds of tokens: +those that are declared and are denoted by a name, +and literals. +.PP +A literal consists of a character enclosed in apostrophes "'". +The "\e" is an escape character within literals. 
The following escapes +are recognized : +.TS +center; +l l. +\&'\en' newline +\&'\er' return +\&'\e'' apostrophe "'" +\&'\e\e' backslash "\e" +\&'\et' tab +\&'\eb' backspace +\&'\ef' form feed +\&'\exxx' "xxx" in octal +.TE +.PP +Names representing tokens must be declared before they are used. +This can be done using the "\fB%token\fR" keyword, +by writing +.nf +.ft CW +.sp 1 +%token name1, name2, . . . ; +.ft R +.fi +.PP +\fILLparse\fR is designed to recognize special nonterminal +symbols called \fBstart symbols\fR. +\fILLgen\fR allows for more than one start symbol. +Thus, grammars with more than one entry point are accepted. +The start symbols must be declared explicitly using the +"\fB%start\fR" keyword. It can be used whenever a declaration is +legal, f.i.: +.nf +.ft CW +.sp 1 +%start LLparse, specification ; +.ft R +.fi +.sp 1 +declares "specification" as a start symbol and associates the +identifier "LLparse" with it. +"LLparse" will now be the name of the C-function that must be +called to recognize "specification". +.NH +Actions +.PP +\fILLgen\fR +allows arbitrary insertions of actions within the right hand side +of a production rule in the ECF syntax. An action consists of a number of C +statements, enclosed in the brackets "{" and "}". +.PP +\fILLgen\fR +generates a parsing routine for each rule in the grammar. The actions +supplied by the user are just inserted in the proper place. +There may also be declarations before the statements in the +action, as +the "{" and "}" are copied into the target code along with the +action. The scope of these declarations terminates with the +closing bracket "}" of the action. +.PP +In addition to actions, it is also possible to declare local variables +in the parsing routine, which can then be used in the actions. +Such a declaration consists of a number of C variable declarations, +enclosed in the brackets "{" and "}". It must be placed +right in front of the ":" in the grammar rule. 
+The scope of these local variables consists of the complete +grammar rule. +.PP +In order to facilitate communication between the actions and +\fILLparse\fR, +the parsing routines can be given C-like parameters. +Each parameter must be declared separately, and each of these declarations must +end with a semicolon. +For the last parameter, the semicolon is optional. +.PP +So, for example +.nf +.ft CW +.sp 1 +expr(int *pval;) { int fact; } : + /* + * Rule with one parameter, a pointer to an int. + * Parameter specifications are ordinary C declarations. + * One local variable, of type int. + */ + factor (&fact) { *pval = fact; } + /* + * factor is another nonterminal symbol. + * One actual parameter is supplied. + * Notice that the parameter passing mechanism is that + * of C. + */ + [ '+' factor (&fact) { *pval += fact; } ]* + /* + * remember the '*' means zero or more times + */ + ; +.sp 1 +.ft R +.fi +is a rule to recognize a number of factors, separated by "+", and +to compute their sum. +.PP +\fILLgen\fR +generates C code, so the parameter passing mechanism is that of +C, as is shown in the example above. +.PP +Actions often manipulate attributes of the token just read. +For instance, when an identifier is read, its name must be +looked up in a symbol table. +Therefore, \fILLgen\fR generates code +such that at a number of places in the grammar rule +it is defined which token has last been read. +After a token, the last token read is this token. +After a "[" or a "|", the last token read is the next token to +be accepted by \fILLparse\fR. +At all other places, it is undefined which token has last been +read. +The last token read is available in the global integer variable +\fILLsymb\fR. +.PP +The user may also specify C-code wherever a \fILLgen\fR-declaration is +legal. +Again, this code must be enclosed in the brackets "{" and "}". +This way, the user can define global declarations and +C-functions. 
+To avoid name-conflicts with identifiers generated by +\fILLgen\fR, \fILLparse\fR only uses names beginning with +"LL"; the user should avoid such names. +.NH +Error Recovery +.PP +The error recovery technique used by \fILLgen\fR is a +modification of the one presented in reference +.[ ( +automatic construction error correcting +.]). +It is based on \fBdefault choices\fR, which just are +what the word says, default choices at +every point in the grammar where there is a +choice. +Thus, in an alternation, one of the productions is marked as a +default choice, and in a term with a non-fixed repetition +specification there will also be a default choice (between +doing the term (once more) and continuing with the rest of the +production in which the term appears). +.PP +When \fILLparse\fR detects an error after having parsed the +string @s@, the default choices enable it to compute one +syntactically correct continuation, +consisting of the tokens @t sub 1~...~t sub n@, +such that @s~t sub 1~...~t sub n@ is a string of tokens that +is a member of the language defined by the grammar. +Notice, that the computation of this continuation must +terminate, which implies that the default choices may not +invoke recursive rules. +.PP +At each point in this continuation, a certain number of other +tokens could also be syntactically correct, f.i. the token +@t@ is syntactically correct at point @t sub i@ in this +continuation, if the string @s~t sub 1~...~t sub i~t~s sub 1@ +is a string of the language defined by the grammar for some +string @s sub 1@ and i >= 0. +.PP +The set @T@ +containing all these tokens (including @t sub 1 ,~...,~t sub n@) is computed. +Next, \fILLparse\fR discards zero +or more tokens from its input, until a token +@t@ \(mo @T@ is found. +The error is then corrected by inserting i (i >= 0) tokens +@t sub 1~...~t sub i@, such that the string +@s~t sub 1~...~t sub i~t~s sub 1@ is a string of the language +defined by the grammar, for some @s sub 1@. 
+Then, normal parsing is resumed. +.PP +The above is difficult to implement in a recursive descent +parser, and is not the way \fILLparse\fR does it, but the +effect is the same. In fact, \fILLparse\fR maintains a list +of tokens that may not be discarded, which is adjusted as +\fILLparse\fR proceeds. This list is just a representation +of the set @T@ mentioned +above. When an error occurs, \fILLparse\fR discards tokens until +a token @t@ that is a member of this list is found. +Then, it continues parsing, following the default choices, +inserting tokens along the way, until this token @t@ is legal. +The selection of +the default choices must guarantee that this will always +happen. +.PP +The default choices are explicitly or implicitly +specified by the user. +By default, the default choice in an alternation is the +alternative with the shortest possible terminal production. +The user can select one of the other productions in the +alternation as the default choice by putting the keyword +"\fB%default\fR" in front of it. +.PP +By default, for terms with a repetition count containing "*" or +"?" the default choice is to continue with the rest of the rule +in which the term appears, and +.sp 1 +.ft CW +.nf + term+ +.fi +.ft R +.sp 1 +is treated as +.sp 1 +.nf +.ft CW + term term* . +.ft R +.fi +.PP +It is also clear, that it can never be the default choice to do +the term (once more), because this could cause the parser to +loop, inserting tokens forever. +However, when the user does not want the parser to skip +tokens that would not have been skipped if the term +would have been the default choice, +the skipping of such a term can be prevented by +using the keyword "\fB%persistent\fR".
+For instance, the rule +.sp 1 +.ft CW +.nf +commandlist : command* ; +.fi +.ft R +.sp 1 +could be changed to +.sp 1 +.ft CW +.nf +commandlist : [ %persistent command ]* ; +.fi +.ft R +.sp 1 +The effects of this in case of a syntax error are twofold: +The set @T@ mentioned above will be extended as if "command" were +in the default production, so that fewer tokens will be +skipped. +Also, if the first token that is not skipped is a member of the +subset of @T@ arising from the grammar rule for "command", +\fILLparse\fR will enter that rule. +So, in fact the default choice +is determined dynamically (by \fILLparse\fR). +Again, \fILLgen\fR checks (statically) +that \fILLparse\fR will always terminate, and if not, +\fILLgen\fR will complain. +.PP +An important property of this error recovery method is that, +once a rule is started, it will be finished. +This means that all actions in the rule will be executed +normally, so that the user can be sure that there will be no +inconsistencies in his data structures because of syntax +errors. +Also, as the method is in fact error correcting, the +actions in a rule only have to deal with syntactically correct +input. +.NH +Ambiguities and conflicts +.PP +As \fILLgen\fR generates a recursive descent parser with no backtrack, +it must at all times be able to determine what to do, +based on the current input symbol. +Unfortunately, this cannot be done for all grammars. +Two kinds of conflicts can arise : +.IP 1) 10 +the grammar rule is of the form "production1 | production2", +and \fILLparse\fR cannot decide which production to choose. +This we call an \fBalternation conflict\fR. +.IP 2) 10 +the grammar rule is of the form "[ productionrule ]...", +where ... specifies a non-fixed repetition count, +and \fILLparse\fR cannot decide whether to +choose "productionrule" once more, or to continue. +This we call a \fBrepetition conflict\fR.
+.PP +There can be several causes for conflicts: the grammar may be +ambiguous, or the grammar may require a more complex parser +than \fILLgen\fR can construct. +The conflicts can be examined by inspecting the verbose +(-\fBv\fR) option output file. +The conflicts can be resolved by rewriting the grammar +or by using \fBconflict resolvers\fR. +The mechanism described here is based on the attributed parsing +of reference +.[ ( +milton +.]). +.PP +An alternation conflict can be resolved by putting an \fBif condition\fR +in front of the first conflicting production. +It consists of a "\fB%if\fR" followed by a +C-expression between parentheses. +\fILLparse\fR will then evaluate this expression whenever a +token is met at this point on which there is a conflict, so +the conflict will be resolved dynamically. +If the expression evaluates to +non-zero, the first conflicting production is chosen, +otherwise one of the remaining ones is chosen. +.PP +An alternation conflict can also be resolved using the keywords +"\fB%prefer\fR" or "\fB%avoid\fR". "\fB%prefer\fR" +is equivalent in behaviour to +"\fB%if\fR (1)". "\fB%avoid\fR" is equivalent to "\fB%if\fR (0)". +In these cases however, "\fB%prefer\fR" and "\fB%avoid\fR" should be used, +as they resolve the conflict statically and thus +give rise to better C-code. +.PP +A repetition conflict can be resolved by putting a \fBwhile condition\fR +right after the opening parentheses. This while condition +consists of a "\fB%while\fR" followed by a C-expression between +parentheses. Again, \fILLparse\fR will then +evaluate this expression whenever a token is met +at this point on which there is a conflict. +If the expression evaluates to non-zero, the +repeating part is chosen, otherwise the parser continues with +the rest of the rule. +Appendix B will give an example of these features. +.PP +A useful aid in writing conflict resolvers is the "\fB%first\fR" keyword. 
+It is used to declare a C-macro that forms an expression +returning 1 if the parameter supplied can start a specified +nonterminal, f.i.: +.sp 1 +.nf +.ft CW +%first fmac, nonterm ; +.ft R +.sp 1 +.fi +declares "fmac" as a macro with one parameter, whose value +is a token number. If the parameter +X can start the nonterminal "nonterm", "fmac(X)" is true, +otherwise it is false. +.NH +The LLgen working environment +.PP +\fILLgen\fR generates a number of files: one for each input +file, and two other files: \fILpars.c\fR and \fILpars.h\fR. +\fILpars.h\fR contains "#-define"s for the tokennames. +\fILpars.c\fR contains the error recovery routines and tables. +Only those output files that differ from their previous version +are updated. See appendix C for a possible application of this +feature. +.PP +The names of the output files are constructed as +follows: +in the input file name, the suffix after the last point is +replaced by a "c". If no point is present in the input file +name, ".c" is appended to it. \fILLgen\fR checks that the +filename constructed this way in fact represents a previous +version, or does not exist already. +.PP +The user must provide some environment to obtain a complete +program. +Routines called \fImain\fR and \fILLmessage\fR must be defined. +Also, a lexical analyzer must be provided. +.PP +The routine \fImain\fR must be defined, as it must be in every +C-program. It should eventually call one of the startsymbol +routines. +.PP +The routine \fILLmessage\fR must accept one +parameter, whose value is a token number, zero or -1. +.br +A zero parameter indicates that the current token (the one in +the external variable \fILLsymb\fR) is deleted. +.br +A -1 parameter indicates that the parser expected end of file, but didn't get +it. +The parser will then skip tokens until end of file is detected. 
+.br +A parameter that is a token number (a positive parameter) +indicates that this +token is to be inserted in front of the token currently in +\fILLsymb\fR. +The user can give the token the proper attributes. +Also, the user must take care, that the token currently in +\fILLsymb\fR is again returned by the \fBnext\fR call to the +lexical analyzer, with the proper attributes. +So, the lexical analyzer must have a facility to push back one +token. +.PP +The user may also supply his own error recovery routines, or handle +errors differently. For this purpose, the name of a routine to be called +when an error occurs may be declared using the keyword \fB%onerror\fR. +This routine takes two parameters. +The first one is either the token number of the +token expected, or 0. In the last case, the error occurred at a choice. +In both cases, the routine must ensure that the next call to the lexical +analyser returns the token that replaces the current one. Of course, +that could well be the current one, in which case +.I LLparse +recovers from the error. +The second parameter contains a list of tokens that are not skipped at the +error point. The list is in the form of a null-terminated array of integers, +whose address is passed. +.PP +The user must supply a lexical analyzer to read the input stream and +break it up into tokens, which are passed to +.I LLparse. +It should be an integer valued function, returning the token number. +The name of this function can be declared using the +"\fB%lexical\fR" keyword. +This keyword can be used wherever a declaration is legal and may appear +only once in the grammar specification, f.i.: +.sp 1 +.nf +.ft CW +%lexical scanner ; +.ft R +.fi +.sp 1 +declares "scanner" as the name of the lexical analyzer. +The default name for the lexical analyzer is "yylex". +The reason for this funny name is that a useful tool for constructing +lexical analyzers is the +.I Lex +program, +.[ +lex +.] +which generates a routine of that name. 
+.PP +The token numbers are chosen by \fILLgen\fR. +The token number for a literal +is the numerical value of the character in the local character set. +If the tokens have a name, +the "#\ define" mechanism of C is used to give them a value and +to allow the lexical analyzer to return their token numbers symbolically. +These "#\ define"s are collected in the file \fILpars.h\fR which +can be "#\ include"d in any file that needs the token-names. +The maximum token number chosen is defined in the macro \fILL_MAXTOKNO\fP. +.PP +The lexical analyzer must signal the end +of input to \fILLparse\fR +by returning a number less than or equal to zero. +.NH +Programs with more than one parser +.PP +\fILLgen\fR offers a simple facility for having more than one parser in +a program: in this case, the user can change the names of global procedures, +variables, etc, by giving a different prefix, like this: +.sp 1 +.nf +.ft CW +%prefix XX ; +.ft R +.fi +.sp 1 +The effect of this is that all global names start with XX instead of LL, for +the parser that has this prefix. This holds for the variables \fILLsymb\fP, +which now is called \fIXXsymb\fP, for the routine \fILLmessage\fP, +which must now be called \fIXXmessage\fP, and for the macro \fILL_MAXTOKNO\fP, +which is now called \fIXX_MAXTOKNO\fP. +\fILL.output\fP is now \fIXX.output\fP, and \fILpars.c\fP and \fILpars.h\fP +are now called \fIXXpars.c\fP and \fIXXpars.h\fP. +.bp +.SH +References +.[ +$LIST$ +.] +.bp +.SH +Appendix A : LLgen Input Syntax +.PP +This appendix has a description of the \fILLgen\fR input syntax, +as a \fILLgen\fR specification. As a matter of fact, the current +version of \fILLgen\fR is written with \fILLgen\fR. 
+.nf +.ft CW +.sp 2 +/* + * First the declarations of the terminals + * The order is not important + */ + +%token IDENTIFIER; /* terminal or nonterminal name */ +%token NUMBER; +%token LITERAL; + +/* + * Reserved words + */ + +%token TOKEN; /* %token */ +%token START; /* %start */ +%token PERSISTENT; /* %persistent */ +%token IF; /* %if */ +%token WHILE; /* %while */ +%token AVOID; /* %avoid */ +%token PREFER; /* %prefer */ +%token DEFAULT; /* %default */ +%token LEXICAL; /* %lexical */ +%token PREFIX; /* %prefix */ +%token ONERROR; /* %onerror */ +%token FIRST; /* %first */ + +/* + * Declare LLparse to be a C-routine that recognizes "specification" + */ + +%start LLparse, specification; + +specification + : declaration* + ; + +declaration + : START + IDENTIFIER ',' IDENTIFIER + ';' + | '{' + /* Read C-declaration here */ + '}' + | TOKEN + IDENTIFIER + [ ',' IDENTIFIER ]* + ';' + | FIRST + IDENTIFIER ',' IDENTIFIER + ';' + | LEXICAL + IDENTIFIER + ';' + | PREFIX + IDENTIFIER + ';' + | ONERROR + IDENTIFIER + ';' + | rule + ; + +rule : IDENTIFIER parameters? ldecl? + ':' productions + ';' + ; + +ldecl : '{' + /* Read C-declaration here */ + '}' + ; + +productions + : simpleproduction + [ '|' simpleproduction ]* + ; + +simpleproduction + : DEFAULT? + [ IF '(' /* Read C-expression here */ ')' + | PREFER + | AVOID + ]? + [ element repeats ]* + ; + +element : '{' + /* Read action here */ + '}' + | '[' [ WHILE '(' /* Read C-expression here */ ')' ]? + PERSISTENT? + productions + ']' + | LITERAL + | IDENTIFIER parameters? + ; + +parameters + : '(' /* Read C-parameters here */ ')' + ; + +repeats : /* empty */ + | [ '*' | '+' ] NUMBER? + | NUMBER + | '?' + ; + +.fi +.ft R +.bp +.SH +Appendix B : An example +.PP +This example gives the complete \fILLgen\fR specification of a simple +desk calculator. It has 26 registers, labeled "a" through "z", +and accepts arithmetic expressions made up of the C operators ++, -, *, /, %, &, and |, with their usual priorities. 
+The value of the expression is +printed. As in C, an integer that begins with 0 is assumed to +be octal; otherwise it is assumed to be decimal. +.PP +Although the example is short and not very complicated, it +demonstrates the use of if and while conditions. In +the example they are in fact used to reduce the number of +nonterminals, and to reduce the overhead due to the recursion +that would be involved in parsing an expression with an +ordinary recursive descent parser. In an ordinary LL(1) +grammar there would be one nonterminal for each operator +priority. The example shows how we can do it all with one +nonterminal, no matter how many priority levels there are. +.sp 1 +.nf +.ft CW +{ +#include <stdio.h> +#include <ctype.h> +#define MAXPRIO 5 +#define prio(op) (ptab[op]) + +struct token { + int t_tokno; /* token number */ + int t_tval; /* Its attribute */ +} stok = { 0,0 }, tok; + +int nerrors = 0; +int regs[26]; /* Space for the registers */ +int ptab[128]; /* Attribute table */ + +struct token +nexttok() { /* Read next token and return it */ + register c; + struct token new; + + while ((c = getchar()) == ' ' || c == '\et') { /* nothing */ } + if (isdigit(c)) new.t_tokno = DIGIT; + else if (islower(c)) new.t_tokno = IDENT; + else new.t_tokno = c; + if (c >= 0) new.t_tval = ptab[c]; + return new; +} } + +%token DIGIT, IDENT; +%start parse, list; + +list : stat* ; + +stat { int ident, val; } : + %if (stok = nexttok(), + stok.t_tokno == '=') + /* The conflict is resolved by looking one further + * token ahead.
The grammar is LL(2) + */ + IDENT + { ident = tok.t_tval; } + '=' expr(1,&val) '\en' + { if (!nerrors) regs[ident] = val; } + | expr(1,&val) '\en' + { if (!nerrors) printf("%d\en",val); } + | '\en' + ; + +expr(int level; int *val;) { int expr; } : + factor(val) + [ %while (prio(tok.t_tokno) >= level) + /* Swallow operators as long as their priority is + * larger than or equal to the level of this invocation + */ + '+' expr(prio('+')+1,&expr) + { *val += expr; } + /* This states that '+' groups left to right. If it + * should group right to left, the rule should read: + * '+' expr(prio('+'),&expr) + */ + | '-' expr(prio('-')+1,&expr) + { *val -= expr; } + | '*' expr(prio('*')+1,&expr) + { *val *= expr; } + | '/' expr(prio('/')+1,&expr) + { *val /= expr; } + | '%' expr(prio('%')+1,&expr) + { *val %= expr; } + | '&' expr(prio('&')+1,&expr) + { *val &= expr; } + | '|' expr(prio('|')+1,&expr) + { *val |= expr; } + ]* + /* Notice the "*" here. It is important. + */ + ; + +factor(int *val;): + '(' expr(1,val) ')' + | '-' expr(MAXPRIO+1,val) + { *val = -*val; } + | number(val) + | IDENT + { *val = regs[tok.t_tval]; } + ; + +number(int *val;) { int base; } + : DIGIT + { base = (*val=tok.t_tval)==0?8:10; } + [ DIGIT + { *val = base * *val + tok.t_tval; } + ]* ; + +%lexical scanner ; +{ +scanner() { + if (stok.t_tokno) { /* a token has been inserted or read ahead */ + tok = stok; + stok.t_tokno = 0; + return tok.t_tokno; + } + if (nerrors && tok.t_tokno == '\en') { + printf("ERROR\en"); + nerrors = 0; + } + tok = nexttok(); + return tok.t_tokno; +} + +LLmessage(insertedtok) { + nerrors++; + if (insertedtok) { /* token inserted, save old token */ + stok = tok; + tok.t_tval = 0; + if (insertedtok < 128) tok.t_tval = ptab[insertedtok]; + } +} + +main() { + register *p; + + for (p = ptab; p < &ptab[128]; p++) *p = 0; + /* for letters, their attribute is their index in the regs array */ + for (p = &ptab['a']; p <= &ptab['z']; p++) *p = p - &ptab['a']; + /* for digits, their 
attribute is their value */ + for (p = &ptab['0']; p <= &ptab['9']; p++) *p = p - &ptab['0']; + /* for operators, their attribute is their priority */ + ptab['*'] = 4; + ptab['/'] = 4; + ptab['%'] = 4; + ptab['+'] = 3; + ptab['-'] = 3; + ptab['&'] = 2; + ptab['|'] = 1; + parse(); + exit(nerrors); +} } +.fi +.ft R +.bp +.SH +Appendix C. How to use \fILLgen\fR. +.PP +This appendix demonstrates how \fILLgen\fR can be used in +combination with the \fImake\fR program, to make effective use +of the \fILLgen\fR-feature that it only changes output files +when necessary. \fIMake\fR uses a "makefile", which +is a file containing dependencies and associated commands. +A dependency usually indicates that some files depend on other +files. When a file depends on another file and is older than +that other file, the commands associated with the dependency +are executed. +.PP +So, \fImake\fR seems just the program that we always wanted. +However, it +is not very good at handling programs that generate more than +one file. +As usual, there is a way around this problem. +A sample makefile follows: +.sp 1 +.ft CW +.nf +# The grammar consists of the files decl.g, stat.g and expr.g. +# The ".o"-files are the result of a C-compilation. + +GFILES = decl.g stat.g expr.g +OFILES = decl.o stat.o expr.o Lpars.o +LLOPT = + +# As make doesn't handle programs that generate more than one +# file well, we just don't tell make about it. +# We just create a dummy file, and touch it whenever LLgen is +# executed. This way, the dummy in fact depends on the grammar +# files. +# Then, we execute make again, to do the C-compilations and +# such.
+ +all: dummy + make parser + +dummy: $(GFILES) + LLgen $(LLOPT) $(GFILES) + touch dummy + +parser: $(OFILES) + $(CC) -o parser $(LDFLAGS) $(OFILES) + +# Some dependencies without actions : +# make already knows what to do about them + +Lpars.o: Lpars.h +stat.o: Lpars.h +decl.o: Lpars.h +expr.o: Lpars.h + +.fi +.ft R diff --git a/doc/LLgen/LLgen.refs b/doc/LLgen/LLgen.refs new file mode 100644 index 0000000..df73595 --- /dev/null +++ b/doc/LLgen/LLgen.refs @@ -0,0 +1,54 @@ +%T An ALL(1) Compiler Generator +%A D. R. Milton +%A L. W. Kirchhoff +%A B. R. Rowland +%B Proc. of the SIGPLAN '79 Symposium on Compiler Construction +%D August 1979 +%J SIGPLAN Notices +%N 8 +%P 152-157 +%V 14 + +%T Lex - A Lexical Analyser Generator +%A M. E. Lesk +%I Bell Laboratories +%D October 1975 +%C Murray Hill, New Jersey +%R Comp. Sci. Tech. Rep. No. 39 + +%T Yacc: Yet Another Compiler Compiler +%A S. C. Johnson +%I Bell Laboratories +%D 1975 +%C Murray Hill, New Jersey +%R Comp. Sci. Tech. Rep. No. 32 + +%T The C Programming Language +%A B. W. Kernighan +%A D. M. Ritchie +%I Prentice-Hall, Inc. +%C Englewood Cliffs, New Jersey +%D 1978 + +%A M. Griffiths +%T LL(1) Grammars and Analysers +%E F. L. Bauer and J. Eickel +%B Compiler Construction, An Advanced Course +%I Springer-Verlag +%C New York, N.Y. +%D 1974 + +%T Make - A Program for Maintaining Computer Programs +%A S. I. Feldman +%J Software - Practice and Experience +%V 10 +%N 8 +%P 255-265 +%D August 1979 + +%T Methods for the Automatic Construction of Error Correcting Parsers +%A J. R\*:ohrich +%J Acta Informatica +%V 13 +%P 115-139 +%D 1980 diff --git a/doc/LLgen/proto.make b/doc/LLgen/proto.make new file mode 100644 index 0000000..4356282 --- /dev/null +++ b/doc/LLgen/proto.make @@ -0,0 +1,20 @@ +# $Id: proto.make,v 1.4 1997/07/10 07:58:30 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/doc/LLgen + +GRAP=grap +PIC=pic +EQN=eqn +REFER=refer +TBL=tbl + +all: $(TARGET_HOME)/doc/LLgen.doc $(TARGET_HOME)/doc/LLgen_NCER.doc + +$(TARGET_HOME)/doc/LLgen.doc: $(SRC_DIR)/LLgen.n $(SRC_DIR)/LLgen.refs + $(REFER) -sA+T -p $(SRC_DIR)/LLgen.refs $(SRC_DIR)/LLgen.n | $(EQN) | $(TBL) > $@ + +$(TARGET_HOME)/doc/LLgen_NCER.doc: $(SRC_DIR)/LLgen_NCER.n + $(GRAP) $(SRC_DIR)/LLgen_NCER.n | pic | eqn > $@ + diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..283839a --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,82 @@ +# $Id: Makefile,v 1.35 1996/12/04 14:03:12 ceriel Exp $ + +# This Makefile is not supposed to be used in the doc source directory. +# Instead, it is supposed to be copied to the target doc directory. + +SUF=dit +PRINT=dis +NROFF=troff +MS=-ms +OPR=dip + +RESFILES= \ + toolkit.$(SUF) install.$(SUF) em.$(SUF) ack.$(SUF) v7bugs.$(SUF) \ + peep.$(SUF) cg.$(SUF) ncg.$(SUF) regadd.$(SUF) LLgen.$(SUF) \ + basic.$(SUF) crefman.$(SUF) pascal.$(SUF) pcref.$(SUF) val.$(SUF) \ + ansi_C.$(SUF) \ + 6500.$(SUF) i80.$(SUF) z80.$(SUF) top.$(SUF) ego.$(SUF) \ + m68020.$(SUF) occam.$(SUF) m2ref.$(SUF) ceg.$(SUF) nopt.$(SUF) \ + sparc.$(SUF) int.$(SUF) lint.$(SUF) + +.SUFFIXES: .doc .$(SUF) .lpr .out + +.doc.$(SUF): + $(NROFF) $(MS) $< > $@ + +# directly to the printer: +.doc.lpr: + $(NROFF) $(MS) $< | $(OPR) + +# to standard output +.doc.out: + @$(NROFF) $(MS) $< + +# Exceptions, to be run without -ms + +v7bugs.$(SUF): v7bugs.doc + $(NROFF) v7bugs.doc >$@ + +v7bugs.lpr: v7bugs.doc + $(NROFF) v7bugs.doc | $(OPR) + +v7bugs.out: v7bugs.doc + @$(NROFF) v7bugs.doc + +pcref.$(SUF): pcref.doc + $(NROFF) pcref.doc >$@ + +pcref.lpr: pcref.doc + $(NROFF) pcref.doc | $(OPR) + +pcref.out: pcref.doc + @$(NROFF) pcref.doc + +val.$(SUF): val.doc + $(NROFF) val.doc >$@ + +val.lpr: val.doc + $(NROFF) val.doc | $(OPR) + +val.out: val.doc + @$(NROFF) val.doc + +pr: + @make "SUF="$(SUF) "NROFF="$(NROFF) "MS="$(MS) \ + $(RESFILES) >make.pr.out 
2>&1 + @$(PRINT) $(RESFILES) + +# The 'opr' entry creates a lot of paper ... but the user must be able +# to write the doc directory. I hope that this limits the users of +# this entry to persons that know what they are doing. +opr: + @make "SUF="$(SUF) "NROFF="$(NROFF) "MS="$(MS) $(RESFILES) + $(OPR) $(RESFILES) + +clean: + -rm -f $(RESFILES) + +# The distr entry is only used when making a distribution tree. +# It makes a version of the installation manual, suitable for a simple +# line printer. +distr: install.doc + tbl install.doc | nroff -Tlp $(MS) >install.pr diff --git a/doc/READ_ME b/doc/READ_ME new file mode 100644 index 0000000..1ca1295 --- /dev/null +++ b/doc/READ_ME @@ -0,0 +1,8 @@ +Some of these documents use a font called CW. +If this font is not available, reference to it can be changed with +a sed-script like + s/\.ft CW/.ft yourfont/ + s/\\f(CW/\\fyourfont/g + s/^.fp\(.*\)CW$/.fp\1yourfont/ +However, the font must be a constant-width font for the documents to look +reasonable. diff --git a/doc/ack.doc b/doc/ack.doc new file mode 100644 index 0000000..7d4b4d3 --- /dev/null +++ b/doc/ack.doc @@ -0,0 +1,444 @@ +.\" $Id: ack.doc,v 1.10 1994/06/24 10:01:39 ceriel Exp $ +.nr PD 1v +.tr ~ +.TL +Ack Description File +.br +Reference Manual +.AU +Ed Keizer +.AI +Vakgroep Informatica +Vrije Universiteit +Amsterdam +.NH +Introduction +.PP +The program \fIack\fP(I) internally maintains a table of +possible transformations and a table of string variables. +The transformation table contains one entry for each possible +transformation of a file. +Which transformations are used depends on the suffix of the +source file. +Each transformation table entry tells which input suffixes are +allowed and what suffix/name the output file has. 
+When the output file does not already satisfy the request of the +user (indicated with the flag \fB\-c.suffix\fP), the table is scanned +starting with the next transformation in the table for another +transformation that has as input suffix the output suffix of +the previous transformation. +A few special transformations are recognized, among them is the +combiner, which is +a program combining several files into one. +When no stop suffix was specified (flag \fB\-c.suffix\fP) \fIack\fP +stops after executing the combiner with as arguments the \- +possibly transformed \- input files and libraries. +\fIAck\fP will only perform the transformations in the order in +which they are presented in the table. +.LP +The string variables are used while creating the argument list +and program call name for +a particular transformation. +.NH +Which descriptions are used +.PP +\fIAck\fP always uses two description files: one to define the +front-end transformations and one for the machine dependent +back-end transformations. +Each description has a name. +First the way of determining +the name of the descriptions needed is described. +.PP +When the shell environment variable ACKFE is set \fIack\fP uses +that to determine the front-end table name, otherwise it uses +\fBfe\fP. +.PP +The way the backend table name is determined is more +convoluted. +.br +First, when the last filename in the program call name is not +one of \fIack\fP or the front-end call-names, +this filename is used as the backend description name. +Second, when the \fB\-m\fP is present the \fB\-m\fP is chopped off this +flag and the rest is used as the backend description name. +Third, when both failed the shell environment variable ACKM is +used. +Last, when also ACKM was not present the default backend is +used, determined by the definition of ACKM in h/local.h. +The presence and value of the definition of ACKM are +determined at compile time of \fIack\fP.
+.PP +Now, we have the names, but that is only the first step. +\fIAck\fP stores a few descriptions at compile time. +These descriptions are simply files read in at compile time. +At the moment of writing this document, the descriptions +included are: pdp, fe, i86, m68k2, vax2 and int. +The name of a description is first searched for internally, +then in lib/descr/\fIname\fP, then in +lib/\fIname\fP/descr, and finally in the current +directory of the user. +.NH +Using the description file +.PP +Before starting on a narrative of the description file, +the introduction of a few terms is necessary. +All these terms are used to describe the scanning of zero +terminated strings, thereby producing another string or +sequence of strings. +.IP Backslashing 5 +.br +All characters preceded by \e are modified to prevent +recognition at further scanning. +This modification is undone before a string is passed to the +outside world as argument or message. +When reading the description files the +sequences \e\e, \e# and \e<newline> have a special meaning. +\e\e translates to a single \e, \e# translates to a single # +that is not +recognized as the start of comment, but can be used in +recognition and finally, \e<newline> translates to nothing at +all, thereby allowing continuation lines. +.nr PD 0 +.IP "Variable replacement" +.br +The scan recognizes the sequences {{, {NAME} and {NAME?text}, +where NAME can be any combination of characters excluding ? and +} and text may be anything excluding }. +(~\e} is allowed of course~) +The first sequence produces an unescaped single {. +The second produces the contents of the NAME, definitions are +done by \fIack\fP and in description files. +When the NAME is not defined an error message is produced on +the diagnostic output. +The last sequence produces the contents of NAME if it is +defined and text otherwise.
+.PP +.IP "Expression replacement" +.br +Syntax: (\fIsuffix sequence\fP:\fIsuffix sequence\fP=\fItext\fP) +.br +Example: (.c.p.e:.e=tail_em) +.br +If the two suffix sequences have a common member \-~\&.e in this +case~\- the text is produced. +When no common member is present the empty string is produced. +Thus the example given is a constant expression. +Normally, one of the suffix sequences is produced by variable +replacement. +\fIAck\fP sets three variables while performing the diverse +transformations: HEAD, TAIL and RTS. +All three variables depend on the properties \fIrts\fP and +\fIneed\fP from the transformations used. +Whenever a transformation is used for the first time, +the text following the \fIneed\fP is appended to both the HEAD and +TAIL variable. +The value of the variable RTS is determined by the first +transformation used with a \fIrts\fP property. +.IP +Two runtime flags have effect on the value of one or more of +these variables. +The flag \fB\-.suffix\fP has the same effect on these three variables +as if a file with that \fBsuffix\fP was included in the argument list +and had to be translated. +The flag \fB\-r.suffix\fP only has that effect on the TAIL +variable. +The program call names \fIacc\fP and \fIcc\fP have the effect +of an automatic \fB\-.c\fP flag. +\fIApc\fP and \fIpc\fP have the effect of an automatic \fB\-.p\fP flag. +.IP "Line splitting" +.br +The string is transformed into a sequence of strings by replacing +the blank space by string separators (nulls). +.IP "IO replacement" +.br +The > in the string is replaced by the output file name. +The < in the string is replaced by the input file name. +When multiple input files are present the string is duplicated +for each input file name. +.nr PD 1v +.LP +Each description is a sequence of variable definitions followed +by a sequence of transformation definitions. +Variable definitions use a line each, transformations +definitions consist of a sequence of lines. 
+Empty lines are discarded, as are lines with nothing but +comment. +Comment is started by a # character, and continues to the end +of the line. +Three special two-character sequences exist: \e#, \e\e and +\e<newline>. +Their effect is described under 'backslashing' above. +Each \- nonempty \- line starts with a keyword, possibly +preceded by blank space. +The keyword can be followed by a further specification. +The two are separated by blank space. +.PP +Variable definitions use the keyword \fIvar\fP and look like this: +.DS X + var NAME=text +.DE +The name can be any identifier, the text may contain any +character. +Blank space before the equal sign is not part of the NAME. +Blank space after the equal is considered as part of the text. +The text is scanned for variable replacement before it is +associated with the variable name. +.br +.sp 2 +The start of a transformation definition is indicated by the +keyword \fIname\fP. +The last line of such a definition contains the keyword +\fIend\fP. +The lines in between associate properties to a transformation +and may be presented in any order. +The identifier after the \fIname\fP keyword determines the name +of the transformation. +This name is used for debugging and by the \fB\-R\fP flag. +The keywords are used to specify which input suffixes are +recognized by that transformation, +the program to run, the arguments to be handed to that program +and the name or suffix of the resulting output file. +Two keywords are used to indicate which run-time startoffs and +libraries are needed. +The possible keywords are: +.IP \fIfrom\fP +.br +followed by a sequence of suffixes. +Each file with one of these suffixes is allowed as input file. +Preprocessor transformations do not need the \fIfrom\fP +keyword. All other transformations do. +.nr PD 0 +.IP \fIto\fP +.br +followed by the suffix of the output file name or in the case of a +linker +the output file name.
+.IP \fIprogram\fP +.br +followed by name of the load file of the program, a pathname most likely +starts with either a / or {EM}. +This keyword must be +present, the remainder of the line +is subject to backslashing and variable replacement. +.IP \fImapflag\fP +.br +The mapflags are used to grab flags given to \fIack\fP and +pass them on to a specific transformation. +This feature uses a few simple pattern matching and replacement +facilities. +Multiple occurrences of this keyword are allowed. +This text following the keyword is +subjected to backslashing. +The keyword is followed by a match expression and a variable +assignment separated by blank space. +As soon as both description files are read, \fIack\fP looks +at all transformations in these files to find a match for the +flags given to \fIack\fP. +The flags \fB\-m\fP, \fB\-o\fP, +\fB\-O\fP, \fB\-r\fP, \fB\-v\fP, \fB\-g\fP, \-\fB\-c\fP, \fB\-t\fP, +\fB\-k\fP, \fB\-R\fP and \-\fB\-.\fP are specific to \fIack\fP and +not handed down to any transformation. +The matching is performed in the order in which the entries +appear in the definition. +The scanning stops after first match is found. +When a match is found, the variable assignment is executed. +A * in the match expression matches any sequence of characters, +a * in the right hand part of the assignment is +replaced by the characters matched by +the * in the expression. +The right hand part is also subject to variable replacement. +The variable will probably be used in the program arguments. +The \fB\-l\fP flags are special, +the order in which they are presented to \fIack\fP must be +preserved. +The identifier LNAME is used in conjunction with the scanning of +\fB\-l\fP flags. +The value assigned to LNAME is used to replace the flag. +The example further on shows the use of all this. +.IP \fIargs\fP +.br +The keyword is followed by the program call arguments. 
+It is subject to backslashing, variable replacement, expression +replacement, line splitting and IO replacement. +The variables assigned to by \fImapflags\fP will probably be +used here. +The flags not recognized by \fIack\fP or any of the transformations +are passed to the linker and inserted before all other arguments. +.IP \fIstdin\fP +.br +This keyword indicates that the transformation reads from standard input. +.IP \fIstdout\fP +.br +This keyword indicates that the transformation writes on standard output. +.IP \fIoptimizer\fP +.br +The presence of this keyword indicates that this transformation is an optimizer. +It can be followed by a number, indicating the "level" of the +optimizer (see description of the -O option in the ack(1ACK) manual page). +.IP \fIpriority\fP +.br +This \-~optional~\- keyword is followed by a number. Positive priority means +that the transformation is likely to be used, negative priority means that +the transformation is unlikely to be used. +Priorities can also be set with an ack(1ACK) command line option. +Priorities come in handy when there are several implementations of a +certain transformation. They can then be used to select a default one. +.IP \fIlinker\fP +.br +This keyword indicates that this transformation is the linker. +.IP \fIcombiner\fP +.br +This keyword indicates that this transformation is a combiner. A combiner +is a program combining several files into one, but is not a linker. +An example of a combiner is the global optimizer. +.IP \fIprep\fP +.br +This \-~optional~\- keyword is followed by an option indicating its relation +to the preprocessor. +The possible options are: +.DS X + always the input files must be preprocessed + cond the input files must be preprocessed when starting with # + is this transformation is the preprocessor +.DE +.IP \fIrts\fP +.br +This \-~optional~\- keyword indicates that the rest of the line must be +used to set the variable RTS, if it was not already set.
+Thus the variable RTS is set by the first transformation +executed with such a property or as a result of \fIack\fP's program +call name (acc, cc, apc or pc) or by the \fB\-.suffix\fP flag. +.IP \fIneed\fP +.br +This \-~optional~\- keyword indicates that the rest of the line must be +concatenated to the HEAD and TAIL variables. +This is done once for every transformation used or indicated +by one of the program call names mentioned above or indicated +by the \fB\-.suffix\fP flag. +.br +.nr PD 1v +.NH +Conventions used in description files +.PP +\fIAck\fP reads two description files. +A few of the variables defined in the machine specific file +are used by the descriptions of the front-ends. +Other variables, set by \fIack\fP, are of use to all +transformations. +.PP +\fIAck\fP sets the variable EM to the home directory of the +Amsterdam Compiler Kit. +The variable SOURCE is set to the name of the argument that is currently +being massaged, this is useful for debugging. +The variable SUFFIX is set to the suffix of the argument that is +currently being massaged. +.br +The variable M indicates the +directory in lib/{M}/tail_..... and NAME is the string to +be defined by the preprocessor with \-D{NAME}. +The definitions of {w}, {s}, {l}, {d}, {f} and {p} indicate +EM_WSIZE, EM_SSIZE, EM_LSIZE, EM_DSIZE, EM_FSIZE and EM_PSIZE +respectively. +.br +The variable INCLUDES is used as the last argument to \fIcpp\fP. +It is used to add directories to +the list of directories containing #include files. +.PP +The variables HEAD, TAIL and RTS are set by \fIack\fP and used +to compose the arguments for the linker. +.NH +Example +.PP +Description for front-end +.DS X +.ta 4n 40n +name cpp # the C-preprocessor + # no from, it's governed by the P property + to .i # result files have suffix i + program {EM}/lib/cpp # pathname of loadfile + mapflag \-I* CPP_F={CPP_F?} \-I* # grab \-I.. \-U.. and + mapflag \-U* CPP_F={CPP_F?} \-U* # \-D..
to use as arguments + mapflag \-D* CPP_F={CPP_F?} \-D* # in the variable CPP_F + args {CPP_F?} {INCLUDES?} \-D{NAME} \-DEM_WSIZE={w} \-DEM_PSIZE={p} \e + \-DEM_SSIZE={s} \-DEM_LSIZE={l} \-DEM_FSIZE={f} \-DEM_DSIZE={d} < + # The arguments are: first the \-[IUD]... + # then the include dir's for this machine + # then the NAME and size values finally + # followed by the input file name + stdout # Output on stdout + prep is # Is preprocessor +end +name cem # the C-compiler proper + from .c # used for files with suffix .c + to .k # produces compact code files + program {EM}/lib/em_cem # pathname of loadfile + mapflag \-p CEM_F={CEM_F?} \-Xp # pass \-p as \-Xp to cem + mapflag \-L CEM_F={CEM_F?} \-l # pass \-L as \-l to cem + args \-Vw{w}i{w}p{p}f{f}s{s}l{l}d{d} {CEM_F?} + # the arguments are the object sizes in + # the \-V... flag and possibly \-l and \-Xp + stdin # input from stdin + stdout # output on stdout + prep always # use cpp + rts .c # use the C run-time system + need .c # use the C libraries +end +name decode # make human readable files from compact code + from .k.m # accept files with suffix .k or .m + to .e # produce .e files + program {EM}/lib/em_decode # pathname of loadfile + args < # the input file name is the only argument + stdout # the output comes on stdout +end +.DE + +.DS X +.ta 4n 40n +Example of a backend, in this case the EM assembler/loader. 
+ +var w=2 # wordsize 2 +var p=2 # pointersize 2 +var s=2 # short size 2 +var l=4 # long size 4 +var f=4 # float size 4 +var d=8 # double size 8 +var M=em22 +var NAME=em22 # for cpp (NAME=em22 results in #define em22 1) +var LIB=lib/{M}/tail_ # part of file name for libraries +var RT=lib/{M}/head_ # part of file name for run-time startoff +var SIZE_FLAG=\-sm # default internal table size flag +var INCLUDES=\-I{EM}/include # use {EM}/include for #include files +name asld # Assembler/loader + from .k.m.a # accepts compact code and archives + to e.out # output file name + program {EM}/lib/em_ass # load file pathname + mapflag \-l* LNAME={EM}/{LIB}* # e.g. \-ly becomes + # {EM}/mach/int/lib/tail_y + mapflag \-+* ASS_F={ASS_F?} \-+* # recognize \-+ and \-\- + mapflag \-\-* ASS_F={ASS_F?} \-\-* + mapflag \-s* SIZE_FLAG=\-s* # overwrite old value of SIZE_FLAG + args {SIZE_FLAG} \e + ({RTS}:.c={EM}/{RT}cc) ({RTS}:.p={EM}/{RT}pc) \-o > < \e + (.p:{TAIL}={EM}/{LIB}pc) \e + (.c:{TAIL}={EM}/{LIB}cc.1s {EM}/{LIB}cc.2g) \e + (.c.p:{TAIL}={EM}/{LIB}mon) + # \-s[sml] must be first argument + # the next line contains the choice for head_cc or head_pc + # and the specification of in- and output. + # the last three args lines choose libraries + linker +end +.DE + +The command \fIack \-mem22 \-v \-v \-I../h \-L \-ly prog.c\fP +would result in the following +calls (with exec(II)): +.DS X +.ta 4n +1) /lib/cpp \-I../h \-I/usr/em/include \-Dem22 \-DEM_WSIZE=2 \-DEM_PSIZE=2 \e + \-DEM_SSIZE=2 \-DEM_LSIZE=4 \-DEM_FSIZE=4 \-DEM_DSIZE=8 prog.c +2) /usr/em/lib/em_cem \-Vw2i2p2f4s2l4d8 \-l +3) /usr/em/lib/em_ass \-sm /usr/em/lib/em22/head_cc \-o e.out prog.k + /usr/em/lib/em22/tail_y /usr/em/lib/em22/tail_cc.1s + /usr/em/lib/em22/tail_cc.2g /usr/em/lib/em22/tail_mon +.DE diff --git a/doc/ansi_C.doc b/doc/ansi_C.doc new file mode 100755 index 0000000..df9d0c2 --- /dev/null +++ b/doc/ansi_C.doc @@ -0,0 +1,365 @@ +.de NS +.sp +.in 0 +\\fBANS \\$1:\\fP +.. 
+.TL +Amsterdam Compiler Kit-ANSI C compiler compliance statements +.AU +Hans van Eck +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.PP +This document specifies the implementation-defined behaviour of the ANSI-C +front end of the Amsterdam Compiler Kit as required by ANS X3.159-1989. Since +the implementation-defined behaviour sometimes depends on the machine +compiling on or for, some items will be left unspecified in this +document\(dg. +.FS +\(dg when cross-compiling, run-time behaviour may be different from +compile-time behaviour +.FE +The compiler assumes that it runs on a UNIX system. +.NS A.6.3.1 +.IP - +Diagnostics are placed on the standard error output. They have the +following specification: +.br +"", line : [()] +.br +There are three classes of diagnostics: "error", "strict" and "warning". +When the class is "error", the is absent. +.br +The class "strict" is used for violations of the standard which are +not severe enough to stop compilation. An example is the occurrence +of non white-space after an '#else' or '#endif' pre-processing +directive. The class "warning" is used for legal but dubious +constructions. An example is overflow of constant expressions. +.NS A.6.3.2 +.IP - +The function 'main' can have two arguments. The first argument is an +integer specifying the number of arguments on the command line. The second +argument is a pointer to an array of pointers to the arguments (as +strings). +.IP - +Interactive devices are terminals. +.NS A.6.3.3 +.IP - +The number of significant characters is an option. By default it is 64. +There is a distinction between upper and lower case. +.NS A.6.3.4 +.IP - +The compiler assumes ASCII-characters in both the source and execution +character set. +.IP - +There are no multi-byte characters. +.IP - +There are 8 bits in a character. +.IP - +Character constants with values that can not be represented in 8 bits +are truncated. 
+.IP - +Character constants that are more than 1 character wide will have the +first character specified in the least significant byte. +.IP - +The only supported locale is "C". +.IP - +A plain 'char' has the same range of values as 'signed char'. +.NS A.6.3.5 +.IP - +The compiler assumes that it works on and compiles for a +2-complement binary-number system. Shorts will use 2 bytes and longs +will use 4 bytes. The size of integers is machine dependent. +.IP - +Converting an integer to a shorter signed integer is implemented by +ignoring the high-order byte(s) of the former. +Converting an unsigned integer to a signed integer of the same type is +only done in administration. This means that the bit-pattern remains +unchanged. +.IP - +The result of bitwise operations on signed integers is what can be +expected on a 2-complement machine. +.IP - +If either operand is negative, whether the result of the / operator is the +largest integer less than or equal to the algebraic quotient or the +smallest integer greater than or equal to the algebraic quotient is machine +dependent, as is the sign of the result of the % operator. +.IP - +The right-shift of a negative value is negative. +.NS A.6.3.6 +.IP - +The representation of floating-point values is machine-dependent. +When native floating-point is not present an IEEE-emulation is used. +The compiler uses high-precision floating-point for constant folding. +.IP - +Truncation is always to the nearest floating-point number that can +be represented. +.NS A.6.3.7 +.IP - +The type returned by the sizeof-operator (also known as size_t) +is 'unsigned int'. This is done for backward compatibility reasons. +.IP - +Casting an integer to a pointer or vice versa has no effect in +bit-pattern when the sizes are equal. Otherwise the value will be +truncated or zero-extended (depending on the direction of the +conversion and the relative sizes). +.IP - +When a pointer is as large as an integer, the type of a 'ptrdiff_t' will +be 'int'. 
Otherwise the type will be 'long'. +.NS A.6.3.8 +.IP - +Since the front end has only limited control over the registers, it can +only make it more likely that variables that are declared as +registers also end up in registers. The only things that can possibly be +put into registers are : 'int', 'long', 'float', 'double', 'long double' +and pointers. +.NS A.6.3.9 +.IP - +When a member of a union object is accessed using a member of a +different type, the resulting value will usually be garbage. The +compiler makes no effort to catch these errors. +.IP - +The alignment of types is a compile-time option. The alignment of +a structure-member is the alignment of its type. Usually, the +alignment is passed on to the compiler by the 'ack' program. When a +user wants to do this manually, he/she should be prepared for trouble. +.IP - +A "plain" 'int' bit-field is taken as a 'signed int'. This means that +a field with a size of 1 bit can only store the values 0 and -1. +.IP - +The order of allocation of bit-fields is a compile-time option. By +default, high-order bits are allocated first. +.IP - +An enum has the same size as a "plain" 'int'. +.NS A.6.3.10 +.IP - +An access to a volatile declared variable is done by just mentioning +the variable. E.g. the statement "x;" where x is declared volatile, +constitutes an access. +.NS A.6.3.11 +.IP - +There is no fixed limit on the number of declarators that may modify an +arithmetic, structure or union type, although specifying too many may +cause the compiler to run out of memory. +.NS A.6.3.12 +.IP - +The maximum number of cases in a switch-statement is in the order of +1e9, although the compiler may run out of memory somewhat earlier. +.NS A.6.3.13 +.IP - +Since both the pre-processor and the compiler assume ASCII-characters, +a single character constant in a conditional-inclusion directive +matches the same value in the execution character set. +.IP - +The pre-processor recognizes -I... command-line options. 
The +directories thus specified are searched first. After that, depending on the +command that the preprocessor is called with, machine/system-dependent +directories are searched. After that, ~em/include/_tail_ac and +/usr/include are visited. +.IP - +Quoted names are first looked for in the directory in which the file +which does the include resides. +.IP - +The characters in a h- or q- char-sequence are taken to be UNIX +paths. +.IP - +Neither the compiler nor the preprocessor know any pragmas. +.IP - +Since the compiler runs on UNIX, __DATE__ and __TIME__ will always be +defined. +.NS A.6.3.14 +.IP - +NULL is defined as ((void *)0). This in order to flag dubious +constructions like "int x = NULL;". +.IP - +The diagnostic printed by 'assert' is as follows: +.ti +4n +"Assertion "" failed, file "", line ", +.br +where is the argument to the assert macro, printed as string. +(the and should be clear) +.KS +.IP - +The sets for character test macros. +.TS +l l. +name: set: +isalnum() 0-9A-Za-z +isalpha() A-Za-z +iscntrl() \e000-\e037\e177 +islower() a-z +isupper() A-Z +isprint() -~ (== \e040-\e176) +.TE +.KE +As an addition, there is an isascii() macro, which tests whether a character +is an ascii character. Characters in the range from \e000 to \e177 are ascii +characters. +.KS +.IP - +The behaviour of mathematical functions on domain error: +.TS +l c +l n. +name: returns: +asin() 0.0 +acos() 0.0 +atan2() 0.0 +fmod() 0.0 +log() -HUGE_VAL +log10() -HUGE_VAL +pow() 0.0 +sqrt() 0.0 +.TE +.KE +.IP - +Underflow range errors do not cause errno to be set. +.IP - +The function fmod() returns 0.0 and sets errno to EDOM when the second +argument is 0.0. +.IP - +The set of signals for the signal() function depends on the UNIX-system +which the compiler is compiling for. The default handling, semantics +and behaviour of these signals are those specified by the operating +system vendor. The default handling is not reset when SIGILL is +received. 
+.IP - +A text-stream need not end in a new-line character. +.IP - +White space characters before a new-line appear when read in. +.IP - +There may be any number of null characters appended to a binary +stream. +.IP - +The file position indicator of an append mode stream is initially +positioned at the beginning of the file. +.IP - +A write on a text stream does not cause the associated file to be +truncated beyond that point. +.IP - +The buffering intended by the standard is fully supported. +.IP - +A zero-length file actually exists. +.IP - +A file name can consist of any character, except for the '\e0' and +the '/'. +.IP - +A file can be open multiple times. +.IP - +When a remove() is done on an open file, reading and writing behave +just as can be expected from a non-removed file. When the associated +stream is closed, all written data will be lost. +.IP - +When a file exists prior to a call to rename(), the behaviour is that +of the underlying UNIX system. Normally, the call would fail. +.IP - +The %p conversion in fprintf() has the same effect as %#x or %#lx, +depending on the sizes of pointer and integer. +.IP - +The %p conversion in fscanf() has the same effect as %x or %lx, +depending on the sizes of pointer and integer. +.IP - +A - character that is neither the first nor the last character in the +scanlist for %[ conversion is taken to be a range indicator. When the +first character has a higher ASCII-value than the second, the - will +just be put into the scanlist. +.IP - +The value of errno when fgetpos() or ftell() failed is that of lseek(). +This means: +.RS +.IP "EBADF \-" 10 +when the stream is not valid +.IP "ESPIPE \-" +when fildes is associated with a pipe (and on some systems: sockets) +.IP "EINVAL \-" +the resulting file pointer would be negative +.RE +.LP +.IP - +The messages generated by perror() depend on the value of errno. +The mapping of errors to strings is done by strerror(). 
+.IP - +When the requested size is zero, malloc(), calloc() and realloc() +return a null-pointer. +.IP - +When abort() is called, output buffers will be flushed. Temporary files +(made with the tmpfile() function) will have disappeared when SIGABRT +is not caught or ignored. +.IP - +The exit() function returns the low-order eight bits of its argument +to the environment. +.IP - +The predefined environment names are controlled by the user. +Setting environment variables is done through the putenv() function. +This function accepts a pointer to char as its argument. +To set f.i. the environment variable TERM to a230 one writes +.ti +4n +putenv("TERM=a230"); +.br +The argument to putenv() is stored in an internal table, so malloc'ed +strings can not be freed until another call to putenv() (which sets the +same environment variable) is made. The function returns 1 if it fails, +0 otherwise. +.LP +.IP - +The argument to system is passed as argument to /bin/sh -c. +.IP - +The strings returned by strerror() depend on errno in the following +way: +.TS +l l. 
+errno string +0 "Error 0", +EPERM "Not owner", +ENOENT "No such file or directory", +ESRCH "No such process", +EINTR "Interrupted system call", +EIO "I/O error", +ENXIO "No such device or address", +E2BIG "Arg list too long", +ENOEXEC "Exec format error", +EBADF "Bad file number", +ECHILD "No children", +EAGAIN "No more processes", +ENOMEM "Not enough core", +EACCES "Permission denied", +EFAULT "Bad address", +ENOTBLK "Block device required", +EBUSY "Mount device busy", +EEXIST "File exists", +EXDEV "Cross-device link", +ENODEV "No such device", +ENOTDIR "Not a directory", +EISDIR "Is a directory", +EINVAL "Invalid argument", +ENFILE "File table overflow", +EMFILE "Too many open files", +ENOTTY "Not a typewriter", +ETXTBSY "Text file busy", +EFBIG "File too large", +ENOSPC "No space left on device", +ESPIPE "Illegal seek", +EROFS "Read-only file system", +EMLINK "Too many links", +EPIPE "Broken pipe", +EDOM "Math argument", +ERANGE "Result too large" +.TE +everything else causes strerror() to return "unknown error" +.IP - +The local time zone is by default MET (GMT + 1:00:00). This can be +changed through the TZ environment variable, or by some changes in the +sources. +.IP - +The clock() function returns the number of ticks since process +startup. +.SH +References +.IP [1] +ANS X3.159-1989 +.I +American National Standard for Information Systems - +Programming Language C +.R diff --git a/doc/basic.doc b/doc/basic.doc new file mode 100644 index 0000000..20edc5f --- /dev/null +++ b/doc/basic.doc @@ -0,0 +1,949 @@ +.\" $Id: basic.doc,v 2.6 1994/06/24 10:01:43 ceriel Exp $ +.TL +.de Sy +.LP +.IP \fBsyntax\fR 10 +.. +.de PU +.IP \fBpurpose\fR 10 +.. +.de RM +.IP \fBremarks\fR 10 +.. +The ABC compiler +.AU +Martin L. Kersten +Gert-Jan Akkerman +Marcel Worring +Edo Westerhuis +Frans Kunst +Ronnie Lachniet +.AI +Department of Mathematics and Computer Science. 
+.br +Free University +.br +Amsterdam +.AB +This manual describes the +programming language BASIC and its compiler +included in the Amsterdam Compiler Kit. +.AE +.SH +INTRODUCTION. +.LP +The BASIC-EM compiler is an extensive implementation of the +programming language BASIC. +The language structure and semantics are modelled after the +BASIC interpreter/compiler of Microsoft (tr), a short comparison +is provided in appendix A. +.LP +The compiler generates code for a virtual machine, the EM machine +[[ACM, etc]]. +Using EM as an intermediate machine results in a highly portable +compiler and BASIC code. +.br +The drawback of EM is that it does not directly reflect one particular +hardware design, which means that many of the low level operations available +within BASIC are ill-defined or even inapplicable. +To mention a few, the peek and poke instructions are likely +to behave erroneously, while line printer and tapedeck +primitives are unknown. +.LP +This manual is divided into three chapters. +.br +Chapter 1 discusses the general language syntax and semantics. +.br +Chapter 2 describes the statements available in BASIC-EM. +.br +Chapter 3 describes the predefined functions, ordered alphabetically. +.LP +Appendix A discusses the differences with Microsoft BASIC. +.br +Appendix B describes all reserved symbols. +.LP +.LP +.SH +SYNTAX NOTATION +.LP +The conventions for syntax presentation are as follows: +.IP CAPS 10 +Items are reserved words, must be input as shown. +.IP <> 10 +Items in lowercase letters enclosed in angular brackets +are to be supplied by the user. +.IP [] 10 +Items are optional. +.IP \.\.\. 10 +Items may be repeated any number of times +.IP {} 10 +A choice between two or more alternatives. At least one of the entries +must be chosen. +.IP | 10 +Vertical bars separate the choices within braces. +.LP +All punctuation must be included where shown. +.bp +.NH 1 +GENERAL INFORMATION +.LP +The BASIC-EM compiler is designed for a UNIX based environment. 
+It accepts a text file with a BASIC program (suffix .b) and generates +an executable file, called a.out. +.NH 2 +LINE FORMAT +.LP +A BASIC program consists of a series of lines, starting with a +positive line number in the range 0 to 32767. +A line may consist of more than one physical line on a terminal, but +is limited to 1024 characters. +Multiple BASIC statements may be placed on a single line, provided +they are separated by a colon (:). +.NH 2 +CONSTANTS +.LP +The BASIC compiler character set is comprised of alphabetic +characters, numeric characters, and special characters shown below. +.DS += + - * / ^ ( ) % # $ \\ _ +! [ ] , . ; : & ' ? > < \\ (blank) +.DE +.LP +BASIC uses two different types of constants during processing: +numeric and string constants. +.br +A string constant is a sequence of characters taken from the ASCII +character set enclosed by double quotation marks. +.br +Numeric constants are positive or negative numbers, grouped into +five different classes. +.IP "a) integer constants" 25 +.br +Whole numbers in the range of -32768 to 32767. Integer constants do +not contain decimal points. +.IP "b) fixed point constants" 25 +.br +Positive or negative real numbers, i.e. numbers with a decimal point. +.IP "c) floating point constants" 25 +.br +Real numbers in scientific notation. A floating point constant +consists of an optional signed integer or fixed point number +followed by the letter E (or D) and an optional signed integer +(the exponent). +The allowable range of floating point constants is 10^-38 to 10^+38. +.IP "d) Hex constants" 25 +.br +Hexadecimal numbers, denoted by the prefix &H. +.IP "e) Octal constants" 25 +.br +Octal numbers, denoted by the prefix &O. +.NH 2 +VARIABLES +.LP +Variables are names used to represent values in a BASIC program. +A variable is assigned a value by assignment specified in the program. +Before a variable is assigned its value is assumed to be zero. 
+.br +Variable names are composed of letters, digits or the decimal point, +starting with a letter. Up to 40 characters are significant. +A variable name can be followed by any of the following type +declaration characters: +.IP % 5 +Defines an integer variable +.IP ! 5 +Defines a single precision variable (see below) +.IP # 5 +Defines a double precision variable +.IP $ 5 +Defines a string variable. +.LP +Besides single valued variables, values may be grouped into tables or arrays. +Each element in an array is referenced by the array name and an index, +such a variable is called a subscripted variable. +An array has as many subscripts as there are dimensions in the array, +the maximum of which is 11. +.br +If a variable starts with FN it is assumed to be a call to a user defined +function. +.br +A variable name may not be a reserved word nor the name +of a predefined function. +A list of all reserved identifiers is included as Appendix B. +.LP +NOTES: +.br +Two variables with the same name but different types are +considered illegal. +.br +The type of a variable without typedeclaration-character is set, +at its first occurrence in the program, +to the defaulttype which is (in this implementation) double precision. +.br +Multi-dimensional arrays must be declared before use (see +DIM-statement ). +.br +BASIC-EM differs from Microsoft BASIC in supporting floats in one precision +only (due to EM), i.e. doubles and floats have the same precision. +.NH 2 +EXPRESSIONS +.LP +When necessary the compiler will convert a numeric value from +one type to another. +A value is always converted to the precision of the variable it is assigned +to. +When a floating point value is converted to an integer the fractional +portion is rounded. +In an expression all values are converted to the same degree of precision, +i.e. that of the most precise operand. +.br +Division by zero results in the message "Division by zero". 
+If overflow (or underflow) occurs, the "Overflow (underflow)" message is +displayed and execution is terminated (contrary to Microsoft). +.SH +Arithmetic +.LP +The arithmetic operators in order of precedence, are: +.DS L +^ Exponentiation +- Negation +*,/,\\\\\\\\,MOD Multiplication, Division, Remainder ++,- Addition, Subtraction +.DE +The operator \\\\ denotes integer division, its operands are rounded to +integers before the operator is applied. +Modulus arithmetic is denoted by the operator MOD, which yields the +integer value that is the remainder of an integer division. +.br +The order in which operators are performed can be changed with parentheses. +.SH +Relational +.LP +The relational operators in order of precedence, are: +.DS += Equality +<> Inequality +< Less than +> Greater than +<= Less than or equal to +>= Greater than or equal to +.DE +The relational operators are used to compare two values and return +either "true" (-1) or "false" (0) (See IF statement). +The precedence of the relational operators is lower +than that of the arithmetic operators. +.SH +Logical +.LP +The logical operators perform tests on multiple relations, bit manipulations, +or boolean operations. +The logical operators return a bitwise result ("true" or "false"). +In an expression, logical operators are performed after the relational and +arithmetic operators. +The logical operators work by converting their operands to signed +two-complement integers in the range -32768 to 32767. +.DS +NOT Bitwise negation +AND Bitwise and +OR Bitwise or +XOR Bitwise exclusive or +EQV Bitwise equivalence +IMP Bitwise implies +.DE +.SH +Functional +.LP +A function is used in an expression to call a system or user defined +function. +A list of predefined functions is presented in chapter 3. +.SH +String operations +.LP +Strings can be concatenated by using +. Strings can be compared with +the relational operators. String comparison is performed in lexicographic +order. 
+.NH 2 +ERROR MESSAGES +.LP +The occurrence of an error results in termination of the program +unless an ON....ERROR statement has been encountered. +.bp +.NH 1 +B-EM STATEMENTS +.LP +This chapter describes the statements available within the BASIC-EM +compiler. Each description is formatted as follows: +.Sy +Shows the correct syntax for the statement. See introduction of +syntax notation above. +.PU +Describes the purpose and details of the instructions. +.RM +Describes special cases, deviation from Microsoft BASIC etc. +.LP +.NH 2 +CALL +.Sy +CALL [()] +.PU +The CALL statement provides the means to execute procedures +and functions written in another language included in the +Amsterdam Compiler Kit. +The argument list consists of (subscripted) variables. +The BASIC compiler pushes the address of the arguments on the stack in order +of encounter. +.RM +Not yet available. +.NH 2 +CLOSE +.Sy +CLOSE [[#][,[#]]] +.PU +To terminate I/O on a disk file. + is the number associated with the file +when it was OPENed (See OPEN-statement). Omission of parameters results in closing +all files. +.sp +The END statement and STOP statement always issue a CLOSE of +all files. +.NH 2 +DATA +.Sy +DATA +.PU +DATA statements are used to construct a data bank of values that are +accessed by the program's READ statement. +DATA statements are non-executable, +the data items are assembled in a data file by the BASIC compiler. +This file can be replaced, provided the layout remains +the same (otherwise the RESTORE won't function properly). +.sp +The list of data items consists of numeric and string constants +as discussed in section 1. +Moreover, string constants starting with a letter and not +containing blanks, newlines, commas or colons need not be enclosed with +the string quotes. +.sp +DATA statements can be reread using the RESTORE statement. +.NH 2 +DEF FN +.Sy +DEF FN [()]= +.PU +To define and name a function that is written by the user. 
+ must be an identifier and should be preceded by FN, +which is considered an integral part of the function name. + defines the expression to be evaluated upon function call. +.sp +The parameter list is comprised of a comma separated +list of variable names, used within the function definition, +that are to be replaced by values upon function call. +The variable names defined in the parameterlist, called formal +parameters, do not affect the definition and use of variables +defined with the same name in the rest of the BASIC program. +.sp +A type declaration character may be suffixed to the function name to +designate the data type of the function result. +.NH 2 +DEFINT/SNG/DBL/STR +.Sy +DEF +.PU +Any undefined variable starting with the letter included in the range of +letters is declared of type unless a type declaration character +is appended. +The range of letters is a comma separated list of characters and +character ranges (-). +.NH 2 +DIM +.Sy +DIM +.PU +The DIM statement allocates storage for subscripted variables. +If an undefined subscripted variable is used +the maximum value of the array subscript is assumed to be 10. +A subscript out of range is signalled by the program (when ACK works). +The minimum subscript value is 0, unless the OPTION BASE statement has been +encountered. +.sp +All variables in a subscripted variable are initially zero. +.sp +BUGS. Multi-dimensional arrays MUST be defined. Subscript out of range is +left unnotified. +.NH 2 +END +.Sy +END +.PU +END terminates a BASIC program and returns to the UNIX shell. +An END statement at the end of the BASIC program is optional. +.NH 2 +ERR and ERL +.Sy += ERR +.br += ERL +.PU +Whenever an error occurs the variable ERR contains the +error number and ERL the BASIC line where the error occurred. +The variables are usually used in error handling routines +provided by the user. +.NH 2 +ERROR +.Sy +ERROR +.PU +To simulate the occurrence of a BASIC error. 
+To define a private error code a value must be used that is not already in +use by the BASIC runtime system. +The list of error messages currently in use can be found in appendix B. +.NH 2 +FIELD +.PU +To be implemented. +.NH 2 +FOR...NEXT +.Sy +FOR = TO[STEP] +.br + ...... +.br +NEXT [][,...] +.PU +The FOR statement allows a series of statements to be performed +repeatedly. is used as a counter. During the first +execution pass it is assigned the value , +an arithmetic expression. After each pass the counter +is incremented (decremented) with the step size , an expression. +Omission of the step size is interpreted as an increment of 1. +.br +Execution of the program lines specified between the FOR and the NEXT +statement is terminated as soon as is greater (less) than +.sp +The NEXT statement is labeled with the name(s) of the counter to be +incremented. +.sp +The variables mentioned in the NEXT statement may be omitted, in which case +the counter of the most recent FOR statement is incremented. +If a NEXT statement is encountered before its corresponding FOR statement, +the error message "NEXT without FOR" is generated. +.NH 2 +GET +.Sy +GET [#][, ] +.PU +To be implemented. +.NH 2 +GOSUB...RETURN +.Sy +GOSUB + ... +.br +RETURN +.PU +The GOSUB statement branches to the first statement of a subroutine. +The RETURN statement causes a branch back to the statement following the +most recent GOSUB statement. +A subroutine may contain more than one RETURN statement. +.sp +Subroutines may be called recursively. +Nesting of subroutine calls is limited, upon exceeding the maximum depth +the error message "XXXXX" is displayed. +.NH 2 +GOTO +.Sy +GOTO +.PU +To branch unconditionally to a specified line in the program. +If does not exist, the compilation error message +"Line not defined" is displayed. +.RM +Microsoft BASIC continues at the first line +equal or greater than the line specified. 
+.NH 2 +IF...THEN +.Sy +.br +IF THEN {|} +[ELSE {|}] +.br +.Sy +IF GOTO +[ELSE {|}] +.PU +The IF statement is used +to make a decision regarding the program flow based on the +result of the expressions. +If the expression is not zero, the THEN or GOTO clause is +executed. If the result of is zero, the THEN or +GOTO clause is ignored and the ELSE clause, if present is +executed. +.br +IF..THEN..ELSE statements may be nested. +Nesting is limited by the length of the line. +The ELSE clause matches with the closest unmatched THEN. +.sp +When using IF to test equality for a value that is the +result of a floating point expression, remember that the +internal representation of the value may not be exact. +Therefore, the test should be against a range to +handle the relative error. +.RM +Microsoft BASIC allows a comma before THEN. +.NH 2 +INPUT +.Sy +INPUT [;][<"prompt string">;] +.PU +An INPUT statement can be used to obtain values from the user at the +terminal. +When an INPUT statement is encountered a question mark is printed +to indicate the program is awaiting data. +If <"prompt string"> is included, the string is printed before the +question mark. The question mark is suppressed when the prompt +string is followed by a comma, rather than a semicolon. +.sp +For each variable in the variable list a value should be supplied. +Data items presented should be separated by a comma. +.sp +The type of the variable in the variable list must agree with the +type of the data item entered. Responding with too few or too many +data items causes the message "?Redo". No assignment of input values +is made until an acceptable response is given. +.RM +The option to discard the carriage return with the semicolon after the +input symbol is not yet implemented. +.NH 2 +INPUT [#] +.Sy +INPUT #, +.PU +The purpose of the INPUT# statement is to read data items from a sequential +file and assign them to program variables. + is the number used to open the file for input. 
+The variables mentioned are (subscripted) variables. +The type of the data items read should agree with the type of the variables. +A type mismatch results in the error message "XXXXX". +.sp +The data items on the sequential file are separated by commas and newlines. +In scanning the file, leading spaces, new lines, tabs, and +carriage returns are ignored. The first character encountered +is assumed to be the start of a new item. +String items need not be enclosed with double quotes, provided +they do not contain spaces, tabs, newlines or commas. +.RM +Microsoft BASIC won't assign values until the end of input statement. +This means that the user has to supply all the information. +.NH 2 +LET +.Sy +[LET]= +.PU +To assign the value of an expression to a (subscripted) variable. +The type conversions as dictated in chapter 1 apply. +.NH 2 +LINE INPUT +.Sy +LINE INPUT [;][<"prompt string">;] +.PU +An entire line of input is assigned to the string variable. +See INPUT for the meaning of the <"prompt string"> option. +.NH 2 +LINE INPUT [#] +.Sy +LINE INPUT #, +.PU +Read an entire line of text from a sequential file +and assign it to a string variable. +.NH 2 +LSET and RSET +.PU +To be implemented +.NH 2 +MID$ +.Sy +MID$(,n[,m])= +.PU +To replace a portion of a string with another string value. +The characters of replaces characters in +starting at position n. If m is present, at most m characters are copied, +otherwise all characters are copied. +However, the string obtained never exceeds the length of string expr1. +.NH 2 +ON ERROR GOTO +.Sy +ON ERROR GOTO +.PU +To enable error handling within the BASIC program. +An error may result from arithmetic errors, disk problems, interrupts, or +as a result of the ERROR statement. +After printing an error message the program is continued at the +statements associated with . +.sp +Error handling is disabled using ON ERROR GOTO 0. +Subsequent errors result in an error message and program termination. 
+.NH 2 +ON...GOSUB and ON ...GOTO +.Sy +ON GOSUB +.br +ON GOTO +.PU +To branch to one of several specified line numbers or subroutines, based +on the result of the . The list of line numbers are considered +the first, second, etc alternative. Branching to the first occurs when +the expression evaluates to one, to the second alternative on two, etc. +If the value of the expression is zero or greater than the number of alternatives, processing continues at the first statement following the ON..GOTO +(ON GOSUB) statement. +.sp +When the expression results in a negative number, +an "Illegal function call" error occurs. +.sp +BUG If the value of the expression is zero or greater than the number of +alternatives, processing does NOT continue at the first statement +following the ON..GOTO (ON GOSUB) statement. +.NH 2 +OPEN +.Sy +OPEN {"i" | "o" | "r" } , [#] , +.PU +To open (filename should be quoted) for input/reading or output. +If file is not opened for output it has to be existent, otherwise a +"file not found" error will occur. +.NH 2 +OPTION BASE +.Sy +OPTION BASE n +.PU +To declare the lower bound of subsequent array subscripts as either +0 or 1. The default lower bound is zero. +.NH 2 +POKE +.Sy +POKE , +.PU +To poke around in memory. The use of this statement is not recommended, +because it requires full understanding of both +the implementation of the Amsterdam +Compiler Kit and the hardware characteristics. +.NH 2 +PRINT +.Sy +PRINT +.PU +To print constants or the contents of variables on the terminal-device. +If the variables or constants are separated by commas the values will +be printed separated by tabs. +If the variables or constants are separated by semi-colons the values +will be printed without spaces in between. +The new-line generated at the end of the print-statement can be suppressed by +a semi-colon at the end of list of variables or constants. 
+.NH 2 +PRINT USING +.PU +To be implemented +.NH 2 +PUT +.PU +To be implemented +.NH 2 +RANDOMIZE +.Sy +RANDOMIZE [] +.PU +To reset the random seed. When the expression is omitted, the system +will ask for a value between -32768 and 32767. +The random number generator returns the same sequence of values provided +the same seed is used. +.NH 2 +READ +.Sy +READ +.PU +To read values from the DATA statements and assign them to variables. +The type of the variables should match the type of the items being read, +otherwise a "Syntax error" occurs. If all data is read the message "Out of +data" will be displayed. +.NH 2 +REM +.Sy +REM +.PU +To include explanatory information in a program. +The REM statements are not executed. +A single quote has the same effect as : REM, which +allows for the inclusion of a comment at the end of the line. +.RM +Microsoft BASIC does not allow REM statements as part of +DATA lines. +.NH 2 +RESTORE +.Sy +RESTORE [] +.PU +To allow DATA statements to be re-read from a specific line. +After a RESTORE statement is executed, the next READ accesses +the first item of the DATA statements. +If a line is specified, the next READ accesses the first +item in the specified line. +.sp +Note that data statements result in a sequential datafile generated +by the compiler, being read by the read statements. +This data file may be replaced using the operating system functions +with a modified version, provided the same layout of items +(same number of lines and items per line) is used. +.NH 2 +STOP +.Sy +STOP +.PU +To terminate the execution of a program and return to the operating system +command interpreter. A STOP statement results in the message "Break in line +???" +.NH 2 +SWAP +.Sy +SWAP , +.PU +To exchange the values of two variables. +.sp +BUG. Strings cannot be swapped ! +.NH 2 +TRON/TROFF +.Sy +TRON +.Sy +TROFF +.PU +As an aid in debugging the TRON statement results in a program +listing each line being interpreted.
TROFF disables generation of +this code. +.NH 2 +WHILE...WEND +.Sy +WHILE + ..... +WEND +.PU +To execute a series of BASIC statements as long as a conditional expression +is true. WHILE...WEND loops may be nested. +.NH 2 +WRITE +.Sy +WRITE [] +.PU +To write data at the terminal in DATA statement layout conventions. +The expressions should be separated by commas. +.NH 2 +WRITE # +.Sy +WRITE # , +.PU +To write a sequential data file, being opened with the "O" mode. +The values are written using the DATA statements layout conventions. +.bp +.NH +FUNCTIONS +.LP +.IP ABS(X) 25 +Returns the absolute value of expression X +.IP ASC(X$) 25 +Returns the numeric value of the first character of the string. +If X$ is not initialized an "Illegal function call" error +is returned. +.IP ATN(X) 25 +Returns the arctangent of X in radians. Result is in the range +of -pi/2 to pi/2. +.IP CDBL(X) 25 +Converts X to a double precision number. +.IP CHR$(X) 25 +Converts the integer value X to its ASCII character. +X must be in the range of 0 to 257. +It is used for cursor addressing and generating bell signals. +.IP CINT(X) 25 +Converts X to an integer by rounding the fractional portion. +If X is not in the range -32768 to 32767 an "Overflow" +error occurs. +.IP COS(X) 25 +Returns the cosine of X in radians. +.IP CSNG(X) 25 +Converts X to a single precision number. +.IP CVI(<2-bytes>) 25 +Convert two byte string value to integer number. +.IP CVS(<4-bytes>) 25 +Convert four byte string value to single precision number. +.IP CVD(<8-bytes>) 25 +Convert eight byte string value to double precision number. +.IP EOF[()] 25 +Returns -1 (true) if the end of a sequential file has been reached. +.IP EXP(X) 25 +Returns e (base of natural logarithm) to the power of X. +X should be less than 10000.0. +.IP FIX(X) 25 +Returns the truncated integer part of X. FIX(X) is +equivalent to SGN(X)*INT(ABS(X)).
+The major difference between FIX and INT is that FIX does not +return the next lower number for negative X. +.IP HEX$(X) 25 +Returns the string which represents the hexadecimal value of +the decimal argument. X is rounded to an integer using CINT +before HEX$ is evaluated. +.IP INT(X) 25 +Returns the largest integer <= X. +.IP INP$(X[,[#]Y]) 25 +Returns the string of X characters read from the terminal or +the designated file. +.IP LEN(X$) 25 +Returns the number of characters in the string X$. +Non-printable characters and blanks are counted too. +.IP LOC() 25 +For sequential files LOC returns the +position of the read/write head, counted in number of bytes. +For random files the function returns the record number just +read or written from a GET or PUT statement. +If nothing was read or written 0 is returned. +.IP LOG(X) 25 +Returns the natural logarithm of X. X must be greater than zero. +.IP MID$(X,I,[J]) 25 +Returns first J characters from string X starting at position I in X. +If J is omitted all characters starting from position I in X are returned. +.IP MKI$(X) 25 +Converts an integer expression to a two-byte string. +.IP MKS$(X) 25 +Converts a single precision expression to a four-byte string. +.IP MKD$(X) 25 +Converts a double precision expression to an eight-byte string. +.IP OCT$(X) 25 +Returns the string which represents the octal value of the decimal +argument. X is rounded to an integer using CINT before OCT$ is evaluated. +.IP PEEK(I) 25 +Returns the byte read from the indicated memory. (Of limited use +in the context of ACK) +.IP POS(I) 25 +Returns the current cursor position. To be implemented. +.IP RIGHT$(X$,I) +Returns the rightmost I characters of string X$. +If I=0 then the empty string is returned. +.IP RND(X) 25 +Returns a random number between 0 and 1. X is a dummy argument. +.IP SGN(X) 25 +If X>0 , SGN(X) returns 1. +.br +if X=0, SGN(X) returns 0. +.br +if X<0, SGN(X) returns -1. +.IP SIN(X) 25 +Returns the sine of X in radians.
+.IP SPACE$(X) 25 +Returns a string of spaces of length X. The expression +X is rounded to an integer using CINT. +.IP STR$(X) +Returns the string representation of the value of X. +.IP STRING$(I,J) 25 +Returns the string of length I whose characters all +have ASCII code J. (or first character when J is a string) +.IP TAB(I) 25 +Spaces to position I on the terminal. If the current +print position is already beyond space I, TAB +goes to that position on the next line. +Space 1 is the leftmost position, and the rightmost position +is width minus 1. To be used within PRINT statements only. +.IP TAN(X) 25 +Returns the tangent of X in radians. If TAN overflows +the "Overflow" message is displayed. +.IP VAL(X$) 25 +Returns the numerical value of string X$. +The VAL function strips leading blanks and tabs from the +argument string. +.bp +.SH +APPENDIX A DIFFERENCES WITH MICROSOFT BASIC +.LP +The following Microsoft commands and statements are +not recognized by the compiler. +.DS +SPC +USR +VARPTR +AUTO +CHAIN +CLEAR +CLOAD +COMMON +CONT +CSAVE +DELETE +EDIT +ERASE +FRE +KILL +LIST +LLIST +LOAD +LPRINT +MERGE +NAME +NEW +NULL +RENUM +RESUME +RUN +SAVE +WAIT +WIDTH LPRINT +.DE +Some statements are not available in the current implementation, +but will be soon. These include: +.DS +CALL +DEFUSR +FIELD +GET +INKEY +INPUT$ +INSTR$ +LEFT$ +LSET +RSET +PUT +.DE +.bp +.SH +APPENDIX B RESERVED WORDS IN BASIC-EM +.LP +The following words/symbols/names/identifiers are reserved, which +means that they can not be used for variable-names.
+.DS +ABS AND ASC AS +ATN AUTO BASE CALL +CDBL CHAIN CHR CINT +CLEAR CLOAD CLOSE COMMON +CONT COS CSNG CSAVE +CVI CVS CVD DATA +DEFINT DEFSNG DEFDBL DEFSTR +DEF DELETE DIM EDIT +ELSE END EOF ERASE +ERROR ERR ERL ELSE +EQV EXP FIELD FIX +FOR FRE GET GOSUB +GOTO HEX IF IMP +INKEY INPUT INP INSTR +INT KILL LEFT LEN +LET LINE LIST LLIST +LOAD LOC LOG LPOS +LPRINT LSET MERGE MID +MKI MKS MKD MOD +NAME NEW NEXT NOT +NULL ON OCT OPEN +OPTION OR OUT PEEK +POKE PRINT POS PUT +RANDOMIZE READ REM RENUM +REN RESTORE RESUME RETURN +RIGHT RND RUN SAVE +STEP SGN SIN SPACE +SPC SQR STOP STRING +STR SWAP TAB TAN +THEN TO TRON TROFF +USING USR VAL VARPTR +WAIT WHILE WEND WIDTH +WRITE XOR +.DE diff --git a/doc/ceg/ceg.ref b/doc/ceg/ceg.ref new file mode 100644 index 0000000..54de7bf --- /dev/null +++ b/doc/ceg/ceg.ref @@ -0,0 +1,42 @@ +%T A Practical Toolkit For Making Compilers +%A A.S. Tanenbaum +%A H. v. Staveren +%A E.G. Keizer +%A J.W. Stevenson +%J Communications of the ACM +%V 26 +%N 9 +%D September 1983 + +%T Description of a Machine Architecture for Use with Block Structured Languages +%A A.S. Tanenbaum +%A H. v. Staveren +%A E.G. Keizer +%A J.W. Stevenson +%R IR-81 +%I Dept. Mathematics and Computer Science, Vrije Universiteit +%C Amsterdam +%D August 1983 + +%T EM_CODE(3ACK) +%A ACK Documentation +%I Dept. Mathematics and Computer Science, Vrije Universiteit +%C Amsterdam + +%T ACK.OUT(5ACK) +%A ACK Documentation +%I Dept. Mathematics and Computer Science, Vrije Universiteit +%C Amsterdam +%K aout + +%T PRINT(3ACK) +%A ACK Documentation +%I Dept. Mathematics and Computer Science, Vrije Universiteit +%C Amsterdam + +%T The C Programming Language +%A B.W. Kernighan +%A D.M. Ritchie +%I Prentice-Hall Inc.
+%C Englewood Cliffs, New Jersey +%D 1978 diff --git a/doc/ceg/ceg.tr b/doc/ceg/ceg.tr new file mode 100644 index 0000000..f26d400 --- /dev/null +++ b/doc/ceg/ceg.tr @@ -0,0 +1,1587 @@ +.nr PS 12 +.nr VS 14 +.nr LL 6i +.tr ~ +.TL +The Code Expander Generator +.AU +Frans Kaashoek +Koen Langendoen +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.NH +Introduction +.PP +A \fBcode expander\fR (\fBce\fR for short) is a part of the +Amsterdam Compiler Kit +.[ +toolkit +.] +(\fBACK\fR) and provides the user with +high-speed generation of medium-quality code. Although conceptually +equivalent to the more usual \fBcode generator\fR, it differs in some +aspects. +.PP +Normally, a program to be compiled with \fBACK\fR +is first fed to the preprocessor. The output of the preprocessor goes +into the appropriate front end, which produces EM +.[ +block +.] +(a +machine independent low level intermediate code). The generated EM code is fed +into the peephole optimizer, which scans it with a window of a few instructions, +replacing certain inefficient code sequences by better ones. After the +peephole optimizer a back end follows, which produces high-quality assembly code. +The assembly code goes via the target optimizer into the assembler and the +object code then goes into the +linker/loader, the final component in the pipeline. +.PP +For various applications +this scheme is too slow. When debugging, for example, +compile time is more important than execution time of a program. +For this purpose a new scheme is introduced: +.IP \ \ 1: +The code generator and assembler are +replaced by a library, the \fBcode expander\fR, consisting of a set of +routines, one for every EM-instruction. Each routine expands its EM-instruction +into relocatable object code. In contrast, the usual ACK code generator uses +expensive pattern matching on sequences of EM-instructions. +The peephole and target optimizer are not used. 
+.IP \ \ 2: +These routines replace the usual EM-generating routines in the front end; this +eliminates the overhead of intermediate files. +.LP +This results in a fast compiler producing an object file, ready to be +linked and loaded, at the cost of unoptimized object code. +.PP +Because of the +simple nature of the code expander, it is much easier to build, to debug, and to +test. Experience has demonstrated that a code expander can be constructed, +debugged, and tested in less than two weeks. +.PP +This document describes the tools for automatically generating a +\fBce\fR (a library of C files) from two tables and +a few machine-dependent functions. +A thorough knowledge of EM is necessary to understand this document. +.NH +The code expander generator +.PP +The code expander generator (\fBceg\fR) generates a code expander from +two tables and a few machine-dependent functions. This section explains how +\fBceg\fR works. The first half describes the transformations that are done on +the two tables. The +second half tells how these transformations are done by the \fBceg\fR. +.PP +A code expander consists of a set of routines that convert EM-instructions +directly to relocatable object code. These routines are called by a front +end through the EM_CODE(3ACK) +.[ +EM_CODE +.] +interface. To free the table writer of the burden of building +an object file, we supply a set of routines that build an object file +in the ACK.OUT(5ACK) +.[ +aout +.] +format (see appendix B). This set of routines is called +the +\fBback\fR-primitives (see appendix A). In short, a code expander consists of a +set of routines that map the EM_CODE interface onto the +\fBback\fR-primitives interface. +.PP +To avoid repetition of the same sequences of +\fBback\fR-primitives in different +EM-instructions +and to improve readability, the EM-to-object information must be supplied in +two +tables.
The EM_table maps EM to an assembly language, and the as_table +maps +assembly code to \fBback\fR-primitives. The assembly language is chosen by the +table writer. It can either be an actual assembly language or his ad-hoc +designed language. +.LP +The following picture shows the dependencies between the different components: +.sp +.PS +linewid = 0.5i +A: line down 2i +B: line down 2i with .start at A.start + (1.5i, 0) +C: line down 2i with .start at B.start + (1.5i, 0) +D: arrow right with .start at A.center - (0.25i, 0) +E: arrow right with .start at B.center - (0.25i, 0) +F: arrow right with .start at C.center - (0.25i, 0) +"EM_CODE(3ACK)" at A.start above +"EM_table" at B.start above +"as_table" at C.start above +"source language " at D.start rjust +"EM" at 0.5 of the way between D.end and E.start +G: "assembly" at 0.5 of the way between E.end and F.start +H: " back primitives" at F.end ljust +"(user defined)" at G - (0, 0.2i) +" (ACK.OUT)" at H - (0, 0.2i) ljust +.PE +.PP +The picture suggests that, during compilation, the EM instructions are +first transformed into assembly instructions and then the assembly instructions +are transformed into object-generating calls. This +is not what happens in practice, although the user is free to think it does. +Actually, however the EM_table and the as_table are combined during code +expander generation time, yielding an imaginary compound table that results in +routines from the EM_CODE interface that generate object code directly. +.PP +As already indicated, the compound table does not exist either. Instead, each +assembly instruction in the as_table is converted to a routine generating C +.[ +Kernighan +.] +code +to generate C code to call the \fBback\fR-primitives. The EM_table is +converted into a program that for each EM instruction generates a routine, +using the routines generated from the as_table. Execution of the latter program +will then generate the code expander. 
+.PP +This scheme allows great flexibility +in the table writing, while still +resulting in a very efficient code expander. One implication is that the +as_table is interpreted twice and the EM_table only once. This has consequences +for their structure. +.PP +To illustrate what happens, we give an example. The example is an entry in +the tables for the VAX-machine. The assembly language chosen is a subset of the +VAX assembly language. +.PP +One of the most fundamental operations in EM is ``loc c'', load the value of c +on the stack. To expand this instruction the +tables contain the following information: +.DS +EM_table : +.ft CW + C_loc ==> "pushl $$$1". + /* $1 refers to the first argument of C_loc. + * $$ is a quoted $. */ + + +\fRas_table : +.ft CW + pushl src : CONST ==> + @text1( 0xd0); + @text1( 0xef); + @text4( %$( src->num)). +\fR +.DE +.LP +The as_table is transformed in the following routine: +.DS +.ft CW +pushl_instr(src) +t_operand *src; +/* ``t_operand'' is a struct defined by the + * table writer. */ +{ + printf("swtxt();"); + printf("text1( 0xd0 );"); + printf("text1( 0xef );"); + printf("text4(%s);", substitute_dollar( src->num)); +} +\fR +.DE +Using ``pushl_instr()'', the following routine is generated from the EM_table: +.DS +.ft CW +C_loc( c) +arith c; +/* text1() and text4() are library routines that fill the + * text segment. */ +{ + swtxt(); + text1( 0xd0); + text1( 0xef); + text4( c); +} +\fR +.DE +.LP +A compiler call to ``C_loc()'' will cause the 1-byte numbers ``0xd0'' +and ``0xef'' +and the 4-byte value of the variable ``c'' to be stored in the text segment. +.PP +The transformations on the tables are done automatically by the code expander +generator. +The code expander generator is made up of two tools: +\fBemg\fR and \fBasg\fR. \fBAsg\fR +transforms +each assembly instruction into a C routine. These C routines generate calls +to the \fBback\fR-primitives. 
The generated C routines are used +by \fBemg\fR to generate the actual code expander from the EM_table. +.PP +The link between \fBemg\fR and \fBasg\fR is an assembly language. +We did not enforce a specific syntax for the assembly language; +instead we have given the table writer the freedom +to make an ad-hoc assembly language or to use an actual assembly language +suitable for his purpose. Apart from a greater flexibility this +has another advantage; if the table writer adopts the assembly language that +runs on the machine at hand, he can test the EM_table independently from the +as_table. Of course there is a price to pay: the table writer has to +do the decoding of the operands himself. See section 4 for more details. +.PP +Before we describe the structure of the tables in detail, we will give +an overview of the four main phases. +.IP "phase 1:" +.br +The as_table is transformed by \fBasg\fR. This results in a set of C routines. +Each assembly-opcode generates one C routine. Note that a call to such a +routine does not generate the corresponding object code; it generates C code, +which, when executed, generates the desired object code. +.IP "phase 2:" +.br +The C routines generated by \fBasg\fR are used by emg to expand the EM_table. +This +results in a set of C routines, the code expander, which conform to the +procedural interface EM_CODE(3ACK). A call to such a routine does indeed +generate the desired object code. +.IP "phase 3:" +.br +The front end that uses the procedural interface is linked/loaded with the +code expander generated in phase 2 and the \fBback\fR-primitives (a supplied +library). This results in a compiler. +.IP "phase 4:" +.br +The compiler runs. The routines in the code expander are +executed and produce object code. +.RE +.NH +Description of the EM_table +.PP +This section describes the EM_table. It contains four subsections. 
+The first three subsections describe the syntax of the EM_table, +the +semantics of the EM_table, and the functions and +constants that must be present in the EM_table, in the file ``mach.c'' or in +the file ``mach.h''. The last subsection explains how a table writer can generate +assembly code instead of object code. The section on +semantics contains many examples. +.NH 2 +Grammar +.PP +The following grammar describes the syntax of the EM_table. +.VS +4 +.TS +center tab(%); +l c l. +TABLE%::=%( RULE)* +RULE%::=%C_instr ( COND_SEQUENCE | SIMPLE) +COND_SEQUENCE%::=%( condition SIMPLE)* ``default'' SIMPLE +SIMPLE%::=% ``==>'' ACTION_LIST +ACTION_LIST%::=%[ ACTION ( ``;'' ACTION)* ] ``.'' +ACTION%::=%AS_INSTR +%|%function-call +AS_INSTR%::=%``"'' [ label ``:''] [ INSTR] ``"'' +INSTR%::=%mnemonic [ operand ( ``,'' operand)* ] +.TE +.VS -4 +.PP +The ``('' ``)'' brackets are used for grouping, ``['' ... ``]'' +means ... 0 or 1 time, +a ``*'' means zero or more times, and +a ``|'' means +a choice between left and right. A \fBC_instr\fR is +a name in the EM_CODE(3ACK) interface. \fBcondition\fR is a C expression. +\fBfunction-call\fR is a call of a C function. \fBlabel\fR, \fBmnemonic\fR, +and \fBoperand\fR are arbitrary strings. If an \fBoperand\fR +contains brackets, the +brackets must match. There is an upper bound on the number of +operands; the maximum number is defined by the constant MAX_OPERANDS in the +file ``const.h'' in the directory assemble.c. Comments in the table should be +placed between ``/*'' and ``*/''. +The table is processed by the C preprocessor, before being parsed by +\fBemg\fR. +.NH 2 +Semantics +.PP +The EM_table is processed by \fBemg\fR. \fBEmg\fR generates a C function +for every instruction in the EM_CODE(3ACK). +For every EM-instruction not mentioned in the EM_table, a +C function that prints an error message is generated. +It is possible to divide the EM_CODE(3ACK)-interface into four parts : +.IP \0\01: +text instructions (e.g., C_loc, C_adi, ..)
+.IP \0\02: +pseudo instructions (e.g., C_open, C_df_ilb, ..) +.IP \0\03: +storage instructions (e.g., C_rom_icon, ..) +.IP \0\04: +message instructions (e.g., C_mes_begin, ..) +.LP +This section starts with giving the semantics of the grammar. The examples +are text instructions. The section ends with remarks on the pseudo +instructions and the storage instructions. Since message instructions are not +useful for a code expander, they are ignored. +.PP +.NH 3 +Actions +.PP +The EM_table is made up of rules describing how to expand a \fBC_instr\fR +defined by the EM_CODE(3ACK)-interface (corresponding +to an EM instruction) into actions. +There are two kinds of actions: assembly instructions and C function calls. +An assembly instruction is defined as a mnemonic followed by zero or more +operands separated by commas. The semantics of an assembly instruction is +defined by the table writer. When the assembly language is not expressive +enough, then, as an escape route, function calls can be made. However, this +reduces +the speed of the actual code expander. Finally, actions can be grouped into +a list of actions; actions are separated by a semicolon and terminated +by a ``.''. +.DS +.ft CW +C_nop ==> . + /* Empty action list : no operation. */ + +C_inc ==> "incl (sp)". + /* Assembler instruction, which is evaluated + * during expansion of the EM_table */ + +C_slu ==> C_sli( $1). + /* Function call, which is evaluated during + * execution of the compiler. */ +\fR +.DE +.NH 3 +Labels +.PP +Since an assembly language without instruction labels is a rather weak +language, labels inside a contiguous block of assembly instructions are +allowed. When using labels two rules must be observed: +.IP \0\01: +The name of a label should be unique inside an action list. +.IP \0\02: +The labels used in an assembler instruction should be defined in the same +action list. +.LP +The following example illustrates the usage of labels. 
+.DS +.ft CW + /* Compare the two top elements on the stack. */ +C_cmp ==> "pop bx"; + "pop cx"; + "xor ax, ax"; + "cmp cx, bx"; + /* Forward jump to local label */ + "je 2f"; + "jb 1f"; + "inc ax"; + "jmp 2f"; + "1: dec ax"; + "2: push ax". +\fR +.DE +We will come back to labels in the section on the as_table. +.NH 3 +Arguments of an EM instruction +.PP +In most cases the translation of a \fBC_instr\fR depends on its arguments. +The arguments of a \fBC_instr\fR are numbered from 1 to \fIn\fR, where \fIn\fR +is the +total number of arguments of the current \fBC_instr\fR (there are a few +exceptions, see Implicit arguments). The table writer may +refer to an argument as $\fIi\fR. If a plain $-sign is needed in an +assembly instruction, it must be preceded by an extra $-sign. +.PP +There are two groups of \fBC_instr\fRs whose arguments are handled specially: +.RS +.IP "1: Instructions dealing with local offsets" +.br +The value of the $\fIi\fR argument referring to a parameter ($\fIi\fR >= 0) +is increased by ``EM_BSIZE''. ``EM_BSIZE'' is the size of the return status block +and must be defined in the file ``mach.h'' (see section 3.3). For example : +.DS +.ft CW +C_lol ==> "push $1(bp)". + /* automatic conversion of $1 */ +\fR +.DE +.IP "2: Instructions using global names or instruction labels" +.br +All the arguments referring to global names or instruction labels will be +transformed into a unique assembly name. To prevent name clashes with library +names the table writer has to provide the +conversions in the file ``mach.h''. For example : +.DS +.ft CW +C_bra ==> "jmp $1". + /* automatic conversion of $1 */ + /* type arith is converted to string */ +\fR +.DE +.RE +.NH 3 +Conditionals +.PP +The rules in the EM_table can be divided into two groups: simple rules and +conditional rules. The simple rules are made up of a \fBC_instr\fR followed by +a list of actions, as described above.
The conditional rules (COND_SEQUENCE) +allow the table writer to select an action list depending on the value of +a condition. +.PP +A COND_SEQUENCE is a list of boolean expressions, each with a corresponding +simple rule. If +an expression evaluates to true then the corresponding simple rule is carried +out. If more than one condition evaluates to true, the first one is chosen. +The last case of a COND_SEQUENCE of a \fBC_instr\fR must handle +the default case. +The boolean expressions in a COND_SEQUENCE must be C expressions. Besides the +ordinary C operators and constants, $\fIi\fR references can be used +in an expression. +.DS +.ft CW + /* Load address of LB $1 levels back. */ +C_lxl + $1 == 0 ==> "pushl fp". + $1 == 1 ==> "pushl 4(ap)". + default ==> "movl $$$1, r0"; + "jsb .lxl"; + "pushl r0". +\fR +.DE +.NH 3 +Abbreviations +.PP +EM instructions with an external as an argument come in three variants in +the EM_CODE(3ACK) interface. In most cases it will be possible to take +these variants together. For this purpose the ``..'' notation is introduced. +For the code expander there is no difference between the +following instructions. +.DS +.ft CW +C_loe_dlb ==> "pushl $1 + $2". +C_loe_dnam ==> "pushl $1 + $2". +C_loe ==> "pushl $1 + $2". +\fR +.DE +So they can be written in the following way. +.DS +.ft CW +C_loe.. ==> "pushl $1 + $2". +\fR +.DE +.NH 3 +Implicit arguments +.PP +In the last example ``C_loe'' has two arguments, but in the EM_CODE interface +it has one argument. This argument depends on the current ``hol'' +block; in the EM_table this is made explicit. Every \fBC_instr\fR whose +argument depends on a ``hol'' block has one extra argument; argument 1 refers +to the ``hol'' block. +.NH 3 +Pseudo instructions +.PP +Most pseudo instructions are machine independent and are provided +by \fBceg\fR.
The table writer has only to supply the following functions, +which are used to build a stack frame: +.DS +.ft CW +C_prolog() +/* Performs the prolog, for example save + * return address */ + +C_locals( n) +arith n; +/* Allocate n bytes for locals on the stack */ + +C_jump( label) +char *label; +/* Generates code for a jump to ``label'' */ +\fR +.DE +.LP +These functions can be defined in ``mach.c'' or in the EM_table (see +section 3.3). +.NH 3 +Storage instructions +.PP +The storage instructions ``C_bss_\fIcstp()\fR'', ``C_hol_\fIcstp()\fR'', +``C_con_\fIcstp()\fR'', and ``C_rom_\fIcstp()\fR'', except for the instructions +dealing with constants of type string (C_..._icon, C_..._ucon, C_..._fcon), are +generated automatically. No information is needed in the table. +To generate the C_..._icon, C_..._ucon, C_..._fcon instructions +\fBceg\fR only has to know how to convert a number of type string to bytes; +this can be defined with the constants ONE_BYTE, TWO_BYTES, and FOUR_BYTES. +C_rom_icon, C_con_icon, C_bss_icon, C_hol_icon can be abbreviated by ..icon. +This also holds for ..ucon and ..fcon. +For example : +.DS +.ft CW +\\.\\.icon + $2 == 1 ==> gen1( (ONE_BYTE) atoi( $1)). + $2 == 2 ==> gen2( (TWO_BYTES) atoi( $1)). + $2 == 4 ==> gen4( (FOUR_BYTES) atol( $1)). + default ==> arg_error( "..icon", $2). +\fR +.DE +Gen1(), gen2() and gen4() are \fBback\fR-primitives (see appendix A), and +generate one, two, or four byte constants. Atoi() is a C library function that +converts strings to integers. +The constants ``ONE_BYTE'', ``TWO_BYTES'', and ``FOUR_BYTES'' must be defined in +the file ``mach.h''. +.NH 2 +User supplied definitions and functions +.PP +If the table writer uses all the default functions he has only to supply +the following constants and functions : +.TS +tab(#); +l c lw(10c).
+C_prolog()#:#T{ +Do prolog +T} +C_jump( l)#:#T{ +Perform a jump to label l +T} +C_locals( n)#:#T{ +Allocate n bytes on the stack +T} +# +NAME_FMT#:#T{ +Print format describing name to a unique name conversion. The format must +contain %s. +T} +DNAM_FMT#:#T{ +Print format describing data-label to a unique name conversion. The format +must contain %s. +T} +DLB_FMT#:#T{ +Print format describing numerical-data-label to a unique name conversion. +The format must contain a %ld. +T} +ILB_FMT#:#T{ +Print format describing instruction-label to a unique name conversion. +The format must contain %d followed by %ld. +T} +HOL_FMT#:#T{ +Print format describing hol-block-number to a unique name conversion. +The format must contain %d. +T} +# +EM_WSIZE#:#T{ +Size of a word in bytes on the target machine +T} +EM_PSIZE#:#T{ +Size of a pointer in bytes on the target machine +T} +EM_BSIZE#:#T{ +Size of base block in bytes on the target machine +T} +# +ONE_BYTE#:#T{ +\\C suitable type that can hold one byte on the machine where the \fBce\fR runs +T} +TWO_BYTES#:#T{ +\\C suitable type that can hold two bytes on the machine where the \fBce\fR runs +T} +FOUR_BYTES#:#T{ +\\C suitable type that can hold four bytes on the machine where the \fBce\fR runs +T} +# +BSS_INIT#:#T{ +The default value that the loader puts in the bss segment +T} +# +BYTES_REVERSED#:#T{ +Must be defined if the byte order must be reversed. +By default the least significant byte is outputted first.\fR\(dg +.FS +\fR\(dg When both byte orders are used, for +example NS 16032, the table writer has to +supply his own set of routines. +.FE +T} +WORDS_REVERSED#:#T{ +Must be defined if the word order must be reversed. +By default the least significant word is outputted first. +T} +.TE +.LP +An example of the file ``mach.h'' for the vax4. +.TS +tab(:); +l l l. 
+#define : ONE_BYTE : int +#define : TWO_BYTES : int +#define : FOUR_BYTES : long +: +#define : EM_WSIZE : 4 +#define : EM_PSIZE : 4 +#define : EM_BSIZE : 0 +: +#define : BSS_INIT : 0 +: +#define : NAME_FMT : "_%s" +#define : DNAM_FMT : "_%s" +#define : DLB_FMT : "_%ld" +#define : ILB_FMT : "I%03d%ld" +#define : HOL_FMT : "hol%d" +.TE +Notice that EM_BSIZE is zero. The vax ``call'' instruction takes automatically +care of the base block. +.PP +There are three primitives that have to be defined by the table writer, either +as functions in the file ``mach.c'' or as rules in the EM_table. +For example, for the 8086 they look like this: +.DS +.ft CW +C_jump ==> "jmp $1". + +C_prolog ==> "push bp"; + "mov bp, sp". + +C_locals + $1 == 0 ==> . + $1 == 2 ==> "push ax". + $1 == 4 ==> "push ax"; + "push ax". + default ==> "sub sp, $1". +\fR +.DE +.NH 2 +Generating assembly code +.PP +When the code expander generator is used for generating assembly instead of +object code (see section 5), additional print formats have to be defined +in ``mach.h''. The following table lists these formats. +.TS +tab(#); +l c lw(10c). +BYTE_FMT#:#T{ +Print format to allocate and initialize one byte. The format must +contain %ld. +T} +WORD_FMT#:#T{ +Print format to allocate and initialize one word. The format must +contain %ld. +T} +LONG_FMT#:#T{ +Print format to allocate and initialize one long. The format must +contain %ld. +T} +BSS_FMT#:#T{ +Print format to allocate space in the bss segment. The format must +contain %ld (number of bytes). +T} +COMM_FMT#:#T{ +Print format to declare a "common". The format must contain a %s (name to be declared +common), followed by a %ld (number of bytes). +T} + +SEGTXT_FMT#:#T{ +Print format to switch to the text segment. +T} +SEGDAT_FMT#:#T{ +Print format to switch to the data segment. +T} +SEGBSS_FMT#:#T{ +Print format to switch to the bss segment. +T} + +SYMBOL_DEF_FMT#:#T{ +Print format to define a label. The format must contain %s. 
+T} +GLOBAL_FMT#:#T{ +Print format to declare a global name. The format must contain %s. +T} +LOCAL_FMT#:#T{ +Print format to declare a local name. The format must contain %s. +T} + +RELOC1_FMT#:#T{ +Print format to initialize a byte with an address expression. The format must +contain %s (name) and %ld (offset). +T} +RELOC2_FMT#:#T{ +Print format to initialize a word with an address expression. The format must +contain %s (name) and %ld (offset). +T} +RELOC4_FMT#:#T{ +Print format to initialize a long with an address expression. The format must +contain %s (name) and %ld (offset). +T} + +ALIGN_FMT#:#T{ +Print format to align a segment. +T} +.TE +.NH 1 +Description of the as_table +.PP +This section describes the as_table. Like the previous section, it is divided +into +four parts: the first two parts describe the grammar and the semantics of the +as_table; the third part gives an overview +of the functions and the constants that must be present in the as_table (in +the file ``as.h'' or in the file ``as.c''); the last part describes the case when +assembly is generated instead of object code. +The part on semantics contains examples that appear in the as_table for the +VAX or for the 8086. +.NH 2 +Grammar +.PP +The form of the as_table is given by the following grammar : +.VS +4 +.TS +center tab(#); +l c l. 
+TABLE#::=#( RULE)* +RULE#::=#( mnemonic | ``...'') DECL_LIST ``==>'' ACTION_LIST +DECL_LIST#::=#DECLARATION ( ``,'' DECLARATION)* +DECLARATION#::=#operand [ ``:'' type] +ACTION_LIST#::=#ACTION ( ``;'' ACTION)* ``.'' +ACTION#::=#IF_STATEMENT +#|#function-call +#|#``@''function-call +IF_STATEMENT#::=#``@if'' ``('' condition ``)'' ACTION_LIST +##( ``@elsif'' ``('' condition ``)'' ACTION_LIST)* +##[ ``@else'' ACTION_LIST] +##``@fi'' +function-call#::=#function-identifier ``('' [arg (,arg)*] ``)'' +arg#::=#argument +#|#reference +.TE +.VS -4 +.LP +\fBmnemonic\fR, \fBoperand\fR, and \fBtype\fR are all C identifiers; +\fBcondition\fR is a normal C expression; +\fBfunction-call\fR must be a C function call. A function can be called with +standard C arguments or with a reference (see section 4.2.4). +Since the as_table is +interpreted during code expander generation as well as during code +expander execution, two levels of calls are present in it. A ``function-call'' +is done during code expander generation, a ``@function-call'' during code +expander execution. +.NH 2 +Semantics +.PP +The as_table is made up of rules that map assembly instructions onto +\fBback\fR-primitives, a set of functions that construct an object file. +The table is processed by \fBasg\fR, which generates a C function +for each assembler mnemonic. The names of +these functions are the assembler mnemonics postfixed +with ``_instr'' (e.g., ``add'' becomes ``add_instr()''). These functions +will be used by the function +assemble() during the expansion of the EM_table. +After explaining the semantics of the as_table the function +assemble() will be described. +.NH 3 +Rules +.PP +A rule in the as_table is made up of a left and a right hand side; +the left hand side describes an assembler +instruction (mnemonic and operands); the +right hand side gives the corresponding actions as \fBback\fR-primitives or as +functions defined by the table writer, which call \fBback-primitives\fR.
+Two simple examples from the VAX as_table and the 8086 as_table, resp.: +.DS +.ft CW +movl src, dst ==> @text1( 0xd0); + gen_operand( src); + gen_operand( dst). + /* ``gen_operand'' is a function that encodes + * operands by calling back-primitives. */ + +rep ens:MOVS ==> @text1( 0xf3); + @text1( 0xa5). + +\fR +.DE +.NH 3 +Declaration of types. +.PP +In general, a machine instruction is encoded as an opcode followed by zero or +more +operands. There are two methods for mapping assembler mnemonics +onto opcodes: the mnemonic determines the opcode, or mnemonic and operands +together determine the opcode. Both cases can be +easily expressed in the as_table. +The first case is obvious. +The second case is handled by introducing type fields for the operands. +.PP +When mnemonic and operands together determine the opcode, the table writer has +to give several rules for each combination of mnemonic and operands. The rules +differ in the type fields of the operands. +The table writer has to supply functions that check the type +of the operand. The name of such a function is the name of the type; it +has one argument: a pointer to a struct of type \fIt_operand\fR; it returns +non-zero when the operand is of this type, otherwise it returns 0. +.PP +This will usually lead to a list of rules per mnemonic. To reduce the amount of +work an abbreviation is supplied. Once the mnemonic is specified it can be +referred to in the following rules by ``...''. +One has to make sure +that each mnemonic is mentioned only once in the as_table, otherwise +\fBasg\fR will generate more than one function with the same name. +.PP +The following example shows the usage of type fields. +.DS +.ft CW + mov dst:REG, src:EADDR ==> + @text1( 0x8b); /* opcode */ + mod_RM( %d(dst->reg), src). /* operands */ + + ... dst:EADDR, src:REG ==> + @text1( 0x89); /* opcode */ + mod_RM( %d(src->reg), dst).
/* operands */ +\fR +.DE +The table-writer must supply the restriction functions, +.ft CW +REG\fR and +.ft CW +EADDR\fR in the previous example, in ``as.c'' or ''as.h''. +.NH 3 +The function of the @-sign and the if-statement. +.PP +The right hand side of a rule is made up of function calls. +Since the as_table is +interpreted on two levels, during code expander generation and during code +expander execution, two levels of calls are present in it. A function-call +without an ``@''-sign +is called during code expander generation (e.g., the +.ft CW +gen_operand()\fR in the +first example). +A function call with an ``@''-sign is called during code +expander execution (e.g., +the \fBback\fR-primitives). So the last group will be part of the compiler. +.PP +The need for the ``@''-sign construction arises, for example, when +implementing push/pop optimization (e.g., ``push x'' followed by ``pop y'' +can be replaced by ``move x, y''). +In this case flags need to be set, unset, and tested during the execution of +the compiler: +.DS L +.ft CW +PUSH src ==> /* save in ax */ + mov_instr( AX_oper, src); + /* set flag */ + @assign( push_waiting, TRUE). +\fR +.DE +.DS +.ft CW +POP dst ==> @if ( push_waiting) + /* ``mov_instr'' is asg-generated */ + mov_instr( dst, AX_oper); + @assign( push_waiting, FALSE). + @else + /* ``pop_instr'' is asg-generated */ + pop_instr( dst). + @fi. +\fR +.DE +.LP +Although the @-sign is followed syntactically by a +function name, this function can very well be the name of a macro defined in C. +This is in fact the case with ``@assign()'' in the above example. +.PP +The case may arise when information is needed that is not known +until execution of +the compiler. For example one needs to know if a ``$\fIi\fR'' argument fits in +one byte. +In this case one can use a special if-statement provided +by \fBasg\fR: @if, @elsif, @else, @fi. This means that the conditions +will be evaluated at +run time of the \fBce\fR. 
In such a condition one may of course refer +to the ``$\fIi\fR'' arguments. For example, constants can be +packed into one or two byte arguments as follows: +.DS +.ft CW +mov dst:ACCU, src:DATA ==> + @if ( fits_byte( %$(dst->expr))) + @text1( 0xc0); + @text1( %$(dst->expr)). + @else + @text1( 0xc8); + @text2( %$(dst->expr)). + @fi. +.DE +.NH 3 +References to operands +.PP +As noted before, the operands of an assembler instruction may be used as +pointers to the struct \fIt_operand\fR in the right hand side of the table. +Because of the free format assembler, the types of the fields in the struct +\fIt_operand\fR are unknown to \fBasg\fR. As these fields can appear in calls +to functions, \fBasg\fR must know +these types. This section explains how these types must be specified. +.PP +References to operands come in three forms: ordinary operands, operands that +contain ``$\fIi\fR'' references, and operands that refer to names of local labels. +The ``$\fIi\fR'' in operands represent names or numbers of a \fBC_instr\fR and must +be given as arguments to the \fBback\fR-primitives. Labels in operands +must be converted to a number that tells the distance, the number of bytes, +between the label and the current position in the text-segment. +.LP +All these three cases are treated in a uniform way. When the table writer +makes a reference to an operand of an assembly instruction, he must describe +the type of the operand in the following way. +.VS +4 +.TS +center tab(#); +l c l. +reference#::=#``%'' conversion +##``('' operand-name ``\->'' field-name ``)'' +conversion#::=# printformat +#|#``$'' +#|#``dist'' +printformat#::=#see PRINT(3ACK) +.[ +PRINT +.] +.TE +.VS -4 +.LP +The three cases differ only in the conversion field. The printformat conversion +applies to ordinary operands. The ``%$'' applies to operands that contain +a ``$\fIi\fR''. The expression between parentheses must result in a pointer to +a char. The +result of ``%$'' is of the type of ``$\fIi\fR''.
The ``%dist'' +applies to operands that refer to a local label. The expression between +the brackets must result in a pointer to a char. The result of ``%dist'' is +of type arith. +.PP +The following example illustrates the usage of ``%$''. (For an +example that illustrates the usage of ordinary fields see +the section on ``User supplied definitions and functions''). +.DS +.ft CW +jmp dst ==> + @text1( 0xe9); + @reloc2( %$(dst->lab), %$(dst->off), PC_REL). +\fR +.DE +.PP +A useful function concerning $\fIi\fRs is arg_type(), which takes as input a +string starting with $\fIi\fR and returns the type of the \fIi\fR''th argument +of the current EM-instruction, which can be STRING, ARITH or INT. One may need +this function while decoding operands if the context of the $\fIi\fR does not +give enough information. +If the function arg_type() is used, the file +arg_type.h must contain the definition of STRING, ARITH and INT. +.PP +%dist is only guaranteed to work when called as a parameter of text1(), text2() or text4(). +The goal of the %dist conversion is to reduce the number of reloc1(), reloc2() +and reloc4() +calls, saving space and time (no relocation at compiler run time). +The following example illustrates the usage of ``%dist''. +.DS +.ft CW + jmp dst:ILB ==> /* label in an instruction list */ + @text1( 0xeb); + @text1( %dist( dst->lab)). + + ... dst:LABEL ==> /* global label */ + @text1( 0xe9); + @reloc2( %$(dst->lab), %$(dst->off), PC_REL). +\fR +.DE +.NH 3 +The functions assemble() and block_assemble() +.PP +The functions assemble() and block_assemble() are provided by \fBceg\fR. +If, however, the table writer is not satisfied with the way they work +he can +supply his own assemble() or block_assemble(). +The default function assemble() splits an assembly string into a +label, mnemonic, +and operands and performs the following actions on them: +.IP \0\01: +It processes the local label; it records the name and current position. 
Thereafter it calls the function process_label() with one argument of type string, +the label. The table writer has to define this function. +.IP \0\02: +Thereafter it calls the function process_mnemonic() with one argument of +type string, the mnemonic. The table writer has to define this function. +.IP \0\03: +It calls process_operand() for each operand. Process_operand() must be +written by the table-writer since no fixed representation for operands +is enforced. It has two arguments: a string (the operand to decode) +and a pointer to the struct \fIt_operand\fR. The declaration of the struct +\fIt_operand\fR must be given in the +file ``as.h'', and the table-writer can put all the information needed for +encoding the operand in machine format in it. +.IP \0\04: +It examines the mnemonic and calls the associated function, generated by +\fBasg\fR, with pointers to the decoded operands as arguments. This makes it +possible to use the decoded operands in the right hand side of a rule (see +below). +.LP +If the default assemble() does not work the way the table writer wants, he +can supply his own version of it. Assemble() has the following arguments: +.DS +.ft CW +assemble( instruction ) + char *instruction; +\fR +.DE +\fIinstruction\fR points to a null-terminated string. +.PP +The default function block_assemble() is called with a sequence of assembly +instructions that belong to one action list. It calls assemble() for +every assembly instruction in +this block. But if a special action is +required on a block of assembly instructions, the table writer only has to +rewrite this function to get a new \fBceg\fR that complies with his wishes. +The function block_assemble has the following arguments: +.DS +.ft CW +block_assemble( instructions, nr, first, last) + char **instructions; + int nr, first, last; +\fR +.DE +\fIInstructions\fR points to an array of pointers to strings representing +assembly instructions. \fINr\fR is +the number of instructions that must be assembled.
\fIFirst\fR +and \fIlast\fR have no function in the default block_assemble(), but are +useful when optimizations are done in block_assemble(). +.PP +Four things have to be specified in ``as.h'' and ``as.c''. First the user must +give the declaration of struct \fIt_operand\fR in ``as.h'', and the functions +process_operand(), process_mnemonic(), and process_label() must be given +in ``as.c''. If the right hand side of the as_table +contains function calls other than the \fBback\fR-primitives, these functions +must also be present in ``as.c''. Note that both the ``@''-sign (see 4.2.3) +and ``references'' (see 4.2.4) also work in the functions defined in ``as.c''. +.PP +The following example shows the representative and essential parts of the +8086 ``as.h'' and ``as.c'' files. +.nr PS 10 +.nr VS 12 +.LP +.DS L +.ft CW +/* Constants and type definitions in as.h */ + +#define UNKNOWN 0 +#define IS_REG 0x1 +#define IS_ACCU 0x2 +#define IS_DATA 0x4 +#define IS_LABEL 0x8 +#define IS_MEM 0x10 +#define IS_ADDR 0x20 +#define IS_ILB 0x40 + +#define AX 0 +#define BX 3 +#define CL 1 +#define SP 4 +#define BP 5 +#define SI 6 +#define DI 7 + +#define REG( op) ( op->type & IS_REG) +#define ACCU( op) ( op->type & IS_REG && op->reg == AX) +#define REG_CL( op) ( op->type & IS_REG && op->reg == CL) +#define DATA( op) ( op->type & IS_DATA) +#define LABEL( op) ( op->type & IS_LABEL) +#define ILB( op) ( op->type & IS_ILB) +#define MEM( op) ( op->type & IS_MEM) +#define ADDR( op) ( op->type & IS_ADDR) +#define EADDR( op) ( op->type & ( IS_ADDR | IS_MEM | IS_REG)) +#define CONST1( op) ( op->type & IS_DATA && strcmp( "1", op->expr) == 0) +#define MOVS( op) ( op->type & IS_LABEL&&strcmp("\"movs\"", op->lab) == 0) +#define IMMEDIATE( op) ( op->type & ( IS_DATA | IS_LABEL)) + +struct t_operand { + unsigned type; + int reg; + char *expr, *lab, *off; + }; + +extern struct t_operand saved_op, *AX_oper; +\fR +.DE +.nr PS 12 +.nr VS 14 +.LP +.nr PS 10 +.nr VS 12 +.DS L +.ft CW + +/* Some functions 
in as.c. */ + +#include "arg_type.h" +#include "as.h" + +#define last( s) ( s + strlen( s) - 1) +#define LEFT '(' +#define RIGHT ')' +#define DOLLAR '$' + +process_operand( str, op) +char *str; +struct t_operand *op; + +/* expr -> IS_DATA en IS_LABEL + * reg -> IS_REG en IS_ACCU + * (expr) -> IS_ADDR + * expr(reg) -> IS_MEM + */ +{ + char *ptr, *index(); + + op->type = UNKNOWN; + if ( *last( str) == RIGHT) { + ptr = index( str, LEFT); + *last( str) = '\0'; + *ptr = '\0'; + if ( is_reg( ptr+1, op)) { + op->type = IS_MEM; + op->expr = ( *str == '\0' ? "0" : str); + } + else { + set_label( ptr+1, op); + op->type = IS_ADDR; + } + } + else + if ( is_reg( str, op)) + op->type = IS_REG; + else { + if ( contains_label( str)) + set_label( str, op); + else { + op->type = IS_DATA; + op->expr = str; + } + } +} + +/*********************************************************************/ + +mod_RM( reg, op) +int reg; +struct t_operand *op; + +/* This function helps to decode operands in machine format. + * Note the $-operators + */ +{ + if ( REG( op)) + R233( 0x3, reg, op->reg); + else if ( ADDR( op)) { + R233( 0x0, reg, 0x6); + @reloc2( %$(op->lab), %$(op->off), ABSOLUTE); + } + else if ( strcmp( op->expr, "0") == 0) + switch( op->reg) { + case SI : R233( 0x0, reg, 0x4); + break; + + case DI : R233( 0x0, reg, 0x5); + break; + + case BP : R233( 0x1, reg, 0x6); /* exception! 
*/ + @text1( 0); + break; + + case BX : R233( 0x0, reg, 0x7); + break; + + default : fprint( STDERR, "Wrong index register %d\en", + op->reg); + } + else { + @if ( fit_byte( %$(op->expr))) + switch( op->reg) { + case SI : R233( 0x1, reg, 0x4); + break; + + case DI : R233( 0x1, reg, 0x5); + break; + + case BP : R233( 0x1, reg, 0x6); + break; + + case BX : R233( 0x1, reg, 0x7); + break; + + default : fprint( STDERR, "Wrong index register %d\en", + op->reg); + } + @text1( %$(op->expr)); + @else + switch( op->reg) { + case SI : R233( 0x2, reg, 0x4); + break; + + case DI : R233( 0x2, reg, 0x5); + break; + + case BP : R233( 0x2, reg, 0x6); + break; + + case BX : R233( 0x2, reg, 0x7); + break; + + default : fprint( STDERR, "Wrong index register %d\en", + op->reg); + } + @text2( %$(op->expr)); + @fi + } +} +\fR +.DE +.nr PS 12 +.nr VS 14 +.NH 2 +Generating assembly code +.PP +It is possible to generate assembly instead of object files (see section 5), in +which case there is no need to supply ``as_table'', ``as.h'', and ``as.c''. +This option is useful for debugging the EM_table. +.NH 1 +Building a code expander +.PP +This section describes how to generate a code expander in two phases. +In phase one, the EM_table is +written and assembly code is generated. If the assembly code is an actual +language, the EM_table can be tested by assembling and running the generated +code. +If an ad-hoc assembly language is used by the table writer, it is not possible +to test the EM_table, but the code generated is at least in readable form. +In the second phase, the as_table is written and object code is generated. +After the generated object code is fed into the loader, it can be tested. +.NH 2 +Phase one +.PP +The following is a list of instructions to make a +code expander that generates assembly instructions. +.IP \0\01: +Create a new directory. 
+.IP \0\02: +Create the ``EM_table'', ``mach.h'', and ``mach.c'' files; there is no need +for ``as_table'', ``as.h'', and ``as.c'' at this moment. +.IP \0\03: +type +.br +.ft CW +install_ceg -as +\fR +.br +install_ceg will create a Makefile and three directories : ceg, ce, and back. +Ceg will contain the program ceg; this program will be +used to turn ``EM_table'' into a set of C source files (in the ce directory), +one for each +EM-instruction. All these files will be compiled and put in a library called +\fBce.a\fR. +.br +The option +.ft CW +-as\fR means that a \fBback\fR-library will be +generated (in the directory ``back'') that +supports the generation of assembly language. The library is named ``back.a''. +.IP \0\04: +Link a front end, ``ce.a'', and ``back.a'' together resulting in a compiler +that generates assembly code. +.LP +If the table writer has chosen an actual assembly language, the EM_table can be +tested (e.g., by running the compiler on the EM test set). If an error occurs, +change the EM_table and type +.IP +.br +.ft CW +update_ceg\fR \fBC_instr +\fR +.br +.LP +where \fBC_instr\fR stands for the name of the erroneous EM-instruction. +If the table writer has chosen an ad-hoc assembly language, he can at least +read the generated code and look for possible errors. If an error is found, +the same procedure as described above can be followed. +.NH 2 +Phase two +.PP +The next phase is to generate a \fBce\fR that produces relocatable object +code. +.IP \0\01: +Remove the ``ce'', ``ceg'', and ``back'' directories. +.IP \0\02: +Write the ``as_table'', ``as.h'', and ``as.c'' files. +.IP \0\03: +type +.sp +.ft CW +install_ceg -obj \fR +.sp +The option +.ft CW +-obj\fR means that ``back.a'' will contain a library +for generating +ACK.OUT(5ACK) object files, see appendix B. 
+If the writer does not want to use the default ``back.a'', +the +.ft CW +-obj\fR flag must be omitted and a ``back.a'' should be supplied that +generates object code in the desired format. +.IP \0\04: +Link a front end, ``ce.a'', and ``back.a'' together resulting in a compiler +that generates object code. +.LP +The as_table is ready to be tested. If an error occurs, adapt the table. +Then there are two ways to proceed: +.IP \0\01: +recompile the whole EM_table, +.sp +.ft CW +update_ceg ALL \fR +.sp +.IP \0\02: +recompile just the few EM-instructions that contained the error, +.sp +.ft CW +update_ceg \fBC_instr\fR +.sp +where \fBC_instr\fR is an erroneous EM-instruction. +This has to be done for every EM-instruction that contained the erroneous +assembly instruction. +.NH +Acknowledgements +.PP +We want to thank Henri Bal, Dick Grune, and Ceriel Jacobs for their +valuable suggestions and the critical reading of this paper. +.NH +References +.LP +.[ +$LIST$ +.] +.bp +.SH +Appendix A, \fRthe \fBback\fR-primitives +.PP +This appendix describes the routines available to generate relocatable +object code. If the default back.a is used, the object code is in +ACK.OUT(5ACK) format. +In the default back.a, the names defined here are remapped to more hidden names, +to avoid name conflicts with for instance names used in the front-end. This +remapping is done in an include-file, "back.h". +A user-implemented back.a should do the same thing. +.nr PS 10 +.nr VS 12 +.PP +.IP A1. +Text and data generation; with ONE_BYTE b; TWO_BYTES w; FOUR_BYTES l; arith n; +.VS +4 +.TS +tab(#); +l c lw(10c). +text1( b)#:#T{ +Put one byte in text-segment. +T} +text2( w)#:#T{ +Put word (two bytes) in text-segment, byte-order is defined by +BYTES_REVERSED in mach.h. +T} +text4( l)#:#T{ +Put long ( two words) in text-segment, word-order is defined by +WORDS_REVERSED in mach.h. +T} +# +con1( b)#:#T{ +Same for CON-segment.
+T} +con2( w)#: +con4( l)#: +# +rom1( b)#:#T{ +Same for ROM-segment. +T} +rom2( w)#: +rom4( l)#: +# +gen1( b)#:#T{ +Same for the current segment, only to be used in the ``..icon'', ``..ucon'', etc. +pseudo EM-instructions. +T} +gen2( w)#: +gen4( l)#: +# +bss( n)#:#T{ +Put n bytes in bss-segment, value is BSS_INIT. +T} +common( n)#:#T{ +If there is a saved label, generate a "common" for it, of size +n. Otherwise, it is equivalent to bss(n). +(see also the save_label routine). +T} +.TE +.VS -4 +.IP A2. +Relocation; with char *s; arith o; int r; +.VS +4 +.TS +tab(#); +l c lw(10c). +reloc1( s, o, r)#:#T{ +Generates relocation-information for 1 byte in the current segment. +T} +##s\0:\0the string which must be relocated +##o\0:\0the offset in bytes from the string. +##T{ +r\0:\0relocation type. It can have the values ABSOLUTE or PC_REL. These +two constants are defined in the file ``back.h'' +T} +reloc2( s, o, r)#:#T{ +Generates relocation-information for 1 word in the +current segment. Byte-order according to BYTES_REVERSED in mach.h. +T} +reloc4( s, o, r)#:#T{ +Generates relocation-information for 1 long in the +current segment. Word-order according to WORDS_REVERSED in mach.h. +T} +.TE +.VS -4 +.IP A3. +Symbol table interaction; with int seg; char *s; +.VS +4 +.TS +tab(#); +l c lw(10c). +switch_segment( seg)#:#T{ +sets current segment to ``seg'', and does alignment if necessary. ``seg'' +can be one of the four constants defined in ``back.h'': SEGTXT, SEGROM, +SEGCON, SEGBSS. +T} +# +symbol_definition( s)#:#T{ +Define s in symbol-table. +T} +set_local_visible( s)#:#T{ +Record scope-information in symbol table. +T} +set_global_visible( s)#:#T{ +Record scope-information in symbol table. +T} +.TE +.VS -4 +.IP A4. +Start/end actions; with char *f; +.VS +4 +.TS +tab(#); +l c lw(10c). +open_back( f)#:#T{ +Directs output to file ``f'', if f is the null pointer output must be given on +standard output. +T} +close_back()#:#T{ +close output stream. 
+T} +init_back()#:#T{ +Only used with user-written back-library, gives the opportunity to initialize. +T} +end_back()#:#T{ +Only used with user-written back-library. +T} +.TE +.VS -4 +.IP A5. +Label generation routines; with int n; arith g; char *l; These routines all +return a "char *" to a static area, which is overwritten at each call. +.VS +4 +.TS +tab(#); +l c lw(10c). +extnd_pro( n)#:#T{ +Label set at the end of procedure \fIn\fP, to generate space for locals. +T} +extnd_start( n)#:#T{ +Label set at the beginning of procedure \fIn\fP, to jump back to after generating +space for locals. +T} +extnd_name( l)#:#T{ +Create a name for a procedure named \fIl\fP. +T} +extnd_dnam( l)#:#T{ +Create a name for an external variable named \fIl\fP. +T} +extnd_dlb( g)#:#T{ +Create a name for numeric data label \fIg\fP. +T} +extnd_ilb( l, n)#:#T{ +Create a name for instruction label \fIl\fP in procedure \fIn\fP. +T} +extnd_hol( n)#:#T{ +Create a name for HOL block number \fIn\fP. +T} +extnd_part( n)#:#T{ +Create a unique label for the C_insertpart mechanism. +T} +extnd_cont( n)#:#T{ +Create another unique label for the C_insertpart mechanism. +T} +extnd_main( n)#:#T{ +Create yet another unique label for the C_insertpart mechanism. +T} +.TE +.VS -4 +.IP A6. +Some miscellaneous routines, with char *l; +.VS +4 +.TS +tab(#); +l c lw(10c). +save_label( l)#:#T{ +Save label \fIl\fP. Unfortunately, in EM, when a label is encountered, +it is not yet +known in which segment it will end up. The save_label/dump_label mechanism +is there to solve this problem. +T} +dump_label()#:#T{ +If there is a label saved, force definition for it now. +T} +align_word()#:#T{ +Align to a word boundary, if the current segment is not a text segment. +T} +.TE +.VS -4 +.nr PS 12 +.nr VS 14 +.bp +.SH +Appendix B, description of ACK-a.out library +.PP +The object file produced by \fBce\fR is by default in ACK.OUT(5ACK) +format. 
The object file is made up of one header, followed by +four segment headers, followed by text, data, relocation information, +symbol table, and the string area. The object file is tuned for the ACK-LED, +so there are some special things done just before the object file is dumped. +First, four relocation records are added which contain the names of the four +segments. Second, all the local relocation is resolved. This is done by the +function do_relo(). If there is a record belonging to a local +name this address is relocated in the segment to which the record belongs. +Besides doing the local relocation, do_relo() changes the ``nami''-field +of the local relocation records. This field receives the index of one of the +four +relocation records belonging to a segment. After the local +relocation has been resolved the routine output_back() dumps the +ACK object file. +.LP +If a different a.out format is wanted, one can choose between three strategies: +.IP \ \1: +The most simple one is to use a conversion program, which converts the ACK +a.out format to the wanted a.out format. This program exists for almost +all machines on which ACK runs. However, +not all conversion programs can generate relocation information. +The disadvantage is that the compiler will become slower. +.IP \ \2: +A better solution is to change the functions output_back(), do_relo(), +open_back(), and close_back() in such a way +that they produce the wanted a.out format. This strategy saves a lot of I/O. +.IP \ \3: +If this still is not satisfactory, the +\fBback\fR-primitives can be adapted to produce the wanted a.out format. diff --git a/doc/ceg/proto.make b/doc/ceg/proto.make new file mode 100644 index 0000000..1a61ab8 --- /dev/null +++ b/doc/ceg/proto.make @@ -0,0 +1,12 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:05:51 ceriel Exp $ + +#PARAMS do not remove this line!
+ +SRC_DIR = $(SRC_HOME)/doc/ceg + +PIC=pic +TBL=tbl +REFER=refer + +$(TARGET_HOME)/doc/ceg.doc: $(SRC_DIR)/ceg.tr $(SRC_DIR)/ceg.ref + $(PIC) $(SRC_DIR)/ceg.tr | $(REFER) -e -p $(SRC_DIR)/ceg.ref | $(TBL) > $@ diff --git a/doc/cg.doc b/doc/cg.doc new file mode 100644 index 0000000..1013a46 --- /dev/null +++ b/doc/cg.doc @@ -0,0 +1,1864 @@ +.\" $Id: cg.doc,v 1.9 1994/06/24 10:01:47 ceriel Exp $ +.RP +.ND Nov 1984 +.TL +The table driven code generator from +.br +the Amsterdam Compiler Kit +.AU +Hans van Staveren +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.AB +It is possible to automate the process of compiler building +to a great extent using collections of tools. +The Amsterdam Compiler Kit is such a collection of tools. +This document provides a description of the internal workings +of the table driven code generator in the Amsterdam Compiler Kit, +and a description of syntax and semantics of the driving table. +.PP +>>> NOTE <<< +.br +This document pertains to the \fBold\fP code generator. Refer to the +"Second Revised Edition" for the new code generator. +.AE +.NH 1 +Introduction +.PP +Part of the Amsterdam Compiler Kit is a code generator system consisting +of a code generator generator (\fIcgg\fP for short) and some machine +independent C code. +.I Cgg +reads a machine description table and creates two files, +tables.h and tables.c. +These are then used together with other C code to produce +a code generator for the machine at hand. +.PP +This in turn reads compact EM code and produces +assembly code. +The remainder of this document will first broadly describe +the working of the code generator, +then a description of the machine table follows after which +the internal workings of the code generator will be explained. +.PP +The reader is assumed to have at least a vague notion about the +semantics of the intermediary EM code. 
+Someone wishing to write a table for a new machine +should be thoroughly acquainted with EM code +and the assembly code of the machine at hand. +.NH 1 +Global overview of the workings of the code generator. +.PP +The code generator or +.I cg +tries to generate good code by simulating the runtime stack +of the program compiled and delaying emission of code as long +as possible. +It also keeps track of register contents, which enables it to +eliminate redundant moves, and tries to eliminate redundant tests +by keeping information about condition code status, +if applicable for the machine. +.PP +.I Cg +maintains a `fakestack' containing `tokens' that are built +by executing the pseudo code contained in the code rules given +by the table writer. +One can think of the fakestack as a logical extension of the real +stack the program compiled will have when run. +During code generation tokens will be kept on the fakestack as long +as possible but when they are moved to the real stack, +by generating code for the push, +all tokens above\u*\d +.FS +* in the rest of this document the stack is assumed to grow downwards, +although the top of the stack will mean the first element that will +be popped. +.FE +the tokens pushed will be pushed also, +so that the fakestack will not contain holes. +.PP +The main loop of +.I cg +is this: +.IP 1) +find a pattern of EM instructions starting at the current one to +generate code for. +This pattern will usually be of length one but longer patterns can be used. +.IP 2) +Select one of the possibly many stack patterns that go with this +EM pattern on the basis of heuristics and/or lookahead. +.IP 3) +Force the current fakestack contents to match the pattern. +This may involve +copying tokens to registers, making dummy transformations, e.g. 
to +transform a "local" into a "register offsetted" or might even +cause to have the complete fakestack contents put to the real stack +and then back into registers if no suitable transformations +were provided by the table writer. +.IP 4) +Execute the pseudocode associated with the code rule just selected, +this may cause registers to be allocated, +code to be emitted etc. +.IP 5) +Put tokens onto the fakestack to reflect the result of the operation. +.IP 6) +Insert some EM instructions into the stream, +this is possible but not common. +.IP 7) +Account for the cost. +The cost is kept in a (space, time) vector and lookahead decisions +are based on a linear combination of these. +.PP +The table that drives +.I cg +is not read in every time, +but instead is used at compiletime +of +.I cg +to set parameters and to load pseudocode tables. +A program called +.I cgg +reads the table and produces large lists of numbers that are +compiled together with machine independent code to produce +a code generator for the machine at hand. +.NH 1 +Description of the machine table +.PP +The machine description table consists of the following sections: +.IP 1) +Constant definitions +.IP 2) +Register definitions +.IP 3) +Token definitions +.IP 4) +Token expression definitions +.IP 5) +Code rules +.IP 6) +Move definitions +.IP 7) +Test definitions +.IP 8) +Stacking definitions +.PP +Input is in free format, white space and newlines may be used +at will to improve legibility. +Identifiers used in the table have the same syntax as C identifiers, +upper and lower case considered different, all characters significant. +There is however one exception: +identifiers must be more than one character long for parsing reasons. +C style comments are accepted +.DS + /* this is a comment */ +.DE +and #define macros may be used if the need arises. +.NH 2 +Some constants +.PP +Before anything else three constants must be defined, +all with the syntax NAME=value, value being an integer.
+These constants are: +.IP EM_WSIZE 10 +Number of bytes in a machine word. +This is the number of bytes +a simple \fBloc\fP instruction will put on the stack. +.IP EM_PSIZE +Number of bytes in a pointer. +This is the number of bytes +a \fBlal\fP instruction will put on the stack. +.IP EM_BSIZE +Number of bytes in the hole between AB and LB. +If the calling sequence just saves PC and LB this +size will be twice the pointersize. +.PP +EM_WSIZE and EM_PSIZE are checked when a program is compiled +with the resulting code generator. +EM_BSIZE is used by +.I cg +to add to the offset of instructions dealing with locals +having positive offsets, +i.e. parameters. +.PP +Optionally one can give here the factors with which the size and time +parts of the cost function have to be multiplied to ensure they have the +same order of magnitude. +This can be done as +.DS +TIMEFACTOR = C\d1\u/C\d2\u +SIZEFACTOR = C\d3\u/C\d4\u +.DE +Above numbers must be read as rational numbers. +Defaults are 1/1 for both of them. +These constants set the default size/time tradeoff in the code generator, +so if TIMEFACTOR and SIZEFACTOR are both 1 the code generator will choose +at random between two codesequences where one has +cost (10,4) and the other has cost (8,6). +See also the description of the cost field below. +.PP +Also optional is the definition of a printformat for integers in the codefile. +This is given as +.DS +FORMAT = string +.DE +The default for string is "%ld". +For example on the PDP 11 one can use +.DS +FORMAT= "0%lo" +.DE +to satisfy the old UNIX assembler that reads octal unless followed by +a period, and the ACK assembler that follows C conventions. +.NH 2 +Register definition +.PP +The next part of the tables describes the various registers of the +machine and defines identifiers +to be used in later parts of the tables. +Example for the PDP-11: +.DS L +REGISTERS: +R0 = ( "r0",2), REG. +R1 = ( "r1",2), REG, ODDREG. +R2 = ( "r2",2), REG. +R3 = ( "r3",2), REG, ODDREG. 
+R4 = ( "r4",2), REG. +LB = ( "r5",2), LOCALBASE. +R01= ( "r0",4,R0,R1), REGPAIR. +R23= ( "r2",4,R2,R3), REGPAIR. +FR0= ( "r0",4), FREG. +FR1= ( "r1",4), FREG. +FR2= ( "r2",4), FREG. +FR3= ( "r3",4), FREG. +DR0= ( "r0",8,FR0), DREG. +DR1= ( "r1",8,FR1), DREG. +DR2= ( "r2",8,FR2), DREG. +DR3= ( "r3",8,FR3), DREG. +.DE +.PP +The identifier before the '=' sign is the name of the register +as used further on in the table. +The string is the name of the register as far as the assembler is concerned. +The number is the size of the register in bytes. +Identifiers following the number but within the parentheses are previously +defined registernames that are contained in the register being defined. +The identifiers following the closing parenthesis are properties +of the register. +So for example R23 is a register with assembler name r2, 4 bytes long, +contains the registers R2 and R3 and has the property REGPAIR. +.PP +It might seem wise to list each and every property of a register, +so one might give R0 the extra property MFPTREG named after the not +too well known MFPT instruction on newer PDP-11 types, +but this is not a good idea. +Every extra property means the registerset is more unorthogonal +and +.I cg +execution time is influenced by that, +because it has to take into account a larger set of registers +that are not equivalent. +.PP +There is a predefined property SCRATCH that is dynamic, +i.e. a register can have the property SCRATCH one time, +and lose it the next. +A register has the property SCRATCH when it has a reference count of one. +One needs to be able to discriminate between SCRATCH registers +and others, +because it is only allowed to do arithmetic on +SCRATCH registers. +.NH 2 +Stack token definition +.PP +The next part describes all possible tokens that can reside on +the fakestack during code generation. 
+Attributes of a token are described in the form of a C struct declaration, +this is followed by the size in bytes of the token, +optionally followed by the cost of the token when used as an addressing mode +and the format +to be used on output. +.PP +Tokens should usually be declared for every addressing mode +of the machine at hand and for every size directly usable in +a machine instruction. +Example for the PDP-11 (incomplete): +.DS L +TOKENS: +IREG2 = { REGISTER reg; } 2 "*%[reg]" /* indirect register */ +REGCONST = { REGISTER reg; STRING off; } 2 /* not really addressable */ +REGOFF2 = { REGISTER reg; STRING off; } 2 "%[off](%[reg])" +IREGOFF2 = { REGISTER reg; STRING off; } 2 "*%[off](%[reg])" +CONST = { INT off; } 2 cost=(2,850) "$%[off]." +EXTERN2 = { STRING off; } 2 "%[off]" +IEXTERN2 = { STRING off; } 2 "*%[off]" +PAIRSIGNED = { REGISTER regeven,regodd; } 2 "%[regeven]" +.DE +.PP +Types allowed in the struct are REGISTER, INT and STRING. +Tokens without a printformat should never be output. +.PP +Notice that tokens need not correspond to addressing modes, +the REGCONST token listed above, +meaning the sum of the contents of the register and the constant, +has no corresponding addressing mode on the PDP-11, +but is included so that a sequence of add constant, load indirect, +can be handled efficiently. +This REGCONST token is needed as part of the path +.DS +REGISTER -> REGCONST -> REGOFF +.DE +of which the first and the last "exist" and the middle is needed +only as an intermediate step. +.NH 2 +Token expressions +.PP +Usually machines have certain collections of addressing modes that +can be used with certain instructions. +The stack patterns in the table are lists of these collections +and since it is cumbersome to write out these long lists +every time, there is a section here to give names to these +collections. 
+Please note that it is not forbidden to write out a token expression +in the remainder of the table, +but for clarity it is usually better not to. +Example for the PDP-11 (incomplete): +.DS L +TOKENEXPRESSIONS: +SOURCE2 = REG + IREG2 + REGOFF2 + IREGOFF2 + CONST + EXTERN2 + + IEXTERN2 +SREG = REG * SCRATCH +.DE +Permissible in the expressions are all PASCAL set operators, i.e. +.IP + +set union +.IP - +set difference +.IP * +set intersection +.PP +Every tokenidentifier is also a token expression identifier +denoting the singleton collection of tokens containing +just itself. +Every register property as defined above is also a token expression +matching all registers with that property when on the fakestack. +The standard token expression identifier ALL denotes the collection of +all tokens. +.NH 2 +Expressions +.PP +Throughout the rest of the table expressions can be used in some +places. +This section will give the syntax and semantics of expressions. +There are four types of expressions: integer, string, register and undefined. +Type checking is performed by +.I cgg . +An operator with at least one undefined operand returns undefined except +for the defined() function mentioned below. +An undefined expression is interpreted as FALSE when it is needed +as a truth value. +Basic terms in an expression are +.IP number 16 +A number is a constant of type integer. +.IP "string" +A string within double quotes is a constant of type string. +All the normal C style escapes may be used within the string. +.IP REGIDENT +The name of a register is a constant of type register. +.IP $\fIi\fP +A dollarsign followed by a number is the representation of the argument +of EM instruction \fIi\fP. +The type of the operand is dependent on the instruction, +sometimes it is integer, +sometimes it is string. +It is undefined when the instruction has no operand. +.br +Although an exhaustive list could be given describing all the types +the following rule of thumb will suffice. 
+If it is unimaginable for the operand of the instruction ever to be +something different from a plain integer, the type is integer, +otherwise it is string. +.br +.I Cg +makes all necessary conversions, +like adding EM_BSIZE to positive arguments of instructions +dealing with locals, +prepending underlines to global names, +converting codelabels into a unique representation etc. +Details about this can be found in the section about +machine dependent C code. +.IP %[1] +This in general means the token mentioned first in the +stack pattern. +When used inside an expression the token must be a simple register. +Type of this is register. +.IP %[1.off] +This means field "off" of the first stack pattern token. +Type is the same as that of field "off". +To use this expression implies a check that all tokens +in the token expression used have the same attributes. +.IP %[1.1] +This is the first subregister of the first token. +Previous comments apply. +.IP %[b] +The second allocated register. +.IP %[a.2] +The second subregister of the first allocated register. +.PP +All normal C operators apply to integers, +the + operator serves for string concatenation +and register expressions can only be compared to each other. +Furthermore there are some special "functions": +.IP tostring(e) 16 +Converts an integer expression e to a string. +.IP defined(e) +Returns 1 if expression e is defined, 0 otherwise. +.IP samesign(e1,e2) +Returns 1 if integer expression e1 and e2 have the same sign. +.IP sfit(e1,e2) +Returns 1 if integer expression e1 fits as a signed integer +into a field of e2 bits, 0 otherwise. +.IP ufit(e1,e2) +Same as above but now for unsigned e1. +.IP rom(a,n) +Integer expression giving the n'th argument from the \fBrom\fP descriptor +pointed at by the a'th EM instruction. +Undefined if that descriptor does not exist. +.IP loww(a) +Returns the lower half of the argument of the a'th EM instruction. +This is used to split the arguments of a \fBldc\fP instruction. 
+.IP highw(a) +Same for upper half. +.NH 2 +Code rules +.PP +The largest section of the tables consists of the code generation rules. +They specify EM patterns, stack patterns, code to be generated etc. +Syntax is +.DS L +code rule : EM pattern '|' stack pattern '|' code '|' + stack replacement '|' EM replacement '|' cost ; +.DE +All parts are optional, however there must be at least one pattern present. +If the empattern is missing the rule becomes a rewriting rule or +.I coercion +to be used when code generation cannot continue +because of an invalid stack pattern. +The code rules are preceded by the word +.DS +CODE: +.DE +The next paragraphs describe the various parts in detail. +.NH 3 +The EM pattern +.PP +The EM pattern consists of a list of EM mnemonics followed +by a boolean expression. +Examples: +.DS +\fBloe\fP +.DE +will match a single \fBloe\fP instruction, +.DS +\fBloc\fP \fBloc\fP \fBcif\fP $1==2 && $2==8 +.DE +is a pattern that will match +.DS +\fBloc\fP 2 +\fBloc\fP 8 +\fBcif\fP +.DE +and +.DS +\fBlol\fP \fBinc\fP \fBstl\fP $1==$3 +.DE +will match for example +.DS +.ta 10m 20m 30m 40m 50m 60m +\fBlol\fP 6 \fBlol\fP -2 \fBlol\fP 4 +\fBinc\fP \fBinc\fP but \fInot\fP \fBinc\fP +\fBstl\fP 6 \fBstl\fP -2 \fBstl\fP -4 +.DE +A missing boolean expression evaluates to TRUE. +.PP +When the EM pattern is the same as in the previous code rule the pattern +should be given as `...'. +The code generator will match the longest EM pattern on every occasion, +if two patterns of the same length match the first in the table will be chosen, +while all patterns of length greater than or equal to three are considered +to be of the same length. +.NH 3 +The stack pattern +.PP +The stack pattern is a list of token expressions, +usually token expression identifiers for clarity. +No boolean expression is allowed here. +The first expression is the one that matches the top of the stack. 
+.PP +The pattern can be followed by the word STACK +in which case the pattern only matches if there is nothing +else on the fakestack. +The code generator will stack everything not matched at the start +of the rule. +.PP +The pattern can be preceded with the word +.DS +nocoercions: +.DE +which tells the code generator not to try to coerce to the pattern +but only to use it when it is already there. +There are two reasons for this construction, +correctness and speed. +It is needed for correctness when the pattern contains a register +that is not transparent when data is moved through it. +.PP +Example: on the PDP-11 the shortest code for +.DS +\fBlae\fP a +\fBloi\fP 8 +\fBlae\fP b +\fBsti\fP 8 +.DE +is +.DS +movf _a,fr0 +movf fr0,_b +.DE +assuming that the floating point processor is in double +precision mode and fr0 is free. +Unfortunately this is not correct since a trap can occur on certain +kinds of data. +This could happen if there was a pattern for \fBsti\fP\ 8 that allowed +one to move a floating point register not preceded by nocoercions: . +The code generator would then find that moving the 8-byte global _a +to a floating point register and then storing it to _b was the cheapest, +assuming that the space/time knob was turned far enough to space. +It is unfortunate that the type information is no longer present, +since if _a really is a floating point number the move could be +made without error. +.PP +The second reason for the nocoercions: construct is speed. +When the code generator has a long list of possible stack patterns +for one EM pattern it can waste a lot of time trying to find coercions +to all of them, while the mere presence of such a long list +indicates that the table writer has given a lot of special cases. +In this case prepending all the special cases by nocoercions: +will stop the code generator from trying to find things there aren't. 
+.NH 3 +The code part +.PP +The code part consists of three parts, stack cleanup, register allocation +and code to generate. +All of these may be omitted. +.NH 4 +Stack cleanup +.PP +The stack cleanup part describes certain stacktokens that should neither remain on +the fakestack, nor be remembered as contents of registers. +This is usually only required with store operations. +The entire fakestack, except for the part matched in the stack pattern, +is searched for tokens matching the expression and they are copied +to the real stack. +Every register that contains the stacktoken is marked as empty. +.PP +Syntax is +.DS +remove(token expression) \fIor\fP +remove(token expression, boolean expression) +.DE +Example: +.DS +remove(REGOFF2,%[reg] != LB || %[off] == $1) +.DE +is part of a remove() call for use in the \fBstl\fP code rule. +It removes all register offsetted tokens where the register is not the +localbase plus the local wherein the store is done. +The necessity for this can be seen from the following example: +.DS +\fBlol\fP 4 +\fBinl\fP 4 +\fBstl\fP 6 +.DE +Without a proper remove() call in the rule for \fBinl\fP code would +be generated as here +.DS +inc 4(r5) +mov 4(r5),6(r5) +.DE +so local 6 would be given the new value of local 4 instead of the old +as the EM code prescribed. +.PP +When generating something like a branch instruction it +might be needed to empty the fakestack completely. +This can of course be done with +.DS +remove(ALL) +.DE +.NH 4 +Register allocation +.PP +The register allocation part describes the kind of registers needed. +Syntax for allocate() is +.DS +allocate(itemlist) +.DE +where itemlist is a list of three kinds of things: +.IP 1) +a tokendescription, for example %[1]. +.br +This will instruct the code generator to temporarily decrement the reference count +of all registers contained in the token, +so that they are available for allocation in this allocate() call +if they were only used in that token. +See example below. 
+.IP 2) +a register property. +.br +This will allocate a register with that property. +The register will be marked as empty at this point. +Lookahead will be performed if necessary. +.IP 3) +a register property with initialization. +.br +This will allocate the register as in 2) but will also +initialize it. +This eases the task of the code generator because it can +find a register already filled with the right value +if it exists. +.PP +Examples: +.DS +allocate(OREG) +.DE +will allocate an odd register, while +.DS +allocate(REG={REGOFF2,LB,$1}) +.DE +will allocate a register while simultaneously filling it with +the asked value. +.br +Inside the coercion from SOURCE2 to REGISTER in the PDP-11 table +the following allocate() can be found. +.DS +allocate(%[1],REG=%[1]) +.DE +This tells the code generator that registers contained in %[1] can be used +again and asks to fill the register allocated with %[1]. +So if %[1]={REGOFF2,R3,"4"} and R3 has a reference count of 1 +the following code might be generated. +.DS +mov 4(r3),r3 +.DE +In the rest of the line the registers allocated can be named by +%[a] and %[b.1],%[b.2], i.e. with lower case letters +in order of allocation. +.PP +Warning: +.DS +allocate(R3) +.DE +is \fInot\fP the way to allocate R3. +R3 is not a register property, so it will be seen as a token description +and the effect is that R3 will have its reference count decremented. +.NH 4 +Code +.PP +Code to be generated is specified as a list of items of the following kind: +.IP 1) +a string in double quotes ("This is a string"). +.br +This is copied to the codefile and a newline ( \en ) is appended. +Inside the string all normal C string conventions are allowed, +and substitutions can be made of the following sorts. +.RS +.IP a) +$1, $2 etc. +These are the operands of the corresponding EM instructions +and are printed according to their type. +To put a real '$' inside the string it must be doubled ('$$'). +.IP b) +%[1], %[2.reg], %[b.1] etc. 
+These have their obvious meaning. +If they describe a complete token ( %[1] ) +the printformat for the token is used. +If they stand for a basic term in an expression +they will be printed according to their type. +To put a real '%' inside the string it must be doubled ('%%'). +.IP c) +%( arbitrary expression %). +This allows inclusion of arbitrary expressions inside strings. +Usually not needed very often, +so that the awkward notation is not too bad. +Note that %(%[1]%) is equivalent to %[1]. +.RE +.IP 2) +a move() call. +This has the following syntax: +.DS +move(token description, token description) +.DE +Moves are handled specially since that enables the code generator +to keep track of register contents. +Example: +.DS +move(R3,{REGOFF2,LB,$1}) +.DE +will generate code to move R3 to $1(r5) except when +R3 already was a copy of $1(r5). +Then the code will be omitted. +The rules describing how to move things to each other +can be found in the MOVES section described below. +.IP 3) +an erase() call. +This has the following syntax: +.DS +erase(register expression) +.DE +This tells the code generator that the register mentioned no longer has any +useful value. +This is +.I necessary +after code in the table has changed the contents of registers. +For example, after an add to a register the register must be erased, +because the contents no longer match any token. +.IP 4) +For machines that have condition codes, +alas most of them do, +there are provisions to remember condition code setting +and prevent needless testing. +To set the condition code to a token put in the code the following call: +.DS +test(token) +.DE +where token can be all of the standard forms that can also be used in move(). +This will generate a test if the condition codes +were not already set to that token. 
+It is also possible to tell +.I cg +that a certain operation, like a preceding add +has set the condition codes to some token with the call +.DS +setcc(token) +.DE +So a sequence of a setcc and a test on the same token will generate +no code. +Another allowed call within the code is +.DS +samecc +.DE +which tells the code generator that condition codes were unaffected +in this rule. +If no setcc or samecc has been given the default is +.DS +nocc +.DE +when a piece of code contained strings, +which tells the code generator that the condition codes +have no useful value any more. +.NH 3 +Stack replacement +.PP +The stack replacement is a possibly empty list of items to be pushed onto +the fakestack. Three kinds of items are possible: +.IP 1) +An item of the form %[1]. This will push the stacktoken mentioned back +onto the stack unchanged. +.IP 2) +A register expression. This will push the register mentioned +onto the fakestack. +.IP 3) +An item of the form { REGOFF2,%[1.reg],$1 }. +This generates a token with tokenidentifier REGOFF2 and attributes +in order of declaration. +.PP +All tokens matched by the stack pattern at the beginning of the code rule +are first removed and their registers deallocated. +Items are pushed in the order of appearance. +This means that the last item will be on the top of the +stack after the push. +So if the stack pattern contained two token expressions +and they must be pushed back unchanged, +they have to be specified as stack replacement +.DS +%[2] %[1] +.DE +and not the other way around. +.NH 3 +EM replacement +.PP +In exceptional cases it might be useful to leave part of an empattern +undone. +For example, a \fBsdl\fP instruction might be split into two \fBstl\fP instructions +when there is no 4-byte quantity on the stack. The emreplacement part allows +one to express this. 
+Example: +.DS +\fBstl\fP $1 \fBstl\fP $1+2 +.DE +The instructions are inserted in the stream so that they can match +the first part of a pattern in the next step. +Note that since the code generator traverses the EM instructions in a strict +linear fashion, +it is impossible to let the EM replacement match later parts of a pattern. +So if there is a pattern +.DS +\fBloc\fP \fBstl\fP $1==0 +.DE +and the input is +.DS +\fBloc\fP 0 \fBsdl\fP 4 +.DE +the \fBloc\fP\ 0 will be processed first, +then the \fBsdl\fP might be split into two \fBstl\fP's but the pattern +cannot match now. +.NH 3 +Cost +.PP +The cost field can be specified when there is more than one +code rule with the same empattern. +If the code generator has a choice between two possibilities +to generate code it will choose the cheapest according to +the cost field. +The cost for a code generation is the sum of the costs +of all the coercions needed, plus the cost for freeing +registers plus the cost of the code rule itself. +.PP +The format of the costfield is +.DS +( nbytes, time ) or +( nbytes, time ) + %[\fIi\fP] +.DE +with time in the metric desired, like nanoseconds or states. +See constants section above. +The %[\fIi\fP] in the second example is used for adding the cost of a certain +address mode used in the code generated. +This can of course be repeated if desired. +The cost of the address mode must then be specified in the token definition +section. +.NH 3 +Examples +.PP +A list of examples for the PDP-11 is given here. +Far from being complete it gives examples of most kinds +of instructions. +.DS L +\fBadi\fP $1==2 | SREG,SOURCE2 | + "add %[2],%[1]" erase(%[1]) setcc(%[1]) + | %[1] | | (2,450) + %[2] +\&... | SOURCE2,SREG | + "add %[1],%[2]" erase(%[2]) setcc(%[2]) + | %[2] | | (2,450) + %[1] +.DE +is an example of the use of the `...' construct +and shows how to place erase() and setcc() calls. 
+.DS L + +\fBdvi\fP $1==2 | SOURCE2,SPAIRSIGNED | + "div %[1],%[2]" erase(%[2]) + | %[2.regeven] | | + +\fBcmi\fP \fBtgt\fP $1==2 | SOURCE2,SOURCE2 | allocate(REG={CONST,0}) + "cmp %[2],%[1];ble 1f;inc %[a];1:" erase(%[a]) + | %[a] | | + +\fBcal\fP | STACK | + "jsr pc,$1" + | | | + +\fBlol\fP | | | { REGOFF2, LB, $1 } | | + +\fBstl\fP | SOURCE2 | + remove(REGOFF2,%[off]==$1) + move(%[1],{REGOFF2,LB,$1}) + | | | + +| SOURCE2 | + allocate(%[1],REGPAIR) + move(%[1],%[a.2]) + test(%[a.2]) + "sxt %[a.even]" | { PAIRSIGNED, %[a.1], %[a.2] }| | +.DE +This coercion shows how to use the move and test calls. +At first one might think that the testcall is unnecessary, +since the move will have set the condition codes, +but the move may never have been executed +if the register already contained the value, +in which case it is necessary to do the test. +If the move was executed the test will be omitted. +.DS L +| SOURCE2 | allocate(%[1],REG=%[1]) | %[a] | | + +\fBsdl\fP | SOURCE2 | | %[1] | \fBstl\fP $1 \fBstl\fP $1+2 | + +\fBexg\fP $1==2 | SOURCE2 SOURCE2 | | %[1] %[2] | | +.DE +This last example again shows the difference in the order +of the stack pattern and the stack replacement. +.NH 2 +Move code rules +.PP +When issuing a move() call as described above or a register allocation +with initialization, the code generator has to know which +instruction to use for the move. +The code will of course only be generated if it cannot be omitted. +This is listed in the move section of the tables by giving a list +of tuples: +.DS +( source, destination, codepart [ , costfield ] ) +.DE +where the square brackets mean the costfield is optional. +Example for the PDP-11 +.DS +MOVES: +( CONST %[off]==0 , SOURCE2, "clr %[2]" ) +( SOURCE2, SOURCE2, "mov %[1],%[2]" ) +.DE +The moves are scanned from top to bottom, +so the first one that matches will be chosen. 
+.NH 2 +Test code rules +.PP +When issuing a test() call as described above, +the code generator has to know which instruction +to use for the test. +The code will only be generated if the condition codes +were not already set to the token. +This is listed in the test section of the tables by giving +a list of tuples: +.DS +( source, codepart [ , costfield ] ) +.DE +Example for the PDP-11 +.DS +TESTS: +( SOURCE2, "tst %[1]") +( DREG, "tstf %[1]\encfcc") +.DE +The tests are scanned from top to bottom, +so the first one that matches will be chosen. +.NH 2 +Stacking code rules. +.PP +When the code generator has to stack a token it must know +which code to use. +Since it must at all times be possible to empty the fakestack +even when no registers are free, +it is mandatory that all +tokens used must have a rule attached for stacking them +without using a scratch register. +Since however this might be clumsy and +a register might in practice be available +it is also possible to give rules +which use a register. +On the Intel 8086 for example, +there is no instruction to push a constant without using a register, +and the code needed to do it without, must use global data +and as such is very complicated and wasteful of memory and time. +It can therefore be left to be used in extreme cases, +while in general the constant is pushed through a register. +The stacking rules are listed in the stack section of the table as a list +of tuples: +.DS +(source, [ register property ] , codepart [ , costfield ] ) +.DE +Example for the Intel 8086: +.DS +STACKS: +(CONST, REG, move(%[1],%[a]) "push %[a]") +(REG ,, "push %[1]") +.DE +.NH 1 +The files mach.h and mach.c +.PP +The table writer must also supply two files containing +machine dependent declarations and C code. +These files are mach.h and mach.c. +.NH 2 +Types in the code generator +.PP +Three different types of integer coexist in the code generator +and their range depends on the machine at hand. 
+The type 'int' is used for things like labelcounters that won't require +more than 16 bits precision. +The type 'word' is used among others to assemble datawords and +is of type 'long'. +The type 'full' is used for addresses and is of type 'long' if +EM_WSIZE>2 or EM_PSIZE>2. +.PP +In macro and function definitions in later paragraphs implicit typing +will be used for parameters, that is parameters starting with an 's' +will be of type string, and the letters 'i','w','f' will stand for +int, word and full respectively. +.NH 2 +Global variables to work with +.PP +Some global variables are present in the code generator +that can be manipulated by the routines in mach.h and mach.c. +.LP +The declarations are: +.DS L +.ta 20 +FILE *codefile; /* code is emitted on this stream */ +word part_word; /* words to be output are put together here */ +int part_size; /* number of bytes already put in part_word */ +char str[]; /* Last string read in */ +long argval; /* Last int read and kept */ +.DE +.NH 2 +Macros in mach.h +.PP +In the file mach.h a collection of macros is defined that have +to do with formatting of assembly code for the machine at hand. +Some of these macros can of course be left undefined in which case the +macro calls are left in the source and will be treated as +function calls. +These functions can then be defined in \fImach.c\fR. +.PP +The macros to be defined are: +.IP ex_ap(s) 16 +Must print the magic incantations that will mark the symbol \fIs\fR +to be exported to other modules. +This is the translation of the EM \fBexa\fP and \fBexp\fP instructions. +.IP in_ap(s) +Same to import the symbol. +Translation of \fBina\fP and \fBinp\fP. +.IP newplb(s) +Must print the definition of procedure label \fIs\fR. +If left undefined the newilb() macro is used instead. +.IP newilb(s) +Must print the definition of instruction label \fIs\fR. +.IP newdlb(s) +Must print the definition of data label \fIs\fR. 
+.IP dlbdlb(s1,s2) +Must define data label +.I s1 +to be equal to +.I s2 . +.IP newlbss(s,f) +Must declare a piece of memory initialized to BSS_INIT(see below) +of length +.I f +and with label +.I s . +.IP cst_fmt +Format to be used when converting constant arguments of +EM instructions to string. +Argument to be formatted will be 'full'. +.IP off_fmt +Format to be used for integer part of label+constant, +argument will be 'full'. +.IP fmt_ilb(ip,il,s) +Must use the numbers +.I ip +and +.I il +which are a procedure number +and a label number respectively and copy a string to +.I s +that must be unique for that combination. +This procedure is optional, if it is not given ilb_fmt +must be defined as below. +.IP ilb_fmt +Format to be used for creation of unique instruction labels. +Arguments will be a unique procedure number (int) and the label +number (int). +.IP dlb_fmt +Format to be used for printing numeric data labels. +Argument will be 'int'. +.IP hol_fmt +Format to be used for generation of labels for +space generated by a +.B hol +pseudo. +Argument will be 'int'. +.IP hol_off +Format to be used for printing of the address of an element in +.B hol +space. +Arguments will be the offset in the +.B hol +block (word) and the number of the +.B hol +(int). +.IP con_cst(w) +Must generate output that will assemble into one machineword. +.IP con_ilb(s) +Must generate output that will put the address of the instruction label +into the datastream. +.IP con_dlb(s) +Must generate output that will put the address of the data label +into the datastream. +.IP fmt_id(sf,st) +Must take the string in +.I sf +which is a nonnumeric global label, and transform it into a copy made to +.I st +which will not collide with reserved assembler words and system labels. +This procedure is optional, if it is not given the id_first macro is used +as defined below. +.IP id_first +Must be a character. 
+This is prepended to all nonnumeric global labels if their length +is shorter than the maximum allowed(currently 8) or if they already +start with that character. +This is to avoid conflicts of user labels with system labels. +.IP BSS_INIT +Must be a constant. +This is the value filled in all the words not initialized explicitly. +This is loader and system dependent. +If omitted no initialization is assumed. +.NH 3 +Example mach.h for the PDP-11 +.DS L +.ta 8 16 24 32 40 48 56 +#define ex_ap(y) fprintf(codefile,"\et.globl %s\en",y) +#define in_ap(y) /* nothing */ + +#define newplb(x) fprintf(codefile,"%s:\en",x) +#define newilb(x) fprintf(codefile,"%s:\en",x) +#define newdlb(x) fprintf(codefile,"%s:\en",x) +#define dlbdlb(x,y) fprintf(codefile,"%s=%s\en",x,y) +#define newlbss(l,x) fprintf(codefile,"%s:.=.+%d.\en",l,x); + +#define cst_fmt "$%d." +#define off_fmt "%d." +#define ilb_fmt "I%x_%x" +#define dlb_fmt "_%d" +#define hol_fmt "hol%d" + +#define hol_off "%ld.+hol%d" + +#define con_cst(x) fprintf(codefile,"%ld.\en",x) +#define con_ilb(x) fprintf(codefile,"%s\en",x) +#define con_dlb(x) fprintf(codefile,"%s\en",x) + +#define id_first '_' +#define BSS_INIT 0 +.DE +.NH 2 +Functions in mach.c +.PP +In mach.c some functions must be supplied, +mostly manipulating data resulting from pseudoinstructions. +The specifications are given here, +implicit typing of parameters as above. +.IP con_part(isz,word) 20 +This function must manipulate the globals +part_word and part_size to append the isz bytes +contained in word to the output stream. +If part_word is full, i.e. part_size==EM_WSIZE +the function part_flush() may be called to empty the buffer. +This is the function that must go through the trouble of +doing byte order in words correct. +.IP con_mult(w_size) +This function must take the string str[] and create an integer +from the string of size w_size and generate code to assemble global +data for that integer. 
+Only the sizes for which arithmetic is implemented need be +handled, +so if 200-byte integer division is not implemented, +200-byte integer global data do not have to be implemented. +Here one must take care of word order in long integers. +.IP con_float() +This function must generate code to assemble a floating +point number of which the size is contained in argval +and the ASCII representation in str[]. +.IP prolog(f_nlocals) +This function is called at the start of every procedure. +Function prolog code must be generated, +and room made for local variables for a total of f_nlocals bytes. +.IP mes(w_mesno) +This function is called when a +.B mes +pseudo is seen that is not handled by the machine independent part. +The example below probably shows all the table writer ever has to know +about that. +.IP segname[] +This is not a function, +but an array of four strings. +These strings are put out whenever the code generator +switches segments. +Segments are SEGTXT, SEGCON, SEGROM and SEGBSS in that order. +.NH 3 +Example mach.c for the PDP-11 +.PP +As an example of the sort of code expected, +the mach.c for the PDP-11 is presented here. +.DS L +.ta 8 16 24 32 40 48 56 64 +/* + * machine dependent back end routines for the PDP-11 + */ + +con_part(sz,w) register sz; word w; { + + while (part_size % sz) + part_size++; + if (part_size == EM_WSIZE) + part_flush(); + if (sz == 1) { + w &= 0xFF; + if (part_size) + w <<= 8; + part_word |= w; + } else { + assert(sz == 2); + part_word = w; + } + part_size += sz; +} + +con_mult(sz) word sz; { + long l; + + if (sz != 4) + fatal("bad icon/ucon size"); + l = atol(str); + fprintf(codefile,"\et%o;%o\en",(int)(l>>16),(int)l); +} + +con_float() { + double f; + register short *p,i; + + /* + * This code is correct only when the code generator is + * run on a PDP-11 or VAX-11 since it assumes native + * floating point format is PDP-11 format. 
+ */ + + if (argval != 4 && argval != 8) + fatal("bad fcon size"); + f = atof(str); + p = (short *) &f; + i = *p++; + if (argval == 8) { + fprintf(codefile,"\et%o;%o;",i,*p++); + i = *p++; + } + fprintf(codefile,"\et%o;%o\en",i,*p++); +} + +prolog(nlocals) full nlocals; { + + fprintf(codefile,"mov r5,-(sp)\enmov sp,r5\en"); + if (nlocals == 0) + return; + if (nlocals == 2) + fprintf(codefile,"tst -(sp)\en"); + else + fprintf(codefile,"sub $%d.,sp\en",nlocals); +} + +mes(type) word type; { + int argt ; + + switch ( (int)type ) { + case ms_ext : + for (;;) { + switch ( argt=getarg( + ptyp(sp_cend)|ptyp(sp_pnam)|sym_ptyp) ) { + case sp_cend : + return ; + default: + strarg(argt) ; + fprintf(codefile,".globl %s\en",argstr) ; + break ; + } + } + default : + while ( getarg(any_ptyp) != sp_cend ) ; + break ; + } +} + +char *segname[] = { + ".text", /* SEGTXT */ + ".data", /* SEGCON */ + ".data", /* SEGROM */ + ".bss" /* SEGBSS */ +}; +.DE +.NH 1 +Coercions +.PP +A central part in code generation is taken by the +.I coercions . +It is the responsibility of the table writer to provide +all necessary coercions so that code generation can continue. +The very minimal set of coercions are +the coercions to unstack every token expression, +in combination with the rules to stack every token. +.PP +If these are present the code generator can always make the necessary +transformations by stacking and unstacking. +Of course for codequality it is usually best to provide extra coercions +to prevent this stacking to take place. +.I Cg +discriminates three types of coercions: +.IP 1) +Unstacking coercions. +This category can use the allocate() call in its code. +.IP 2) +Splitting coercions, these are the coercions that split +larger tokens into smaller ones. +.IP 3) +Transforming coercions, these are the coercions that transform +a token into another one of the same size. +This category can use the allocate() call in its code. 
+.PP +When a stack configuration does not match the stack pattern +.I coercions +are searched for in the following order: +.IP 1) +First tokens are split if necessary to get their sizes right. +.IP 2) +Then transforming coercions are found that will make the pattern match. +.IP 3) +Finally if the stack pattern is longer than the fakestack contents +unstacking coercions will be used to fill up the pattern. +.PP +At any point, when coercions are missing so code generation could not +continue, the offending tokens are stacked. +.NH 1 +Internal workings of the code generator. +.NH 2 +Description of tables.c and tables.h contents +.PP +In this section the intermediate files will be described +that are produced by +.I cgg +and compiled with machine independent code to produce a code generator. +.NH 3 +Tables.c +.PP +Tables.c contains a large number of initialized array's of all sorts. +Description of each follows: +.br +.in 1i +.ti -0.5i +byte code rules[] +.br +Pseudo code interpreted by the code generator. +Always starts with some opcode followed by operands depending +on the opcode. +Integers in this table are between 0 and 32767 and have a one byte +encoding if between 0 and 127. +.ti -0.5i +char stregclass[] +.br +Number of computed static register class per register. +Two registers are in the same class if they have the same properties +and don't share a common subregister. +.ti -0.5i +struct reginfo machregs[] +.br +Info per register. +Initialized with representation string, size, +members of the register and set of registers affected when this +one is changed. +Also contains room for runtime information, +like contents and reference count. +.ti -0.5i +tkdef_t tokens[] +.br +Information per tokentype. +Initialized with size, cost, type of operands and formatstring. +.ti -0.5i +node_t enodes[] +.br +List of triples representing expressions for the code generator. +.ti -0.5i +string code strings[] +.br +List of strings. 
+All strings are put in a list and checked for duplication, +so only one copy per string will reside here. +.ti -0.5i +set_t machsets[] +.br +List of token expression sets. +Bit 0 of the set is used for the SCRATCH property of registers, +bit 1 upto NREG are for the corresponding registers +and bit NREG+1 upto the end are for corresponding tokens. +.ti -0.5i +inst_t tokeninstances[] +.br +List of descriptions for building tokens. +Contains type of rule for building one, +plus operands depending on the type. +.ti -0.5i +move_t moves[] +.br +List of move rules. +Contains token expressions for source and destination +plus cost and index for code rule. +.ti -0.5i +byte pattern[] +.br +EM patterns. +This is structured internally as chains of patterns, +each chain pointed at by pathash[]. +After each pattern the list of possible code rules is given. +.ti -0.5i +int pathash[256] +.br +Indices into pattern[] for all patterns with a certain low order +byte of the hashing function. +.ti -0.5i +c1_t c1coercs[] +.br +List of rules to stack tokens. +Contains token expressions, +register needed, +cost +and code rule. +.ti -0.5i +c2_t c2coercs[] +.br +List of splitting coercions. +Token expressions, +split factor, +replacements +and code rule. +.ti -0.5i +c3_t c3coercs[] +.br +List of one to one coercions. +Token expressions, +register needed, +replacement +and code rule. +.ti -0.5i +struct reginfo **reglist[] +.br +List of lists of pointers to register information. +For every property the list is here +to find the registers corresponding to it. +.in 0 +.NH 3 +tables.h +.PP +In tables.h various derived constants for the tables are +given. +They are then used to determine array sizes in the actual code generator, +plus loop termination in some cases. +.NH 2 +Other important data structures +.PP +During code generation some other data structures are used +and here is a short description of some of the important ones. 
+.PP +Tokens are kept in the code generator as a struct consisting of +one integer +.I t_token +which is -1 if the token is a register, +and the number of the token otherwise, +plus an array of +.I TOKENSIZE +unions +.I t_att +of which the first is the register number in case of a register. +.PP +The fakestack is an array of these tokens, +there is a global variable +.I stackheight . +.PP +The results of expressions are kept in a struct +.I result +with elements +.I e_typ , +giving the type of the expression: +.I EV_INT , +.I EV_REG +or +.I EV_STR , +and a union +.I e_v +which contains the real result. +.NH 2 +A tour through the sources +.NH 3 +codegen.c +.PP +The file codegen.c contains one large function consisting +of one giant switch statement. +It is the interpreter for the code generator pseudo code +as contained in code rules[]. +This function can call itself recursively when doing lookahead. +Arguments are: +.IP codep 10 +Pointer into code rules, pseudo program counter. +.IP ply +Number of EM pattern lookahead allowed. +.IP toplevel +Boolean telling whether this is the toplevel codegen() or +a deeper incarnation. +.IP costlimit +A cutoff value to limit searches. +If the cost crosses costlimit the incarnation can terminate. +.IP forced +A register number if nonzero. +This is used inside coercions to force the allocate() call to allocate +a register determined by earlier lookahead. +.PP +The instructions implemented in the switch: +.NH 4 +DO_NEXTEM +.PP +Matches the next EM pattern and does lookahead if necessary to find the best +code rule associated with this pattern. +Heuristics are used to determine best code rule when possible. +This is done by calling the distance() function. +.NH 4 +DO_COERC +.PP +This sets the code generator in the state to do a from stack coercion. +.NH 4 +DO_XMATCH +.PP +This is done when a match no longer has to be checked. +Used when the nocoercions: trick is used in the table.
+.NH 4 +DO_MATCH +.PP +This is the big one inside this function. +It has the task to transform the contents of the current +fakestack to match the pattern given after it. +.PP +Since the code generator does not know combining coercions, +i.e. there is no way to make a big token out of two smaller ones, +the first thing done is to stack every token that is too small. +After that all tokens too big are split if possible to the right size. +.PP +Next the coercions are sought that would transform tokens in place to +the right one, plus the coercions that would pop tokens of the stack. +Each of those might need a register, so a list of registers is generated +and at the end of looking for coercions the function +.I tuples() +is called to generate the list of all possible \fIn\fP-tuples, +where +.I n +equals the number of registers needed. +.PP +Lookahead is now performed if the number of tuples is greater than one. +If no possibility is found within the costlimit, +the fakestack is made smaller by pushing the bottom token, +and this process is repeated until either a way is found or +the fakestack is completely empty and there is still no way +to make the match. +.PP +If there is a way the corresponding coercions are executed +and the code is finished. +.NH 4 +DO_REMOVE +.PP +Here the remove() call is executed, all tokens matched by the +token expression plus boolean expression are pushed. +In the current implementation there is no attempt to move those +tokens to registers, but that is a possible future extension. +.NH 4 +DO_DEALLOCATE +.PP +This one temporarily decrements by one the reference count of all registers +contained in the token given as argument. +.NH 4 +DO_REALLOCATE +.PP +Here all temporary deallocates are made undone. +.NH 4 +DO_ALLOCATE +.PP +This is the part that allocates a register and decides which one to use. +If the +.I forced +argument was given its task is simple, +otherwise some work must be done. 
+First the list of possible registers is scanned, +all free registers noted and it is noted whether any of those +registers is already +containing the initialization. +If no registers are available some fakestack token is stacked and the +process is repeated. +.PP +After that if an exact match was found, +the list of registers is reduced to one register matching exactly +out of every register class. +Now lookahead is performed if necessary and the register chosen. +If an initialization was given the corresponding move is performed, +otherwise the register is marked empty. +.NH 4 +DO_LOUTPUT +.PP +This prints a string and an expression. +Only done on toplevel. +.NH 4 +DO_ROUTPUT +.PP +Prints a string and a new line. +Only on toplevel. +.NH 4 +DO_MOVE +.PP +Calls the move() function in the code generator to implement the move() +function in the table. +.NH 4 +DO_ERASE +.PP +Marks the register that is its argument as empty. +.NH 4 +DO_TOKREPLACE +.PP +This is the token replacement part. +It is also called if there is no token replacement because it has +some other functions as well. +.PP +First the tokens that will be pushed on the fakestack are computed +and stored in a temporary array. +Then the tokens that were matched in this rule are popped +and their embedded registers have their reference count +decremented. +After that the replacement tokens are pushed. +.PP +Finally all registers allocated in this rule have their reference count +decremented. +If they were not pushed on the fakestack they will be available again +in the next code rule. +.NH 4 +DO_EMREPLACE +.PP +Places replacement EM instructions back into the instruction stream. +.NH 4 +DO_COST +.PP +Accounts for cost as given in the code rule. +.NH 4 +DO_RETURN +.PP +Returns from this level of codegen(). +Is used at the end of coercions, +move rules etc.. +.NH 3 +compute.c +.PP +This module computes the various expressions as given +in the enodes[] array. 
+Nothing very special happens here, +it is just a recursive function computing leaves +of expressions and applying the operator. +.NH 3 +equiv.c +.PP +In this module the tuples() function is implemented. +It is given the number of registers needed and +a list of register lists and it constructs a list of tuples +where the \fIn\fP'th register comes from the \fIn\fP'th list. +Before the list is constructed however +the dynamic register classes are computed. +Two registers are in the same dynamic class if they are in the +same static class and their contents is the same. +.PP +After that the permute() recursive function is called to +generate the list of tuples. +After construction a generated tuple is added to the list +if it is not already pairwise in the same class +or if the register relations are not the same, +i.e. if the first and second register share a common +subregister in one tuple and not in the other they are considered different. +.NH 3 +fillem.c +.PP +This is the routine that does the reading of EM instructions +and the handling of pseudos. +The mach.c module provided by the table writer is included +at the end of this module. +The routine fillemlines() is called by nextem() at toplevel +to make sure there are enough instructions to match. +It fills the EM instruction buffer up to 5 places from the end to +keep room for EM replacement instructions, +or up to a pseudo. +.PP +The dopseudo() function performs the function of the pseudo last +encountered. +If the pseudo is a +.B rom +the corresponding label is saved with the contents of the +.B rom +to be available to the code generator later. +The rest of the routines are small service routines for either +input or data output. +.NH 3 +gencode.c +.PP +This module contains routines called by codegen() to generate the real +code to the codefile.
+The function gencode() gets a string as argument and copies it to codefile +while processing certain embedded control characters implementing +the $2 and [1.reg] escapes. +The function genexpr() prints the expression given as argument. +It is used to implement the %(\ expr\ %) escape. +The prtoken() function interprets the tokenformat as given in +the tokens[] array. +.NH 3 +glosym.c +.PP +This module maintains a list of global symbols that have a +.B rom +pseudo associated. +There are functions to enter a symbol and to find a symbol. +.NH 3 +main.c +.PP +Main routine of the code generator. +Processes arguments and flags. +Flags available are: +.IP -d +Sets debug mode if the code generator was not compiled with +the NDEBUG macro defined. +Debug mode gives very long output on stderr indicating +all steps of the code generation process including nesting +of the codegen() function. +.IP -p\fIn\fP +Sets the lookahead depth to +.I n , +the +.I p +stands for ply, +a well known word in chess playing programs. +.IP -w\fIn\fP +Sets the weight percentage for size in the cost function to +.I n +percent. +Uses Euclides algorithm to simplify rationals. +.NH 3 +move.c +.PP +Function to implement the move() pseudo function in the tables, +register initialization and the setcc and test pseudo functions. +First tests are made to try to prevent the move from really happening. +The condition code register is treated special here. +After that, if there is an after that, +the move rule is found and the code executed. +.NH 3 +nextem.c +.PP +The entry point of this module is nextem(). +It hashes the next three EM instructions, +and uses the low order byte of the hash +as an index into the array pathash[], +to find a chain of patterns in the array +pattern[], +that are all tried for a match. +.PP +The function trypat() does most of the work +checking patterns. +When a pattern is found to match all instructions +the operands of the instruction are placed into the dollar[] array. 
+Then the boolean expression is tried. +If it matches the function can return, +leaving the operands still in the dollar[] array, +so later in the code rule they can still be used. +.NH 3 +reg.c +.PP +Collection of routines to handle registers. +Reference count routines are here, +chrefcount() and getrefcount(), +plus routines to erase a single register or all of them, +erasereg() and cleanregs(). +.PP +If NDEBUG hasn't been defined, here is also the routine that checks +if the reference count kept with the register information is in +agreement with the number of times it occurs on the fakestack. +.NH 3 +salloc.c +.PP +Module for string allocation and garbage collection. +Contains entry points myalloc(), +a routine calling malloc() and checking whether room is left, +myfree(), just free(), +popstr() a function called from state.c to free all strings +made since the last saved status. +Furthermore there is salloc() which has the size of the string as parameter +and returns a pointer to the allocated space, +while keeping a copy of the pointer for garbage collection purposes. +.PP +The function garbage_collect is called from codegen() at toplevel +every now and then, +and checks all places where strings may reside to mark strings +as being in use. +Strings not in use are returned to the pool of free space. +.NH 3 +state.c +.PP +Set of routines called to save current status, +restore a previous saved state and to free the room +occupied by a saved state. +A list of structs is kept here to save the state. +If this is not done, +small allocates will take space +from the holes big enough for state saves, +and as a result every new state save will need a new struct. +The code generator runs out of room very rapidly under these conditions. +.NH 3 +subr.c +.PP +Random set of leftover routines. +.NH 4 +match +.PP +Computes whether a certain token matches a certain token expression.
+Just computes a bitnumber according to the algorithm explained with +machsets[], +and tests the bit and the boolean expression if it is there. +.NH 4 +instance,cinstance +.PP +These two functions compute a token from a description. +They differ very slightly, cinstance() is used to compute +the result of a coercion in a certain context +and therefore has more arguments, which it uses instead of +the global information instance() works on. +.NH 4 +eqtoken +.PP +eqtoken computes whether two tokens can be considered identical. +Used to check register contents during moves mainly. +.NH 4 +distance +.PP +This is the heuristic function that computes a distance from +the current fakestack contents to the token pattern in the table. +It likes exact matches most, then matches where at least the sizes are correct +and if the sizes are not correct it likes too large sizes more than too +small, since splitting a token is easier than combining one. +.NH 4 +split +.PP +This function tries to find a splitting coercion +and executes it immediately when found. +The fakestack is shuffled thoroughly when this happens, +so pieces below the token that must be split are saved first. +.NH 4 +docoerc +.PP +This function executes a coercion that was found. +The same shuffling is done, so the top of the stack is again saved. +.NH 4 +stackupto +.PP +This function gets a pointer into the fakestack and must stack +every token including the one pointed at up to the bottom of the fakestack. +The first stacking rule possible is used, +so rules using registers must come first. +.NH 4 +findcoerc +.PP +Looks for a one to one coercion, if found it returns a pointer +to it and leaves a list of possible registers to use in the global +variable curreglist. +This is used by codegen(). +.NH 3 +var.c +.PP +Global variables used by more than one module. +External definitions are in extern.h.
diff --git a/doc/crefman.doc b/doc/crefman.doc new file mode 100644 index 0000000..b28e26d --- /dev/null +++ b/doc/crefman.doc @@ -0,0 +1,629 @@ +.\" $Id: crefman.doc,v 1.5 1994/06/24 10:01:51 ceriel Exp $ +.\" eqn crefman.doc | troff -ms +.EQ +delim $$ +.EN +.RP +.TL +ACK/CEM Compiler +.br +Reference Manual +.AU +Erik H. Baalbergen +.AI +Department of Mathematics and Computer Science +Vrije Universiteit +Amsterdam +The Netherlands +.AB no +.AE +.NH +C Language +.PP +This section discusses the extensions to and deviations from the C language, +as described in [1]. +The issues are numbered according to the reference manual. +.SH +2.2 Identifiers +.PP +Upper and lower case letters are different. +The number of significant letters +is 32 by default, but may be set to another value using the \fB\-M\fP option. +The identifier length should be set according to the rest of the compilation +programs. +.SH +2.3 Keywords +.SH +\f(CWasm\fP +.PP +The keyword \f(CWasm\fP +is recognized. +However, the statement +.DS +.ft CW +asm(string); +.ft R +.DE +is skipped, while a warning is given. +.SH +\f(CWenum\fP +.PP +The \f(CWenum\fP keyword is recognized and interpreted. +.SH +\f(CWentry\fP, \f(CWfortran\fP +.PP +The words \f(CWentry\fP and \f(CWfortran\fP +are reserved under the restricted option. +The words are not interpreted by the compiler. +.SH +2.4.1 Integer Constants +.PP +The type of an integer constant is the first of the corresponding list +in which its value can be represented. Decimal: \f(CWint, long, unsigned long\fP; +octal or hexadecimal: \f(CWint, unsigned, long, unsigned long\fP; suffixed by +the letter L or l: \f(CWlong, unsigned long\fP. +.SH +2.4.3 Character Constants +.PP +A character constant is a sequence of 1 up to \f(CWsizeof(int)\fP characters +enclosed in single quotes. +The value of a character constant '$c sub 1 c sub 2 ... c sub n$' +is $d sub n + M \(mu d sub {n - 1} + ...
+ M sup {n - 2} \(mu d sub 2 + M sup {n - 1} \(mu d sub 1$, +where M is 1 + maximum unsigned number representable in an \f(CWunsigned char\fP, +and $d sub i$ is the signed value (ASCII) +of character $c sub i$. +.SH +2.4.4 Floating Constants +.PP +The compiler does not support compile-time floating point arithmetic. +.SH +2.6 Hardware characteristics +.PP +The compiler is capable of producing EM code for machines with the following +properties +.IP \(bu +a \f(CWchar\fP is 8 bits +.IP \(bu +the size of \f(CWint\fP is equal to the word size +.IP \(bu +the size of \f(CWshort\fP may not exceed the size of \f(CWint\fP +.IP \(bu +the size of \f(CWint\fP may not exceed the size of \f(CWlong\fP +.IP \(bu +the size of pointers is equal to the size of either \f(CWshort\fP, \f(CWint\fP +or \f(CWlong\fP +.LP +.SH +4 What's in a name? +.SH +\f(CWchar\fP +.PP +Objects of type \f(CWchar\fP are taken to be signed. +The combination \f(CWunsigned char\fP is legal. +.SH +\f(CWunsigned\fP +.PP +The type combinations \f(CWunsigned char\fP, \f(CWunsigned short\fP and +\f(CWunsigned long\fP are supported. +.SH +\f(CWenum\fP +.PP +The data type \f(CWenum\fP is implemented as described +in \fIRecent Changes to C\fP (see appendix A). +.I Cem +treats enumeration variables as if they were \f(CWint\fP. +.SH +\f(CWvoid\fP +.PP +Type \f(CWvoid\fP is implemented. +The type specifies an empty set of values, which takes no storage space. +.SH +\fRFundamental types\fP +.PP +The names of the fundamental types can be redefined by the user, using +\f(CWtypedef\fP. +.SH +7 Expressions +.PP +The order of evaluation of expressions depends on the complexity of the +subexpressions. +In case of commutative operations, the most complex subexpression is +evaluated first. +Parameter lists are evaluated from right to left. +.SH +7.2 Unary operators +.PP +The type of a \f(CWsizeof\fP expression is \f(CWunsigned int\fP.
+.SH +7.13 Conditional operator +.PP +Both the second and the third expression in a conditional expression may +include assignment operators. +They may be structs or unions. +.SH +7.14 Assignment operators +.PP +Structures may be assigned, passed as arguments to functions, and returned +by functions. +The types of operands taking part must be the same. +.SH +8.2 Type specifiers +.PP +The combinations \f(CWunsigned char\fP, \f(CWunsigned short\fP +and \f(CWunsigned long\fP are implemented. +.SH +8.5 Structure and union declarations +.PP +Fields of any integral type, either signed or unsigned, +are supported, as long as the type fits in a word on the target machine. +.PP +Fields are left adjusted by default; the first field is put into the left +part of a word, the next one on the right side of the first one, etc. +The \f(CW-Vr\fP option in the call of the compiler +causes fields to be right adjusted within a machine word. +.PP +The tags of structs and unions occupy a different name space from that of +variables and that of member names. +.SH +9.7 Switch statement +.PP +The type of \fIexpression\fP in +.DS +.ft CW +\f(CWswitch (\fP\fIexpression\fP\f(CW)\fP \fIstatement\fP +.ft +.DE +must be integral. +A warning is given under the restricted option if the type is \f(CWlong\fP. +.SH +10 External definitions +.PP +See [4] for a discussion on this complicated issue. +.SH +10.1 External function definitions +.PP +Structures may be passed as arguments to functions, and returned +by functions. +.SH +11.1 Lexical scope +.PP +Typedef names may be redeclared like any other variable name; the ice mentioned +in \(sc11.1 is walked correctly. +.SH +12 Compiler control lines +.PP +Lines which do not occur within comment, and with \f(CW#\fP as first +character, are interpreted as compiler control lines. +There may be an arbitrary number of spaces, tabs and comments (collectively +referred to as \fIwhite space\fP) following the \f(CW#\fP. +Comments may contain newline characters.
+Control lines with only white space between the \f(CW#\fP and the line separator +are skipped. +.PP +The #\f(CWinclude\fP, #\f(CWifdef\fP, #\f(CWifndef\fP, #\f(CWundef\fP, #\f(CWelse\fP and +#\f(CWendif\fP control lines and line directives consist of a fixed number of +arguments. +The list of arguments may be followed by an arbitrary sequence of characters, +in which comment is interpreted as such. +(I.e., the text between \f(CW/*\fP and \f(CW*/\fP is skipped, regardless of +newlines; note that commented-out lines beginning with \f(CW#\fP are not +considered to be control lines.) +.SH +12.1 Token replacement +.PP +The replacement text of macros is taken to be a string of characters, in which +an identifier may stand for a formal parameter, and in which comment is +interpreted as such. +Comments and newline characters, preceded by a backslash, in the replacement +text are replaced by a space character. +.PP +The actual parameters of a macro are considered tokens and are +balanced with regard to \f(CW()\fP, \f(CW{}\fP and \f(CW[]\fP. +This prevents the use of macros like +.DS +.ft CW +CTL([) +.ft +.DE +.PP +Formal parameters of a macro must have unique names within the formal-parameter +list of that macro. +.PP +A message is given at the definition of a macro if the macro has +already been #\f(CWdefined\fP, while the number of formal parameters differ or +the replacement texts are not equal (apart from leading and trailing +white space). +.PP +Recursive use of macros is detected by the compiler. +.PP +Standard #\f(CWdefined\fP macros are +.DS +\f(CW__FILE__\fP name of current input file as string constant +\f(CW__DATE__\fP current date as string constant; e.g. \f(CW"Tue Wed 2 14:45:23 1986"\fP +\f(CW__LINE__\fP current line number as an integer +.DE +.PP +No message is given if \fIidentifier\fP is not known in +.DS +.ft CW +#undef \fIidentifier\fP +.ft +.DE +.SH +12.2 File inclusion +.PP +A newline character is appended to each file which is included.
+.SH +12.3 Conditional compilation +.PP +The #\f(CWif\fP, #\f(CWifdef\fP and #\f(CWifndef\fP control lines may be followed +by an arbitrary number of +.DS +.ft CW +#elif \fIconstant-expression\fP +.ft +.DE +control lines, before the corresponding #\f(CWelse\fP or #\f(CWendif\fP +is encountered. +The construct +.DS +.ft CW +#elif \fIconstant-expression\fP +some text +#endif /* corresponding to #elif */ +.ft +.DE +is equivalent to +.DS +.ft CW +#else +#if \fIconstant-expression\fP +some text +#endif /* corresponding to #if */ +#endif /* corresponding to #else */ +.ft +.DE +.PP +The \fIconstant-expression\fP in #\f(CWif\fP and #\f(CWelif\fP control lines +may contain the construction +.DS +.ft CW +defined(\fIidentifier\fP) +.ft +.DE +which is replaced by \f(CW1\fP, if \fIidentifier\fP has been #\f(CWdefined\fP, +and by \f(CW0\fP, if not. +.PP +Comments in skipped lines are interpreted as such. +.SH +12.4 Line control +.PP +Line directives may occur in the following forms: +.DS +.ft CW +#line \fIconstant\fP +#line \fIconstant\fP "\fIfilename\fP" +#\fIconstant\fP +#\fIconstant\fP "\fIfilename\fP" +.ft +.DE +Note that \fIfilename\fP is enclosed in double quotes. +.SH +14.2 Functions +.PP +If a pointer to a function is called, the function the pointer points to +is called instead. +.SH +15 Constant expressions +.PP +The compiler distinguishes the following types of integral constant expressions +.IP \(bu +field-width specifier +.IP \(bu +case-entry specifier +.IP \(bu +array-size specifier +.IP \(bu +global variable initialization value +.IP \(bu +enum-value specifier +.IP \(bu +truth value in \f(CW#if\fP control line +.LP +.PP +Constant integral expressions are compile-time evaluated while an effort +is made to report overflow. +Constant floating expressions are not compile-time evaluated. +.NH +Compiler flags +.IP \fB\-C\fR +Run the preprocessor stand-alone while maintaining the comments. +Line directives are produced whenever needed. 
+.IP \fB\-D\fP\fIname\fP=\fIstring-of-characters\fP +.br +Define \fIname\fR as macro with \fIstring-of-characters\fR as +replacement text. +.IP \fB\-D\fP\fIname\fP +.br +Equal to \fB\-D\fP\fIname\fP\fB=1\fP. +.IP \fB\-E\fP +Run the preprocessor stand alone, i.e., +list the sequence of input tokens and delete any comments. +Line directives are produced whenever needed. +.IP \fB\-I\fIpath\fR +.br +Prepend \fIpath\fR to the list of include directories. +To put the directories "include", "sys/h" and "util/h" into the +include directory list in that order, the user has to specify +.DS +.ft CW +-Iinclude -Isys/h -Iutil/h +.ft R +.DE +An empty \fIpath\fP causes the standard include +directory (usually \f(CW/usr/include\fP) to be forgotten. +.IP \fB\-M\fP\fIn\fP +.br +Set maximum significant identifier length to \fIn\fP. +.IP \fB\-n\fP +Suppress EM register messages. +The user-declared variables are not stored into registers on the target +machine. +.IP \fB\-p\fP +Generate the EM \fBfil\fP and \fBlin\fP instructions in order to enable +an interpreter to keep track of the current location in the source code. +.IP \fB\-P\fP +Equivalent with \fB\-E\fP, but without line directives. +.IP \fB\-R\fP +Interpret the input as restricted C (according to the language as +described in [1]). +.IP \fB\-T\fP\fIpath\fP +.br +Create temporary files, if necessary, in directory \fIpath\fP. +.IP \fB\-U\fP\fIname\fP +.br +Get rid of the compiler-predefined macro \fIname\fP, i.e., +consider +.DS +.ft CW +#undef \fIname\fP +.ft R +.DE +to appear in the beginning of the file. +.IP \fB\-V\fIcm\fR.\fIn\fR,\ \fB\-V\fIcm\fR.\fIncm\fR.\fIn\fR\ ... +.br +Set the size and alignment requirements. +The letter \fIc\fR indicates the simple type, which is one of +\fBs\fR(short), \fBi\fR(int), \fBl\fR(long), \fBf\fR(float), \fBd\fR(double) +or \fBp\fR(pointer). +If \fIc\fR is \fBS\fP or \fBU\fP, then \fIn\fP is taken to be the initial +alignment of structs or unions, respectively. 
+The effective alignment of a struct or union is the least common multiple +of the initial struct/union alignment and the alignments of its members. +The \fIm\fR parameter can be used to specify the length of the type (in bytes) +and the \fIn\fR parameter for the alignment of that type. +Absence of \fIm\fR or \fIn\fR causes the default value to be retained. +To specify that the bitfields should be right adjusted instead of the +default left adjustment, specify \fBr\fR as \fIc\fR parameter. +.IP \fB\-w\fR +Suppress warning messages. +.IP \fB\-\-\fIcharacter\fR +.br +Set debug-flag \fIcharacter\fP. +This enables some special features offered by a debug and development version of +the compiler. +Some particular flags may be recognized, others may have surprising effects. +.RS +.IP \fBd\fP +Generate a dependency graph, reflecting the calling structure of functions. +Lines of the form +.DS +.ft CW +DFA: \fIcalling-function\fP: \fIcalled-function\fP +.ft +.DE +are generated whenever a function call is encountered. +.IP \fBf\fP +Dump whole identifier table, including macros and reserved words. +.IP \fBh\fP +Supply hash-table statistics. +.IP \fBi\fP +Print names of included files. +.IP \fBm\fP +Supply statistics concerning the memory allocation. +.IP \fBt\fP +Dump table of identifiers. +.IP \fBu\fP +Generate extra statistics concerning the predefined types and identifiers. +Works in combination with \fBf\fP or \fBt\fP. +.IP \fBx\fP +Print expression trees in human-readable format. +.RE +.LP +.SH +References +.IP [1] +Brian W. Kernighan, Dennis M. Ritchie, +.I +The C Programming Language +.R +.IP [2] +L. Rosler, +.I +Draft Proposed Standard - Programming Language C, +.R +ANSI X3J11 Language Subcommittee +.IP [3] +Erik H. Baalbergen, Dick Grune, Maarten Waage, +.I +The CEM Compiler, +.R +Informatica Manual IM-4, Dept. of Mathematics and Computer Science, Vrije +Universiteit, Amsterdam, The Netherlands +.IP [4] +Erik H.
Baalbergen, +.I +Modeling global declarations in C, +.R +internal paper +.LP +.bp +.SH +Appendix A - Enumeration Type +.PP +The syntax is +.sp +.RS +.I enum-specifier : +.RS +\&\f(CWenum\fP { \fIenum-list\fP } +.br +\&\f(CWenum\fP \fIidentifier\fP { \fIenum-list\fP } +.br +\&\f(CWenum\fP \fIidentifier\fP +.RE +.sp +\&\fIenum-list\fP : +.RS +\&\fIenumerator\fP +.br +\&\fIenum-list\fP , \fIenumerator\fP +.RE +.sp +\&\fIenumerator\fP : +.RS +\&\fIidentifier\fP +.br +\&\fIidentifier\fP = \fIconstant-expression\fP +.RE +.sp +.RE +The identifier has the same role as the structure tag in a struct specification. +It names a particular enumeration type. +.PP +The identifiers in the enum-list are declared as constants, and may appear +whenever constants are required. +If no enumerators with +.B = +appear, then the values of the constants begin at 0 and increase by 1 as the +declaration is read from left to right. +An enumerator with +.B = +gives the associated identifier the value indicated; subsequent identifiers +continue the progression from the assigned value. +.PP +Enumeration tags and constants must all be distinct, and, unlike structure +tags and members, are drawn from the same set as ordinary identifiers. +.PP +Objects of a given enumeration type are regarded as having a type distinct +from objects of all other types. +.bp +.SH +Appendix B: C grammar in LL(1) form +.PP +The \fBbold-faced\fP and \fIitalicized\fP tokens represent terminal symbols. +.vs 16 +.nf +\fBexternal definitions\fP +program: external-definition* +external-definition: ext-decl-specifiers [declarator [function | non-function] | '\fB;\fP'] | asm-statement +ext-decl-specifiers: decl-specifiers? +non-function: initializer? ['\fB,\fP' init-declarator]* '\fB;\fP' +function: declaration* compound-statement +.sp 1 +\fBdeclarations\fP +declaration: decl-specifiers init-declarator-list? '\fB;\fP' +decl-specifiers: other-specifier+ [single-type-specifier other-specifier*]? 
| single-type-specifier other-specifier* +other-specifier: \fBauto\fP | \fBstatic\fP | \fBextern\fP | \fBtypedef\fP | \fBregister\fP | \fBshort\fP | \fBlong\fP | \fBunsigned\fP +type-specifier: decl-specifiers +single-type-specifier: \fItype-identifier\fP | struct-or-union-specifier | enum-specifier +init-declarator-list: init-declarator ['\fB,\fP' init-declarator]* +init-declarator: declarator initializer? +declarator: primary-declarator ['\fB(\fP' formal-list ? '\fB)\fP' | arrayer]* | '\fB*\fP' declarator +primary-declarator: identifier | '\fB(\fP' declarator '\fB)\fP' +arrayer: '\fB[\fP' constant-expression? '\fB]\fP' +formal-list: formal ['\fB,\fP' formal]* +formal: identifier +enum-specifier: \fBenum\fP [enumerator-pack | identifier enumerator-pack?] +enumerator-pack: '\fB{\fP' enumerator ['\fB,\fP' enumerator]* '\fB,\fP'? '\fB}\fP' +enumerator: identifier ['\fB=\fP' constant-expression]? +struct-or-union-specifier: [ \fBstruct\fP | \fBunion\fP] [ struct-declaration-pack | identifier struct-declaration-pack?] +struct-declaration-pack: '\fB{\fP' struct-declaration+ '\fB}\fP' +struct-declaration: type-specifier struct-declarator-list '\fB;\fP'? +struct-declarator-list: struct-declarator ['\fB,\fP' struct-declarator]* +struct-declarator: declarator bit-expression? | bit-expression +bit-expression: '\fB:\fP' constant-expression +initializer: '\fB=\fP'? initial-value +cast: '\fB(\fP' type-specifier abstract-declarator '\fB)\fP' +abstract-declarator: primary-abstract-declarator ['\fB(\fP' '\fB)\fP' | arrayer]* | '\fB*\fP' abstract-declarator +primary-abstract-declarator: ['\fB(\fP' abstract-declarator '\fB)\fP']? 
+.sp 1 +\fBstatements\fP +statement: + expression-statement + | label '\fB:\fP' statement + | compound-statement + | if-statement + | while-statement + | do-statement + | for-statement + | switch-statement + | case-statement + | default-statement + | break-statement + | continue-statement + | return-statement + | jump + | '\fB;\fP' + | asm-statement + ; +expression-statement: expression '\fB;\fP' +label: identifier +if-statement: \fBif\fP '\fB(\fP' expression '\fB)\fP' statement [\fBelse\fP statement]? +while-statement: \fBwhile\fP '\fB(\fP' expression '\fB)\fP' statement +do-statement: \fBdo\fP statement \fBwhile\fP '\fB(\fP' expression '\fB)\fP' '\fB;\fP' +for-statement: \fBfor\fP '\fB(\fP' expression? '\fB;\fP' expression? '\fB;\fP' expression? '\fB)\fP' statement +switch-statement: \fBswitch\fP '\fB(\fP' expression '\fB)\fP' statement +case-statement: \fBcase\fP constant-expression '\fB:\fP' statement +default-statement: \fBdefault\fP '\fB:\fP' statement +break-statement: \fBbreak\fP '\fB;\fP' +continue-statement: \fBcontinue\fP '\fB;\fP' +return-statement: \fBreturn\fP expression? '\fB;\fP' +jump: \fBgoto\fP identifier '\fB;\fP' +compound-statement: '\fB{\fP' declaration* statement* '\fB}\fP' +asm-statement: \fBasm\fP '\fB(\fP' \fIstring\fP '\fB)\fP' '\fB;\fP' +.sp 1 +\fBexpressions\fP +initial-value: assignment-expression | initial-value-pack +initial-value-pack: '\fB{\fP' initial-value-list '\fB}\fP' +initial-value-list: initial-value ['\fB,\fP' initial-value]* '\fB,\fP'? +primary: \fIidentifier\fP | constant | \fIstring\fP | '\fB(\fP' expression '\fB)\fP' +secundary: primary [index-pack | parameter-pack | selection]* +index-pack: '\fB[\fP' expression '\fB]\fP' +parameter-pack: '\fB(\fP' parameter-list? '\fB)\fP' +selection: ['\fB.\fP' | '\fB\->\fP'] identifier +parameter-list: assignment-expression ['\fB,\fP' assignment-expression]* +postfixed: secundary postop? 
+unary: cast unary | postfixed | unop unary | size-of +size-of: \fBsizeof\fP [cast | unary] +binary-expression: unary [binop binary-expression]* +conditional-expression: binary-expression ['\fB?\fP' expression '\fB:\fP' assignment-expression]? +assignment-expression: conditional-expression [asgnop assignment-expression]? +expression: assignment-expression ['\fB,\fP' assignment-expression]* +unop: '\fB*\fP' | '\fB&\fP' | '\fB\-\fP' | '\fB!\fP' | '\fB~ \fP' | '\fB++\fP' | '\fB\-\-\fP' +postop: '\fB++\fP' | '\fB\-\-\fP' +multop: '\fB*\fP' | '\fB/\fP' | '\fB%\fP' +addop: '\fB+\fP' | '\fB\-\fP' +shiftop: '\fB<<\fP' | '\fB>>\fP' +relop: '\fB<\fP' | '\fB>\fP' | '\fB<=\fP' | '\fB>=\fP' +eqop: '\fB==\fP' | '\fB!=\fP' +arithop: multop | addop | shiftop | '\fB&\fP' | '\fB^ \fP' | '\fB|\fP' +binop: arithop | relop | eqop | '\fB&&\fP' | '\fB||\fP' +asgnop: '\fB=\fP' | '\fB+\fP' '\fB=\fP' | '\fB\-\fP' '\fB=\fP' | '\fB*\fP' '\fB=\fP' | '\fB/\fP' '\fB=\fP' | '\fB%\fP' '\fB=\fP' + | '\fB<<\fP' '\fB=\fP' | '\fB>>\fP' '\fB=\fP' | '\fB&\fP' '\fB=\fP' | '\fB^ \fP' '\fB=\fP' | '\fB|\fP' '\fB=\fP' + | '\fB+=\fP' | '\fB\-=\fP' | '\fB*=\fP' | '\fB/=\fP' | '\fB%=\fP' + | '\fB<<=\fP' | '\fB>>=\fP' | '\fB&=\fP' | '\fB^=\fP' | '\fB|=\fP' +constant: \fIinteger\fP | \fIfloating\fP +constant-expression: assignment-expression +identifier: \fIidentifier\fP | \fItype-identifier\fP +.fi diff --git a/doc/ego/bo/bo1 b/doc/ego/bo/bo1 new file mode 100644 index 0000000..58c17c7 --- /dev/null +++ b/doc/ego/bo/bo1 @@ -0,0 +1,162 @@ +.bp +.NH 1 +Branch Optimization +.NH 2 +Introduction +.PP +The Branch Optimization phase (BO) performs two related +(branch) optimizations. +.NH 3 +Fusion of basic blocks +.PP +If two basic blocks B1 and B2 have the following properties: +.DS +SUCC(B1) = {B2} +PRED(B2) = {B1} +.DE +then B1 and B2 can be combined into one basic block. 
+If B1 ends in an unconditional jump to the beginning of B2, this +jump can be eliminated, +hence saving a little execution time and object code size. +This technique can be used to eliminate some deficiencies +introduced by the front ends (for example, the "C" front end +translates switch statements inefficiently due to its one pass nature). +.NH 3 +While-loop optimization +.PP +The straightforward way to translate a while loop is to +put the test for loop termination at the beginning of the loop. +.DS +while cond loop \kyLAB1: \kxTest cond + body of the loop --->\h'|\nxu'Branch On False To LAB2 +end loop\h'|\nxu'code for body of loop +\h'|\nxu'Branch To LAB1 +\h'|\nyu'LAB2: + +Fig. 10.1 Example of Branch Optimization +.DE +If the condition fails at the Nth iteration, the following code +gets executed (dynamically): +.DS +.TS +l l l. +N * conditional branch (which fails N-1 times) +N-1 * unconditional branch +N-1 * body of the loop +.TE +.DE +An alternative translation is: +.DS + Branch To LAB2 +LAB1: + code for body of loop +LAB2: + Test cond + Branch On True To LAB1 +.DE +This translation results in the following profile: +.DS +.TS +l l l. +N * conditional branch (which succeeds N-1 times) +1 * unconditional branch +N-1 * body of the loop +.TE +.DE +So the second translation will be significantly faster if N >> 2. +If N=2, execution time will be slightly increased. +On the average, the program will be speeded up. +Note that the code sizes of the two translations will be the same. +.NH 2 +Implementation +.PP +The basic block fusion technique is implemented +by traversing the control flow graph of a procedure, +looking for basic blocks B with only one successor (S). +If one is found, it is checked if S has only one predecessor +(which has to be B). +If so, the two basic blocks can in principle be combined. +However, as one basic block will have to be moved, +the textual order of the basic blocks will be altered. 
+This reordering causes severe problems in the presence +of conditional jumps. +For example, if S ends in a conditional branch, +the basic block that comes textually next to S must stay +in that position. +So the transformation in Fig. 10.2 is illegal. +.DS +.TS +l l l l l. +LAB1: S1 LAB1: S1 + BRA LAB2 S2 + ... --> BEQ LAB3 +LAB2: S2 ... + BEQ LAB3 S3 + S3 +.TE + +Fig. 10.2 An illegal transformation of Branch Optimization +.DE +If B is moved towards S the same problem occurs if the block before B +ends in a conditional jump. +The problem could be solved by adding one extra branch, +but this would reduce the gains of the optimization to zero. +Hence the optimization will only be done if the block that +follows S (in the textual order) is not a successor of S. +This condition assures that S does not end in a conditional branch. +The condition always holds for the code generated by the "C" +front end for a switch statement. +.PP +After the transformation has been performed, +some attributes of the basic blocks involved (such as successor and +predecessor sets and immediate dominator) must be recomputed. +.PP +The while-loop technique is applied to one loop at a time. +The list of basic blocks of the loop is traversed to find +a block B that satisfies the following conditions: +.IP 1. +the textually next block to B is not part of the loop +.IP 2. +the last instruction of B is an unconditional branch; +hence B has only one successor, say S +.IP 3. +the textually next block of B is a successor of S +.IP 4. +the last instruction of S is a conditional branch +.LP +If such a block B is found, the control flow graph is changed +as depicted in Fig. 10.3. +.DS +.ft 5 + | | + | v + v | + |-----<------| ----->-----| + ____|____ | | + | | | |-------| | + | S1 | | | v | + | Bcc | | | .... | +|--| | | | | +| --------- | | ----|---- | +| | | | | | +| .... 
^ | | S2 | | +| | | | | | +| --------- | | | | | +v | | | ^ --------- | +| | S2 | | | | | +| | BRA | | | |-----<----- +| | | | | v +| --------- | | ____|____ +| | | | | | +| ------>------ | | S1 | +| | | Bnn | +|-------| | | | + | | ----|---- + v | | + |----<--| + | + v +.ft R + +Fig. 10.3 Transformation of the CFG by Branch Optimization +.DE diff --git a/doc/ego/ca/ca1 b/doc/ego/ca/ca1 new file mode 100644 index 0000000..ab06af4 --- /dev/null +++ b/doc/ego/ca/ca1 @@ -0,0 +1,65 @@ +.bp +.NH 1 +Compact assembly generation +.NH 2 +Introduction +.PP +The "Compact Assembly generation phase" (CA) transforms the +intermediate code of the optimizer into EM code in +Compact Assembly Language (CAL) format. +In the intermediate code, all program entities +(such as procedures, labels, global variables) +are denoted by a unique identifying number (see 3.5). +In the CAL output of the optimizer these numbers have to +be replaced by normal identifiers (strings). +The original identifiers of the input program are used whenever possible. +Recall that the IC phase generates two files that can be +used to map unique identifying numbers to procedure names and +global variable names. +For instruction labels CA always generates new names. +The reasons for doing so are: +.IP - +instruction labels are only visible inside one procedure, so they can +not be referenced in other modules +.IP - +the names are not very suggestive anyway, as they must be integer numbers +.IP - +the optimizer considerably changes the control structure of the program, +so there is really no one to one mapping of instruction labels in +the input and the output program. +.LP +As the optimizer combines all input modules into one module, +visibility problems may occur. +Two modules M1 and M2 can both define an identifier X (provided that +X is not externally visible in any of these modules). +If M1 and M2 are combined into one module M, two distinct +entities with the same name would exist in M, which +is not allowed. 
+.[~[ +tanenbaum machine architecture +.], section 11.1.4.3] +In these cases, CA invents a new unique name for one of the entities. +.NH 2 +Implementation +.PP +CA first reads the files containing the procedure and global variable names +and stores the names in two tables. +It scans these tables to make sure that all names are different. +Subsequently it reads the EM text, one procedure at a time, +and outputs it in CAL format. +The major part of the code that does the latter transformation +is adapted from the EM Peephole Optimizer. +.PP +The main problem of the implementation of CA is to +assure that the visibility rules are obeyed. +If an identifier must be externally visible (i.e. +it was externally visible in the input program) +and the identifier is defined (in the output program) before +being referenced, +an EXA or EXP pseudo must be generated for it. +(Note that the optimizer may change the order of definitions and +references, so some pseudos may be needed that were not +present in the input program). +On the other hand, an identifier may be only internally visible. +If such an identifier is referenced before being defined, +an INA or INP pseudo must be emitted prior to its first reference. diff --git a/doc/ego/cf/cf1 b/doc/ego/cf/cf1 new file mode 100644 index 0000000..e655474 --- /dev/null +++ b/doc/ego/cf/cf1 @@ -0,0 +1,94 @@ +.bp +.NH +The Control Flow Phase +.PP +In the previous chapter we described the intermediate +code of the global optimizer. +We also specified which part of this code +was constructed by the IC phase of the optimizer. +The Control Flow Phase (\fICF\fR) does +the remainder of the job, +i.e. it determines: +.IP - +the control flow graphs +.IP - +the loop tables +.IP - +the calling, change and use attributes of +the procedure table entries +.LP +CF operates on one procedure at a time. +For every procedure it first reads the EM instructions +from the EM-text file and groups them into basic blocks. 
+For every basic block, its successors and +predecessors are determined, +resulting in the control flow graph. +Next, the immediate dominator of every basic block +is computed. +Using these dominators, any loop in the +procedure is detected. +Finally, interprocedural analysis is done, +after which we will know the global effects of +every procedure call on its environment. +.sp +CF uses the same internal data structures +for the procedure table and object table as IC. +.NH 2 +Partitioning into basic blocks +.PP +With regard to flow of control, we distinguish +three kinds of EM instructions: +jump instructions, instruction label definitions and +normal instructions. +Jump instructions are all conditional or unconditional +branch instructions, +the case instructions (CSA/CSB) +and the RET (return) instruction. +A procedure call (CAL) is not considered to be a jump. +A defining occurrence of an instruction label +is regarded as an EM instruction. +.PP +An instruction starts +a new basic block, in any of the following cases: +.IP 1. +It is the first instruction of a procedure +.IP 2. +It is the first of a list of instruction label +defining occurrences +.IP 3. +It follows a jump +.LP +If there are several consecutive instruction labels +(which is highly unusual), +all of them are put in the same basic block. +Note that several cases may overlap, +e.g. a label definition at the beginning of a procedure +or a label following a jump. +.PP +A simple Finite State Machine is used to model +the above rules. +It also recognizes the end of a procedure, +marked by an END pseudo. +The basic blocks are stored internally as a doubly linked +linear list. +The blocks are linked in textual order. +Every node of this list has the attributes described +in the previous chapter (see syntax rule for +basic_block). +Furthermore, every node contains a pointer to its +EM instructions, +which are represented internally +as a linear, doubly linked list, +just as in the IC phase. 
+However, instead of one list per procedure (as in IC) +there is now one list per basic block. +.PP +On the fly, a table is built that maps +every label identifier to the label definition +instruction. +This table is used for computing the control flow. +The table is stored as a dynamically allocated array. +The length of the array is the number of labels +of the current procedure; +this value can be found in the procedure table, +where it was stored by IC. diff --git a/doc/ego/cf/cf2 b/doc/ego/cf/cf2 new file mode 100644 index 0000000..c4dd95d --- /dev/null +++ b/doc/ego/cf/cf2 @@ -0,0 +1,50 @@ +.NH 2 +Control Flow +.PP +A \fIsuccessor\fR of a basic block B is a block C +that can be executed immediately after B. +B is said to be a \fIpredecessor\fR of C. +A block ending with a RET instruction +has no successors. +Such a block is called a \fIreturn block\fR. +Any block that has no predecessors cannot be +executed at all (i.e. it is unreachable), +unless it is the first block of a procedure, +called the \fIprocedure entry block\fR. +.PP +Internally, the successor and predecessor +attributes of a basic block are stored as \fIsets\fR. +Alternatively, one may regard all these +sets of all basic blocks as a conceptual \fIgraph\fR, +in which there is an edge from B to C if C +is in the successor set of B. +We call this conceptual graph +the \fIControl Flow Graph\fR. +.PP +The only successor of a basic block ending on an +unconditional branch instruction is the block that +contains the label definition of the target of the jump. +The target instruction can be found via the LAB_ID +that is the operand of the jump instruction, +by using the label-map table mentioned +above. +If the last instruction of a block is a +conditional jump, +the successors are the target block and the textually +next block. +The last instruction can also be a case jump +instruction (CSA or CSB).
+We then analyze the case descriptor, +to find all possible target instructions +and their associated blocks. +We require the case descriptor to be allocated in +a ROM, so it cannot be changed dynamically. +A case jump via an alterable descriptor could in principle +go to any label in the program. +In the presence of such an uncontrolled jump, +hardly any optimization can be done. +We do not expect any front end to generate such a descriptor, +however, because of the controlled nature +of case statements in high level languages. +If the basic block does not end in a jump instruction, +its only successor is the textually next block. diff --git a/doc/ego/cf/cf3 b/doc/ego/cf/cf3 new file mode 100644 index 0000000..42e8827 --- /dev/null +++ b/doc/ego/cf/cf3 @@ -0,0 +1,53 @@ +.NH 2 +Immediate dominators +.PP +A basic block B dominates a block C if every path +in the control flow graph from the procedure entry block +to C goes through B. +The immediate dominator of C is the closest dominator +of C on any path from the entry block. +See also +.[~[ +aho compiler design +.], section 13.1.] +.PP +There are a number of algorithms to compute +the immediate dominator relation. +.IP 1. +Purdom and Moore give an algorithm that is +easy to program and easy to describe (although the +description they give is unreadable; +it is given in a very messy Algol60 program full of gotos). +.[ +predominators +.] +.IP 2. +Aho and Ullman present a bitvector algorithm, which is also +easy to program and to understand. +(See +.[~[ +aho compiler design +.], section 13.1.]). +.IP 3. +Lengauer and Tarjan introduce a fast algorithm that is +hard to understand, yet remarkably easy to implement. +.[ +lengauer dominators +.] +.LP +The Purdom-Moore algorithm is very slow if the +number of basic blocks in the flow graph is large.
+The Aho-Ullman algorithm in fact computes the +dominator relation, +from which the immediate dominator relation can be computed +in time quadratic to the number of basic blocks, worst case. +The storage requirement is also quadratic to the number +of blocks. +The running time of the third algorithm is proportional +to: +.DS +(number of edges in the graph) * log(number of blocks). +.DE +We have chosen this algorithm because it is fast +(as shown by experiments done by Lengauer and Tarjan), +it is easy to program and requires little data space. diff --git a/doc/ego/cf/cf4 b/doc/ego/cf/cf4 new file mode 100644 index 0000000..843a411 --- /dev/null +++ b/doc/ego/cf/cf4 @@ -0,0 +1,93 @@ +.NH 2 +Loop detection +.PP +Loops are detected by using the loop construction +algorithm of. +.[~[ +aho compiler design +.], section 13.1.] +This algorithm uses \fIback edges\fR. +A back edge is an edge from B to C in the CFG, +whose head (C) dominates its tail (B). +The loop associated with this back edge +consists of C plus all nodes in the CFG +that can reach B without going through C. +.PP +As an example of how the algorithm works, +consider the piece of program of Fig. 4.1. +First just look at the program and try to +see what part of the code constitutes the loop. +.DS +loop + if cond then 1 + -- lots of simple + -- assignment + -- statements 2 3 + exit; -- exit loop + else + S; -- one statement + end if; +end loop; + +Fig. 4.1 A misleading loop +.DE +Although a human being may be easily deceived +by the brackets "loop" and "end loop", +the loop detection algorithm will correctly +reply that only the test for "cond" and +the single statement in the false-part +of the if statement are part of the loop! +The statements in the true-part only get +executed once, so there really is no reason at all +to say they're part of the loop too. +The CFG contains one back edge, "3->1". +As node 3 cannot be reached from node 2, +the latter node is not part of the loop. 
+.PP +A source of problems with the algorithm is the fact +that different back edges may result in +the same loop. +Such an ill-structured loop is +called a \fImessy\fR loop. +After a loop has been constructed, it is checked +if it is really a new loop. +.PP +Loops can partly overlap, without one being nested +inside the other. +This is the case in the program of Fig. 4.2. +.DS +1: 1 + S1; +2: + S2; 2 + if cond then + goto 4; + S3; 3 4 + goto 1; +4: + S4; + goto 1; + +Fig. 4.2 Partly overlapping loops +.DE +There are two back edges "3->1" and "4->1", +resulting in the loops {1,2,3} and {1,2,4}. +With every basic block we associate a set of +all loops it is part of. +It is not sufficient just to record its +most enclosing loop. +.PP +After all loops of a procedure are detected, we determine +the nesting level of every loop. +Finally, we find all strong and firm blocks of the loop. +If the loop has only one back edge (i.e. it is not messy), +the set of firm blocks consists of the +tail of this back edge and its dominators +in the loop (including the loop entry block). +A firm block is also strong if it is not a +successor of a block that may exit the loop; +a block may exit a loop if it has an (immediate) successor +that is not part of the loop. +For messy loops we do not determine the strong +and firm blocks. These loops are expected +to occur very rarely. diff --git a/doc/ego/cf/cf5 b/doc/ego/cf/cf5 new file mode 100644 index 0000000..1926c45 --- /dev/null +++ b/doc/ego/cf/cf5 @@ -0,0 +1,82 @@ +.NH 2 +Interprocedural analysis +.PP +It is often desirable to know the effects +a procedure call may have. +The optimization below is only possible if +we know for sure that the call to P cannot +change A. +.DS +.TS +l l. +A := 10; A:= 10; +P; -- procedure call --> P; +B := A + 2; B := 12; +.TE +.DE +Although it is not possible to predict exactly +all the effects a procedure call has, we may +determine a kind of upper bound for it.
+So we compute all variables that may be +changed by P, although they need not be +changed at every invocation of P. +We can get hold of this set by just looking +at all assignment (store) instructions +in the body of P. +EM also has a set of \fIindirect\fR assignment +instructions, +i.e. assignment through a pointer variable. +In general, it is not possible to determine +which variable is affected by such an assignment. +In these cases, we just record the fact that P +does an indirect assignment. +Note that this does not mean that all variables +are potentially affected, as the front ends +may generate messages telling that certain +variables can never be accessed indirectly. +We also set a flag if P does a use (load) indirect. +Note that we only have to look at \fIglobal\fR +variables. +If P changes or uses any of its locals, +this has no effect on its environment. +Local variables of a lexically enclosing +procedure can only be accessed indirectly. +.PP +A procedure P may of course call another procedure. +To determine the effects of a call to P, +we also must know the effects of a call to the second procedure. +This second one may call a third one, and so on. +Effectively, we need to compute the \fItransitive closure\fR +of the effects. +To do this, we determine for every procedure +which other procedures it calls. +This set is the "calling" attribute of a procedure. +One may regard all these sets as a conceptual graph, +in which there is an edge from P to Q +if Q is in the calling set of P. This graph will +be referred to as the \fIcall graph\fR. +(Note the resemblance with the control flow graph). +.PP +We can detect which procedures are called by P +by looking at all CAL instructions in its body. +Unfortunately, a procedure may also be +called indirectly, via a CAI instruction. +Yet, only procedures that are used as operand of an LPI +instruction can be called indirect, +because this is the only way to take the address of a procedure. 
+We determine for every procedure whether it does +a CAI instruction. +We also build a set of all procedures used as +operand of an LPI. +.sp +After all procedures have been processed (i.e. all CFGs +are constructed, all loops are detected, +all procedures are analyzed to see which variables +they may change, which procedures they call, +whether they do a CAI or are used in an LPI) the +transitive closure of all interprocedural +information is computed. +During the same process, +the calling set of every procedure that uses a CAI +is extended with the above mentioned set of all +procedures that can be called indirect. diff --git a/doc/ego/cf/cf6 b/doc/ego/cf/cf6 new file mode 100644 index 0000000..a560b48 --- /dev/null +++ b/doc/ego/cf/cf6 @@ -0,0 +1,21 @@ +.NH 2 +Source files +.PP +The sources of CF are in the following files and packages: +.IP cf.h: 14 +declarations of global variables and data structures +.IP cf.c: +the routine main; interprocedural analysis; +transitive closure +.IP succ: +control flow (successor and predecessor) +.IP idom: +immediate dominators +.IP loop: +loop detection +.IP get: +read object and procedure table; +read EM text and partition it into basic blocks +.IP put: +write tables, CFGs and EM text +.LP diff --git a/doc/ego/cj/cj1 b/doc/ego/cj/cj1 new file mode 100644 index 0000000..e7174af --- /dev/null +++ b/doc/ego/cj/cj1 @@ -0,0 +1,144 @@ +.bp +.NH 1 +Cross jumping +.NH 2 +Introduction +.PP +The "Cross Jumping" optimization technique (CJ) +.[ +wulf design optimizing compiler +.] +is basically a space optimization technique. It looks for pairs of +basic blocks (B1,B2), for which: +.DS +SUCC(B1) = SUCC(B2) = {S} +.DE +(So B1 and B2 both have one and the same successor). +If the last few non-branch instructions are the same for B1 and B2, +one such sequence can be eliminated. +.DS +Pascal: + +if cond then + S1 + S3 +else + S2 + S3 + +(pseudo) EM: +.TS +l l l. 
+ TEST COND TEST COND + BNE *1 BNE *1 + S1 S1 + S3 ---> BRA *2 + BRA *2 1: +1: S2 + S2 2: + S3 S3 +2: +.TE + +Fig. 9.1 An example of Cross Jumping +.DE +As the basic blocks have the same successor, +at least one of them ends in an unconditional branch instruction (BRA). +Hence no extra branch instruction is ever needed, just the target +of an existing branch needs to be changed; neither the program size +nor the execution time will ever increase. +In general, the execution time will remain the same, unless +further optimizations can be applied because of this optimization. +.PP +This optimization is particularly effective, +because it cannot always be done by the programmer at the source level, +as demonstrated by Fig. 9.2. +.DS + Pascal: + +if cond then + x := f(4) +else + x := g(5) + + +EM: + +.TS +l l. +... ... +LOC 4 LOC 5 +CAL F CAL G +ASP 2 ASP 2 +LFR 2 LFR 2 +STL X STL X +.TE + +Fig. 9.2 Effectiveness of Cross Jumping +.DE +At the source level there is no common tail, +but at the EM level there is a common tail. +.NH 2 +Implementation +.PP +The implementation of cross jumping is rather straightforward. +The technique is applied to one procedure at a time. +The control flow graph of the procedure +is scanned for pairs of basic blocks +with the same (single) successor and with common tails. +Note that there may be more than two such blocks (e.g. as the result +of a case statement). +This is dealt with by repeating the entire process until no +further optimizations can be done for the current procedure. +.sp +If a suitable pair of basic blocks has been found, the control flow +graph must be altered. One of the basic +blocks must be split into two. +The control flow graphs before and after the optimization are shown +in Fig. 9.3 and Fig. 9.4. +.DS +.ft 5 + + -------- -------- + | | | | + | S1 | | S2 | + | S3 | | S3 | + | | | | + -------- -------- + | | + |------------------|--------------------| + | + v +.ft R + +Fig.
9.3 CFG before optimization +.DE +.DS +.ft 5 + -------- -------- + | | | | + | S1 | | S2 | + | | | | + -------- -------- + | | + |--------------------<------------------| + v + -------- + | | + | S3 | + | | + -------- + | + v +.ft R + +Fig. 9.4 CFG after optimization +.DE +Some attributes of the three resulting blocks (such as immediate dominator) +are updated. +.PP +In some cases, cross jumping might split the computation of an expression +into two, by inserting a branch somewhere in the middle. +Most code generators will generate very poor assembly code when +presented with such EM code. +Therefore, cross jumping is not performed in these cases. diff --git a/doc/ego/cs/cs1 b/doc/ego/cs/cs1 new file mode 100644 index 0000000..842e514 --- /dev/null +++ b/doc/ego/cs/cs1 @@ -0,0 +1,45 @@ +.bp +.NH 1 +Common subexpression elimination +.NH 2 +Introduction +.PP +The Common Subexpression Elimination optimization technique (CS) +tries to eliminate multiple computations of EM expressions +that yield the same result. +It places the result of one such computation +in a temporary variable, +and replaces the other computations by a reference +to this temporary variable. +The primary goal of this technique is to decrease +the execution time of the program, +but in general it will save space too. +.PP +As an example of the application of Common Subexpression Elimination, +consider the piece of program in Fig. 7.1(a). +.DS +.TS +l l l. +x := a * b; TMP := a * b; x := a * b; +CODE; x := TMP; CODE +y := c + a * b; CODE y := x; + y := c + TMP; + + (a) (b) (c) +.TE + +Fig. 7.1 Examples of Common Subexpression Elimination +.DE +If neither a nor b is changed in CODE, +the instructions can be replaced by those of Fig. 7.1(b), +which saves one multiplication, +but costs an extra store instruction. +If the value of x is not changed in CODE either, +the instructions can be replaced by those of Fig. 7.1(c). +In this case +the extra store is not needed. 
+.PP +In the following sections we will describe +which transformations are done +by CS and how this phase +was implemented. diff --git a/doc/ego/cs/cs2 b/doc/ego/cs/cs2 new file mode 100644 index 0000000..0fe4dfc --- /dev/null +++ b/doc/ego/cs/cs2 @@ -0,0 +1,86 @@ +.NH 2 +Specification of the Common Subexpression Elimination phase +.PP +In this section we will describe +the window +through which CS examines the code, +the expressions recognized by CS, +and finally the changes made to the code. +.NH 3 +The working window +.PP +The CS algorithm is applied to the +largest sequence of textually adjacent basic blocks +B1,..,Bn, for which +.DS +PRED(Bj) = {Bj-1}, j = 2,..,n. +.DE +Intuitively, this window consists of straight line code, +with only one entry point (at the beginning); it may +contain jumps, which should all have their targets outside the window. +This is illustrated in Fig. 7.2. +.DS +x := a * b; (1) +if x < 10 then (2) + y := a * b; (3) + +Fig. 7.2 The working window of CS +.DE +Line (2) can only be executed after line (1). +Likewise, line (3) can only be executed after +line (2). +Both a and b have the same values at line (1) and at line (3). +.PP +Larger windows were avoided. +In Fig. 7.3, the value of a at line (4) may have been obtained +at more than one point. +.DS +x := a * b; (1) +if x < 10 then (2) + a := 100; (3) +y := a * b; (4) + +Fig. 7.3 Several working windows +.DE +.NH 3 +Recognized expressions. +.PP +The computations eliminated by CS need not be normal expressions +(like "a * b"), +but can even consist of a single operand that is expensive to access, +such as an array element or a record field. +If an array element is used, +its address is computed implicitly. +CS is able to eliminate either the element itself or its +address, whichever one is most profitable. +A variable of a textually enclosing procedure may also be +expensive to access, depending on the lexical level difference. 
+.NH 3 +Transformations +.PP +CS creates a new temporary local variable (TMP) +for every eliminated expression, +unless it is able to use an existing local variable. +It emits code to initialize this variable with the +result of the expression. +Most recurrences of the expression +can simply be replaced by a reference to TMP. +If the address of an array element is recognized as +a common subexpression, +references to the element itself are replaced by +indirect references through TMP (see Fig. 7.4). +.DS +.TS +l l l. +x := A[i]; TMP := &A[i]; + . . . --> x := *TMP; +A[i] := y; . . . + *TMP := y; +.TE + +Fig. 7.4 Elimination of an array address computation +.DE +Here, '&' is the 'address of' operator, +and unary '*' is the indirection operator. +(Note that EM actually has different instructions to do +a use-indirect or an assign-indirect.) diff --git a/doc/ego/cs/cs3 b/doc/ego/cs/cs3 new file mode 100644 index 0000000..416d9e4 --- /dev/null +++ b/doc/ego/cs/cs3 @@ -0,0 +1,250 @@ +.NH 2 +Implementation +.PP +.NH 3 +The value number method +.PP +To determine whether two expressions have the same result, +there must be some way to determine whether their operands have +the same values. +We use a system of \fIvalue numbers\fP +.[ +kennedy data flow analysis +.] +in which each distinct value of whatever type, +created or used within the working window, +receives a unique identifying number, its value number. +Two items have the same value number if and only if, +based only upon information from the instructions in the window, +their values are provably identical. +For example, after processing the statement +.DS +a := 4; +.DE +the variable a and the constant 4 have the same value number. +.PP +The value number of the result of an expression depends only +on the kind of operator and the value number(s) of the operand(s). +The expressions need not be textually equal, as shown in Fig. 7.5. +.DS +.TS +l l. 
+a := c; (1) +use(a * b); (2) +d := b; (3) +use(c * d); (4) +.TE + +Fig. 7.5 Different expressions with the same value number +.DE +At line (1) a receives the same value number as c. +At line (3) d receives the same value number as b. +At line (4) the expression "c * d" receives the same value number +as the expression "a * b" at line (2), +because the value numbers of their left and right operands are the same, +and the operator (*) is the same. +.PP +As another example of the value number method, consider Fig. 7.6. +.DS +.TS +l l. +use(a * b); (1) +a := 123; (2) +use(a * b); (3) +.TE + +Fig. 7.6 Identical expressions with different value numbers +.DE +Although textually the expressions "a * b" in line 1 and line 3 are equal, +a will have different value numbers at line 3 and line 1. +The two expressions will not mistakenly be recognized as equivalent. +.NH 3 +Entities +.PP +The Value Number Method distinguishes between operators and operands. +The value numbers of operands are stored in a table, +called the \fIsymbol table\fR. +The value number of a subexpression depends on the +(root) operator of the expression and on the value numbers +of its operands. +A table of "available expressions" is used to do this mapping. +.PP +CS recognizes the following kinds of EM operands, called \fIentities\fR: +.DS +- constant +- local variable +- external variable +- indirectly accessed entity +- offsetted entity +- address of local variable +- address of external variable +- address of offsetted entity +- address of local base +- address of argument base +- array element +- procedure identifier +- floating zero +- local base +- heap pointer +- ignore mask +.DE +.LP +Whenever a new entity is encountered in the working window, +it is entered in the symbol table and given a brand new value number. +Most entities have attributes (e.g. the offset in +the current stackframe for local variables), +which are also stored in the symbol table. 
+.PP +An entity is called static if its value cannot be changed +(e.g. a constant or an address). +.NH 3 +Parsing expressions +.PP +Common subexpressions are recognized by simulating the behaviour +of the EM machine. +The EM code is parsed from left to right; +as EM is postfix code, this is a bottom up parse. +At any point the current state of the EM runtime stack is +reflected by a simulated "fake stack", +containing descriptions of the parsed operands and expressions. +A descriptor consists of: +.DS +(1) the value number of the operand or expression +(2) the size of the operand or expression +(3) a pointer to the first line of EM-code + that constitutes the operand or expression +.DE +Note that operands may consist of several EM instructions. +Whenever an operator is encountered, the +descriptors of its operands are on top of the fake stack. +The operator and the value numbers of the operands +are used as indices in the table of available expressions, +to determine the value number of the expression. +.PP +During the parsing process, +we keep track of the first line of each expression; +we need this information when we decide to eliminate the expression. +.NH 3 +Updating entities +.PP +An entity is assigned a value number when it is +used for the first time +in the working window. +If the entity is used as left hand side of an assignment, +it gets the value number of the right hand side. +Sometimes the effects of an instruction on an entity cannot +be determined exactly; +the current value and value number of the entity may become +inconsistent. +Hence the current value number must be forgotten. +This is achieved by giving the entity a new value number +that was not used before. +The entity is said to be \fIkilled\fR. +.PP +As information is lost when an entity is killed, +CS tries to save as many entities as possible. +In case of an indirect assignment through a pointer, +some analysis is done to see which variables cannot be altered. 
+For a procedure call, the interprocedural information contained +in the procedure table is used to restrict the set of entities that may +be changed by the call. +Local variables for which the front end generated +a register message can never be changed by an indirect assignment +or a procedure call. +.NH 3 +Changing the EM text +.PP +When a new expression comes available, +it is checked whether its result is saved in a local +that may go in a register. +The last line of the expression must be followed +by a STL or SDL instruction +(depending on the size of the result) +and a register message must be present for +this local. +If there is such a local, +it is recorded in the available expressions table. +Each time a new occurrence of this expression +is found, +the value number of the local is compared against +the value number of the result. +If they are different the local cannot be used and is forgotten. +.PP +The available expressions are linked in a list. +New expressions are linked at the head of the list. +In this way expressions that are contained within other +expressions appear later in the list, +because EM-expressions are postfix. +The elimination process walks through the list, +starting at the head, to find the largest expressions first. +If an expression is eliminated, +any expression later on in the list, contained in the former expression, +is removed from the list, +as expressions can only be eliminated once. +.PP +A STL or SDL is emitted after the first occurrence of the expression, +unless there was an existing local variable that could hold the result. +.NH 3 +Desirability analysis +.PP +Although the global optimizer works on EM code, +the goal is to improve the quality of the object code. +Therefore some machine-dependent information is needed +to decide whether it is desirable to +eliminate a given expression. +Because it is impossible for the CS phase to know +exactly what code will be generated, +some heuristics are used. 
+CS essentially looks for some special cases +that should not be eliminated. +These special cases can be turned on or off for a given machine, +as indicated in a machine descriptor file. +.PP +Some operators can sometimes be translated +into an addressing mode for the machine at hand. +Such an operator is only eliminated +if its operand is itself expensive, +i.e. it is not just a simple load. +The machine descriptor file contains a set of such operators. +.PP +Eliminating the loading of the Local Base or +the Argument Base by the LXL resp. LXA instruction +is only beneficial if the difference in lexical levels +exceeds a certain threshold. +The machine descriptor file contains this threshold. +.PP +Replacing a SAR or a LAR by an AAR followed by a LOI +may possibly increase the size of the object code. +We assume that this is only possible when the +size of the array element is greater than some limit. +.PP +There are back ends that can very efficiently translate +the index computing instruction sequence LOC SLI ADS. +If this is the case, +the SLI instruction between a LOC +and an ADS is not eliminated. +.PP +To handle unforeseen cases, the descriptor file may also contain +a set of operators that should never be eliminated. +.NH 3 +The algorithm +.PP +After these preparatory explanations, +the algorithm itself is easy to understand. +For each instruction within the current window, +the following steps are performed in the given order: +.IP 1. +Check if this instruction defines an entity. +If so, the set of entities is updated accordingly. +.IP 2. +Kill all entities that might be affected by this instruction. +.IP 3. +Simulate the instruction on the fake-stack. +If this instruction is an operator, +update the list of available expressions accordingly. +.PP +The result of this process is +a list of available expressions plus the information +needed to eliminate them. +Expressions that are desirable to eliminate are eliminated. 
+Next, the window is shifted and the process is repeated. diff --git a/doc/ego/cs/cs4 b/doc/ego/cs/cs4 new file mode 100644 index 0000000..c0a42d5 --- /dev/null +++ b/doc/ego/cs/cs4 @@ -0,0 +1,311 @@ +.NH 2 +Implementation. +.PP +In this section we will discuss the implementation of the CS phase. +We will first describe the basic actions that are undertaken +by the algorithm, then the algorithm itself. +.NH 3 +Partitioning the EM instructions +.PP +There are over 100 EM instructions. +For our purpose we partition this huge set into groups of +instructions which can be more or less conveniently handled together. +.PP +There are groups for all sorts of load instructions: +simple loads, expensive loads, loads of an array element. +A load is considered \fIexpensive\fP when more than one EM instruction +is involved in loading it. +The load of a lexical entity is also considered expensive. +For instance: LOF is expensive, LAL is not. +LAR forms a group on its own, +because it is not only an expensive load, +but also implicitly includes the ternary operator AAR, +which computes the address of the array element. +.PP +There are groups for all sorts of operators: +unary, binary, and ternary. +The groups of operators are further partitioned according to the size +of their operand(s) and result. +.\" .PP +.\" The distinction between operators and expensive loads is not always clear. +.\" The ADP instruction for example, +.\" might seem a unary operator because it pops one item +.\" (a pointer) from the stack. +.\" However, two ADP-instructions which pop an item with the same value number +.\" need not have the same result, +.\" because the attributes (an offset, to be added to the pointer) +.\" can be different. +.\" Is it then a binary operator? +.\" That would give rise to the strange, and undesirable, +.\" situation that some binary operators pop two operands +.\" and others pop one. +.\" The conclusion is inevitable: +.\" we have been fooled by the name (ADd Pointer). 
+.\" The ADP-instruction is an expensive load. +.\" In this context LAF, meaning Load Address of oFfsetted, +.\" would have been a better name, +.\" corresponding to LOF, like LAL, +.\" Load Address of Local, corresponds to LOL. +.PP +There are groups for all sorts of stores: +direct, indirect, array element. +The SAR forms a group on its own for the same reason +as appeared with LAR. +.PP +The effect of the remaining instructions is less clear. +They do not help very much in parsing expressions or +in constructing our pseudo symboltable. +They are partitioned according to the following criteria: +.RS +.IP "-" +They change the value of an entity without using the stack +(e.g. ZRL, DEE). +.IP "-" +They are subroutine calls (CAI, CAL). +.IP "-" +They change the stack in some irreproducible way (e.g. ASP, LFR, DUP). +.IP "-" +They have no effect whatever on the stack or on the entities. +This does not mean they can be deleted, +but they can be ignored for the moment +(e.g. MES, LIN, NOP). +.IP "-" +Their effect is too complicated to compute, +so we just assume worst case behaviour. +Hopefully, they do not occur very often. +(e.g. MON, STR, BLM). +.IP "-" +They signal the end of the basic block (e.g. BLT, RET, TRP). +.RE +.NH 3 +Parsing expressions +.PP +To recognize expressions, +we simulate the behaviour of the EM machine, +by means of a fake-stack. +When we scan the instructions in sequential order, +we first encounter the instructions that load +the operands on the stack, +and then the instruction that indicates the operator, +because EM expressions are postfix. +When we find an instruction to load an operand, +we load on the fake-stack a struct with the following information: +.DS +.TS +l l. +(1) the value number of the operand +(2) the size of the operand +(3) a pointer to the first line of EM-code + that constitutes the operand +.TE +.DE +In most cases, (3) will point to the line +that loaded the operand (e.g. LOL, LOC), +i.e. 
there is only one line that refers to this operand, +but sometimes some information must be popped +to load the operand (e.g. LOI, LAR). +This information must have been pushed before, +so we also pop a pointer to the first line that pushed +the information. +This line is now the first line that defines the operand. +.PP +When we find the operator instruction, +we pop its operand(s) from the fake-stack. +The first line that defines the first operand is +now the first line of the expression. +We now have all information to determine +whether the just parsed expression has occurred before. +We also know the first and last line of the expression; +we need this when we decide to eliminate it. +Associated with each available expression is a set of +which the elements contain the first and last line of +a recurrence of this expression. +.PP +Not only will the operand(s) be popped from the fake-stack, +but the following will be pushed: +.DS +.TS +l l. +(1) the value number of the result +(2) the size of the result +(3) a pointer to the first line of the expression +.TE +.DE +In this way an item on the fake-stack always contains +the necessary information. +EM expressions are parsed bottom up. +.NH 3 +Updating entities +.PP +As said before, +we build our private "symboltable", +while scanning the EM-instructions. +The behaviour of the EM-machine is not only reflected +in the fake-stack, +but also in the entities. +When an entity is created, +we do not yet know its value, +so we assign a brand new value number to it. +Each time a store-instruction is encountered, +we change the value number of the target entity of this store +to the value number of the token that was popped +from the fake-stack. +Because entities may overlap, +we must also "forget" the value numbers of entities +that might be affected by this store. +Each such entity will be \fIkilled\fP, +i.e. assigned a brand new value number. 
+.PP +Because we lose information when we forget +the value number of an entity, +we try to save as many entities as possible. +When we store into an external, +we don't have to kill locals and vice versa. +Furthermore, we can see whether two locals or +two externals overlap, +because we know the offset from the local base, +resp. the offset within the data block, +and the size. +The situation becomes more complicated when we have +to consider indirection. +The worst case is that we store through an unknown pointer. +In that case we kill all entities except those locals +for which a so-called \fIregister message\fP has been generated; +this register message indicates that this local can never be +accessed indirectly. +If we know this pointer we can be more careful. +If it points to a local then the entity that is accessed through +this pointer can never overlap with an external. +If it points to an external this entity can never overlap with a local. +Furthermore, in the latter case, +we can find the data block this entity belongs to. +Since pointer arithmetic is only defined within a data block, +this entity can never overlap with entities that are known to +belong to another data block. +.PP +Not only after a store-instruction but also after a +subroutine-call it may be necessary to kill entities; +the subroutine may affect global variables or store +through a pointer. +If a subroutine is called that is not available as EM-text, +we assume worst case behaviour, +i.e. we kill all entities without register message. +.NH 3 +Additions and replacements. +.PP +When a new expression comes available, +we check whether the result is saved in a local +that may go in a register. +The last line of the expression must be followed +by a STL or SDL instruction, +depending on the size of the result +(resp. WS and 2*WS), +and a register message must be present for +this local. +If we have found such a local, +we store a pointer to it with the available expression. 
+Each time a new occurrence of this expression +is found, +we compare the value number of the local against +the value number of the result. +When they are different we remove the pointer to it, +because we cannot use it. +.PP +The available expressions are singly linked in a list. +When a new expression comes available, +we link it at the head of the list. +In this way expressions that are contained within other +expressions appear later in the list, +because EM-expressions are postfix. +When we are going to eliminate expressions, +we walk through the list, +starting at the head, to find the largest expressions first. +When we decide to eliminate an expression, +we look at the expressions in the tail of the list, +starting from where we are now, +to delete expressions that are contained within +the chosen one because +we cannot eliminate an expression more than once. +.PP +When we are going to eliminate expressions, +and we do not have a local that holds the result, +we emit a STL or SDL after the line where the expression +was first found. +The other occurrences are simply removed, +unless they contain instructions that not only have +effect on the stack; e.g. messages, stores, calls. +Before each instruction that needs the result on the stack, +we emit a LOL or LDL. +When the expression was an AAR, +but the instruction was a LAR or a SAR, +we append a LOI resp. a STI of the number of bytes +in an array-element after each LOL/LDL. +.NH 3 +Desirability analysis +.PP +Although the global optimizer works on EM code, +the goal is to improve the quality of the object code. +Therefore we need some machine dependent information +to decide whether it is desirable to +eliminate a given expression. +Because it is impossible for the CS phase to know +exactly what code will be generated, +we use some heuristics. +In most cases it will save time when we eliminate an +operator, so we just do it. +We only look for some special cases. 
+.PP +Some operators can in some cases be translated +into an addressing mode for the machine at hand. +We only eliminate such an operator, +when its operand is itself "expensive", +i.e. not just a simple load. +The user of the CS phase has to supply +a set of such operators. +.PP +Eliminating the loading of the Local Base or +the Argument Base by the LXL resp. LXA instruction +is only beneficial when the number of lexical levels +we have to go back exceeds a certain threshold. +This threshold will be different when registers +are saved by the back end. +The user must supply this threshold. +.PP +Replacing a SAR or a LAR by an AAR followed by a LOI +may possibly increase the size of the object code. +We assume that this is only possible when the +size of the array element is greater than some +(user-supplied) limit. +.PP +There are back ends that can very efficiently translate +the index computing instruction sequence LOC SLI ADS. +If this is the case, +we do not eliminate the SLI instruction between a LOC +and an ADS. +.PP +To handle unforeseen cases, the user may also supply +a set of operators that should never be eliminated. +.NH 3 +The algorithm +.PP +After these preparatory explanations, +we can be short about the algorithm itself. +For each instruction within our window, +the following steps are performed in the order given: +.IP 1. +We check if this instruction defines an entity. +If this is the case the set of entities is updated accordingly. +.IP 2. +We kill all entities that might be affected by this instruction. +.IP 3. +The instruction is simulated on the fake-stack. +Copy propagation is done. +If this instruction is an operator, +we update the list of available expressions accordingly. +.PP +When we have processed all instructions this way, +we have built a list of available expressions plus the information we +need to eliminate them. +Those expressions of which desirability analysis tells us so, +we eliminate. +Then we shift our window and continue. 
diff --git a/doc/ego/cs/cs5 b/doc/ego/cs/cs5 new file mode 100644 index 0000000..eaf8840 --- /dev/null +++ b/doc/ego/cs/cs5 @@ -0,0 +1,46 @@ +.NH 2 +Source files of CS +.PP +The sources of CS are in the following files and packages: +.IP cs.h 14 +declarations of global variables and data structures +.IP cs.c +the routine main; +a driving routine to process +the basic blocks in the right order +.IP vnm +implements a procedure that performs +the value numbering on one basic block +.IP eliminate +implements a procedure that does the +transformations, if desirable +.IP avail +implements a procedure that manipulates the list of available expressions +.IP entity +implements a procedure that manipulates the set of entities +.IP getentity +implements a procedure that extracts the +pseudo symboltable information from EM-instructions; +uses a small table +.IP kill +implements several routines that find the entities +that might be changed by EM-instructions +and kill them +.IP partition +implements several routines that partition the huge set +of EM-instructions into more or less manageable, +more or less logical chunks +.IP profit +implements a procedure that decides whether it +is advantageous to eliminate an expression; +also removes expressions with side-effects +.IP stack +implements the fake-stack and operations on it +.IP alloc +implements several allocation routines +.IP aux +implements several auxiliary routines +.IP debug +implements several routines to provide debugging +and verbose output +.LP diff --git a/doc/ego/ic/ic1 b/doc/ego/ic/ic1 new file mode 100644 index 0000000..6347bc7 --- /dev/null +++ b/doc/ego/ic/ic1 @@ -0,0 +1,57 @@ +.bp +.NH +The Intermediate Code and the IC phase +.PP +In this chapter the intermediate code of the EM global optimizer +will be defined. +The 'Intermediate Code construction' phase (IC), +which builds the initial intermediate code from +EM Compact Assembly Language, +will be described. 
+.NH 2 +Introduction +.PP +The EM global optimizer is a multi pass program, +hence there is a need for an intermediate code. +Usually, programs in the Amsterdam Compiler Kit use the +Compact Assembly Language format +.[~[ +keizer architecture +.], section 11.2] +for this purpose. +Although this code has some convenient features, +such as being compact, +it is quite unsuitable in our case, +because of a number of reasons. +At first, the code lacks global information +about whole procedures or whole basic blocks. +Second, it uses identifiers ('names') to bind +defining and applied occurrences of +procedures, data labels and instruction labels. +Although this is usual in high level programming +languages, it is awkward in an intermediate code +that must be read many times. +Each pass of the optimizer would have +to incorporate an identifier look-up mechanism +to associate a defining occurrence with each +applied occurrence of an identifier. +Finally, EM programs are used to declare blocks of bytes, +rather than variables. A 'hol 6' instruction may be used to +declare three 2-byte variables. +Clearly, the optimizer wants to deal with variables, and +not with rows of bytes. +.PP +To overcome these problems, we have developed a new +intermediate code. +This code does not merely consist of the EM instructions, +but also contains global information in the +form of tables and graphs. +Before describing the intermediate code we will +first leap aside to outline +the problems one generally encounters +when trying to store complex data structures such as +graphs outside the program, i.e. in a file. +We trust this will enhance the +comprehensibility of the +intermediate code definition and the design and implementation +of the IC phase. 
diff --git a/doc/ego/ic/ic2 b/doc/ego/ic/ic2 new file mode 100644 index 0000000..f55e699 --- /dev/null +++ b/doc/ego/ic/ic2 @@ -0,0 +1,150 @@ +.NH 2 +Representation of complex data structures in a sequential file +.PP +Most programmers are quite used to deal with +complex data structures, such as +arrays, graphs and trees. +There are some particular problems that occur +when storing such a data structure +in a sequential file. +We call data that is kept in +main memory +.UL internal +,as opposed to +.UL external +data +that is kept in a file outside the program. +.sp +We assume a simple data structure of a +scalar type (integer, floating point number) +has some known external representation. +An +.UL array +having elements of a scalar type can be represented +externally easily, by successively +representing its elements. +The external representation may be preceded by a +number, giving the length of the array. +Now, consider a linear, singly linked list, +the elements of which look like: +.DS +record + data: scalar_type; + next: pointer_type; +end; +.DE +It is significant to note that the "next" +fields of the elements only have a meaning within +main memory. +The field contains the address of some location in +main memory. +If a list element is written to a file in +some program, +and read by another program, +the element will be allocated at a different +address in main memory. +Hence this address value is completely +useless outside the program. +.sp +One may represent the list by ignoring these "next" fields +and storing the data items in the order they are linked. +The "next" fields are represented \fIimplicitly\fR. +When the file is read again, +the same list can be reconstructed. +In order to know where the external representation of the +list ends, +it may be useful to put the length of +the list in front of it. +.sp +Note that arrays and linear lists have the +same external representation. 
+.PP +A doubly linked, linear list, +with elements of the type: +.DS +record + data: scalar_type; + next, + previous: pointer_type; +end +.DE +can be represented in precisely the same way. +Both the "next" and the "previous" fields are represented +implicitly. +.PP +Next, consider a binary tree, +the nodes of which have type: +.DS +record + data: scalar_type; + left, + right: pointer_type; +end +.DE +Such a tree can be represented sequentially, +by storing its nodes in some fixed order, e.g. prefix order. +A special null data item may be used to +denote a missing left or right son. +For example, let the scalar type be integer, +and let the null item be 0. +Then the tree of fig. 3.1(a) +can be represented as in fig. 3.1(b). +.DS +.ft 5 + 4 + / \e + 9 12 + / \e / \e + 12 3 4 6 + / \e \e / + 8 1 5 1 +.ft R + +Fig. 3.1(a) A binary tree + + +.ft 5 +4 9 12 0 0 3 8 0 0 1 0 0 12 4 0 5 0 0 6 1 0 0 0 +.ft R + +Fig. 3.1(b) Its sequential representation +.DE +We are still able to represent the pointer fields ("left" +and "right") implicitly. +.PP +Finally, consider a general +.UL graph +, where each node has a "data" field and +pointer fields, +with no restriction on where they may point to. +Now we're at the end of our tale. +There is no way to represent the pointers implicitly, +like we did with lists and trees. +In order to represent them explicitly, +we use the following scheme. +Every node gets an extra field, +containing some unique number that identifies the node. +We call this number its +.UL id. +A pointer is represented externally as the id of the node +it points to. +When reading the file we use a table that maps +an id to the address of its node. +In general this table will not be completely filled in +until we have read the entire external representation of +the graph and allocated internal memory locations for +every node. +Hence we cannot reconstruct the graph in one scan. 
+That is, there may be some pointers from node A to B, +where B is placed after A in the sequential file. +When we read the node of A we cannot map the id of B +to the address of node B, +as we have not yet allocated node B. +We can overcome this problem if the size +of every node is known in advance. +In this case we can allocate memory for a node +on first reference. +Else, the mapping from id to pointer +cannot be done while reading nodes. +The mapping can be done either in an extra scan +or at every reference to the node. diff --git a/doc/ego/ic/ic3 b/doc/ego/ic/ic3 new file mode 100644 index 0000000..d140160 --- /dev/null +++ b/doc/ego/ic/ic3 @@ -0,0 +1,431 @@ +.NH 2 +Definition of the intermediate code +.PP +The intermediate code of the optimizer consists +of several components: +.IP - +the object table +.IP - +the procedure table +.IP - +the em code +.IP - +the control flow graphs +.IP - +the loop table +.LP - +.PP +These components are described in +the next sections. +The syntactic structure of every component +is described by a set of context free syntax rules, +with the following conventions: +.DS +.TS +l l. +x a non-terminal symbol +A a terminal symbol (in capitals) +x: a b c; a grammar rule +a | b a or b +(a)+ 1 or more occurrences of a +{a} 0 or more occurrences of a +.TE +.DE +.NH 3 +The object table +.PP +EM programs declare blocks of bytes rather than (global) variables. +A typical program may declare 'HOL 7780' +to allocate space for 8 I/O buffers, +2 large arrays and 10 scalar variables. +The optimizer wants to deal with +.UL objects +like variables, buffers and arrays +and certainly not with huge numbers of bytes. +Therefore the intermediate code contains information +about which global objects are used. +This information can be obtained from an EM program +by just looking at the operands of instructions +such as LOE, LAE, LDE, STE, SDE, INE, DEE and ZRE. +.PP +The object table consists of a list of +.UL datablock +entries.
+Each such entry represents a declaration like HOL, BSS, +CON or ROM. +There are five kinds of datablock entries. +The fifth kind, +UNKNOWN, denotes a declaration in a +separately compiled file that is not made +available to the optimizer. +Each datablock entry contains the type of the block, +its size, and a description of the objects that +belong to it. +If it is a rom, +it also contains a list of values given +as arguments to the rom instruction, +provided that this list contains only integer numbers. +An object has an offset (within its datablock) +and a size. +The size need not always be determinable. +Both datablock and object contain a unique +identifying number +(see previous section for their use). +.DS +.UL syntax +.TS +lw(1i) l l. +object_table: + {datablock} ; +datablock: + D_ID -- unique identifying number + PSEUDO -- one of ROM,CON,BSS,HOL,UNKNOWN + SIZE -- # bytes declared + FLAGS + {value} -- contents of rom + {object} ; -- objects of the datablock +object: + O_ID -- unique identifying number + OFFSET -- offset within the datablock + SIZE ; -- size of the object in bytes +value: + argument ; +.TE +.DE +A data block has only one flag: "external", indicating +whether the data label is externally visible. +The syntax for "argument" will be given later on +(see em_text). +.NH 3 +The procedure table +.PP +The procedure table contains global information +about all procedures that are made available +to the optimizer +and that are needed by the EM program. +(Library units may not be needed, see section 3.5). +The table has one entry for +every procedure. +.DS +.UL syntax +.TS +lw(1i) l l. 
+procedure_table: + {procedure} ; +procedure: + P_ID -- unique identifying number + #LABELS -- number of instruction labels + #LOCALS -- number of bytes for locals + #FORMALS -- number of bytes for formals + FLAGS -- flag bits + calling -- procedures called by this one + change -- info about global variables changed + use ; -- info about global variables used +calling: + {P_ID} ; -- procedures called +change: + ext -- external variables changed + FLAGS ; +use: + FLAGS ; +ext: + {O_ID} ; -- a set of objects +.TE +.DE +.PP +The number of bytes of formal parameters accessed by +a procedure is determined by the front ends and +passed via a message (parameter message) to the optimizer. +If the front end is not able to determine this number +(e.g. the parameter may be an array of dynamic size or +the procedure may have a variable number of arguments) the attribute +contains the value 'UNKNOWN_SIZE'. +.sp 0 +A procedure has the following flags: +.IP - +external: true if the proc. is externally visible +.IP - +bodyseen: true if its code is available as EM text +.IP - +calunknown: true if it calls a procedure that has its bodyseen +flag not set +.IP - +environ: true if it uses or changes a (non-global) variable in +a lexically enclosing procedure +.IP - +lpi: true if it is used as operand of an lpi instruction, so +it may be called indirectly +.LP +The change and use attributes both have one flag: "indirect", +indicating whether the procedure does a 'use indirect' +or a 'store indirect' (indirect means through a pointer). +.NH 3 +The EM text +.PP +The EM text contains the EM instructions. +Every EM instruction has an operation code (opcode) +and 0 or 1 operands. +EM pseudo instructions can have more than +1 operand. +The opcode is just a small (8 bit) integer. +.sp +There are several kinds of operands, which we will +refer to as +.UL types. +Many EM instructions can have more than one type of operand.
+The types and their encodings in Compact Assembly Language +are discussed extensively in. +.[~[ +keizer architecture +.], section 11.2] +Of special interest is the way numeric values +are represented. +Of prime importance is the machine independency of +the representation. +Ultimately, one could store every integer +just as a string of the characters '0' to '9'. +As doing arithmetic on strings is awkward, +Compact Assembly Language allows several alternatives. +The main idea is to look at the value of the integer. +Integers that fit in 16, 32 or 64 bits are +represented as a row of resp. 2, 4 and 8 bytes, +preceded by an indication of how many bytes are used. +Longer integers are represented as strings; +this is only allowed within pseudo instructions, however. +This concept works very well for target machines +with reasonable word sizes. +At present, most ACK software cannot be used for word sizes +higher than 32 bits, +although the handles for using larger word sizes are +present in the design of the EM code. +In the intermediate code we essentially use the +same ideas. +We allow three representations of integers. +.IP - +integers that fit in a short are represented as a short +.IP - +integers that fit in a long but not in a short are represented +as longs +.IP - +all remaining integers are represented as strings +(only allowed in pseudos). +.LP +The terms short and long are defined in +.[~[ +ritchie reference manual programming language +.], section 4] +and depend only on the source machine +(i.e. the machine on which ACK runs), +not on the target machines. +For historical reasons a long will often be called an +.UL offset. +.PP +Operands can also be instruction labels, +objects or procedures. +Instruction labels are denoted by a +.UL label +.UL identifier, +which can be distinguished from a normal identifier. +.sp +The operand of a pseudo instruction can be a list of +.UL arguments. 
+Arguments can have the same type as operands, except +for the type short, which is not used for arguments. +Furthermore, an argument can be a string or +a string representation of a signed integer, unsigned integer +or floating point number. +If the number of arguments is not fully determined by +the pseudo instruction (e.g. a ROM pseudo can have any number +of arguments), then the list is terminated by a special +argument of type CEND. +.DS +.UL syntax +.TS +lw(1i) l l. +em_text: + {line} ; +line: + INSTR -- opcode + OPTYPE -- operand type + operand ; +operand: + empty | -- OPTYPE = NO + SHORT | -- OPTYPE = SHORT + OFFSET | -- OPTYPE = OFFSET + LAB_ID | -- OPTYPE = INSTRLAB + O_ID | -- OPTYPE = OBJECT + P_ID | -- OPTYPE = PROCEDURE + {argument} ; -- OPTYPE = LIST +argument: + ARGTYPE + arg ; +arg: + empty | -- ARGTYPE = CEND + OFFSET | + LAB_ID | + O_ID | + P_ID | + string | -- ARGTYPE = STRING + const ; -- ARGTYPE = ICON,UCON or FCON +string: + LENGTH -- number of characters + {CHARACTER} ; +const: + SIZE -- number of bytes + string ; -- string representation of (un)signed + -- or floating point constant +.TE +.DE +.NH 3 +The control flow graphs +.PP +Each procedure can be divided +into a number of basic blocks. +A basic block is a piece of code with +no jumps in, except at the beginning, +and no jumps out, except at the end. +.PP +Every basic block has a set of +.UL successors, +which are basic blocks that can follow it immediately in +the dynamic execution sequence. +The +.UL predecessors +are the basic blocks of which this one +is a successor. +The successor and predecessor attributes +of all basic blocks of a single procedure +are said to form the +.UL control +.UL flow +.UL graph +of that procedure. +.PP +Another important attribute is the +.UL immediate +.UL dominator. +A basic block B dominates a block C if +every path in the graph from the procedure entry block +to C goes through B. 
+The immediate dominator of C is the closest dominator +of C on any path from the entry block. +(Note that the dominator relation is transitive, +so the immediate dominator is well defined.) +.PP +A basic block also has an attribute containing +the identifiers of every +.UL loop +that the block belongs to (see next section for loops). +.DS +.UL syntax +.TS +lw(1i) l l. +control_flow_graph: + {basic_block} ; +basic_block: + B_ID -- unique identifying number + #INSTR -- number of EM instructions + succ + pred + idom -- immediate dominator + loops -- set of loops + FLAGS ; -- flag bits +succ: + {B_ID} ; +pred: + {B_ID} ; +idom: + B_ID ; +loops: + {LP_ID} ; +.TE +.DE +The flag bits can have the values 'firm' and 'strong', +which are explained below. +.NH 3 +The loop tables +.PP +Every procedure has an associated +.UL loop +.UL table +containing information about all the loops +in the procedure. +Loops can be detected by a close inspection of +the control flow graph. +The main idea is to look for two basic blocks, +B and C, for which the following holds: +.IP - +B is a successor of C +.IP - +B is a dominator of C +.LP +B is called the loop +.UL entry +and C is called the loop +.UL end. +Intuitively, C contains a jump backwards to +the beginning of the loop (B). +.PP +A loop L1 is said to be +.UL nested +within loop L2 if all basic blocks of L1 +are also part of L2. +It is important to note that loops could +originally be written as a well structured for -or +while loop or as a messy goto loop. +Hence loops may partly overlap without one +being nested inside the other. +The +.UL nesting +.UL level +of a loop is the number of loops in +which it is nested (so it is 0 for +an outermost loop). +The details of loop detection will be discussed later. +.PP +It is often desirable to know whether a +basic block gets executed during every iteration +of a loop. 
+This leads to the following definitions: +.IP - +A basic block B of a loop L is said to be a \fIfirm\fR block +of L if B is executed on all successive iterations of L, +with the only possible exception of the last iteration. +.IP - +A basic block B of a loop L is said to be a \fIstrong\fR block +of L if B is executed on all successive iterations of L. +.LP +Note that a strong block is also a firm block. +If a block is part of a conditional statement, it is neither +strong nor firm, as it may be skipped during some iterations +(see Fig. 3.2). +.DS +loop + if cond1 then + ... \kx-- this code will not + \h'|\nxu'-- result in a firm or strong block + end if; + ... -- strong (always executed) + exit when cond2; + ... \kx-- firm (not executed on last iteration). +end loop; + +Fig. 3.2 Example of firm and strong block +.DE +.DS +.UL syntax +.TS +lw(1i) l l. +looptable: + {loop} ; +loop: + LP_ID -- unique identifying number + LEVEL -- loop nesting level + entry -- loop entry block + end ; +entry: + B_ID ; +end: + B_ID ; +.TE +.DE diff --git a/doc/ego/ic/ic4 b/doc/ego/ic/ic4 new file mode 100644 index 0000000..b75f13f --- /dev/null +++ b/doc/ego/ic/ic4 @@ -0,0 +1,83 @@ +.NH 2 +External representation of the intermediate code +.PP +The syntax of the intermediate code was given +in the previous section. +In this section we will make some remarks about +the representation of the code in sequential files. +.sp +We use sequential files in order to avoid +the bookkeeping of complex file indices. +As a consequence of this decision +we can't store all components +of the intermediate code +in one file. +If a phase wishes to change some attribute +of a procedure, +or wants to add or delete entire procedures +(inline substitution may do the latter), +the procedure table will only be fully updated +after the entire EM text has been scanned. +Yet, the next phase undoubtedly wants +to read the procedure table before it +starts working on the EM text. 
+Hence there is an ordering problem, which +can be solved easily by putting the +procedure table in a separate file. +Similarly, the data block table is kept +in a file of its own. +.PP +The control flow graphs (CFGs) could be mixed +with the EM text. +Rather, we have chosen to put them +in a separate file too. +The control flow graph file should be regarded as a +file that imposes some structure on the EM-text file, +just as an overhead sheet containing a picture +of a Flow Chart may be put on an overhead sheet +containing statements. +The loop tables are also put in the CFG file. +A loop imposes an extra structure on the +CFGs and hence on the EM text. +So there are four files: +.IP - +the EM-text file +.IP - +the procedure table file +.IP - +the object table file +.IP - +the CFG and loop tables file +.LP +Every table is preceded by its length, in order to +tell where it ends. +The CFG file also contains the number of instructions of +every basic block, +indicating which part of the EM text belongs +to that block. +.DS +.UL syntax +.TS +lw(1i) l l. +intermediate_code: + object_table_file + proctable_file + em_text_file + cfg_file ; +object_table_file: + LENGTH -- number of objects + object_table ; +proctable_file: + LENGTH -- number of procedures + procedure_table ; +em_text_file: + em_text ; +cfg_file: + {per_proc} ; -- one for every procedure +per_proc: + BLENGTH -- number of basic blocks + LLENGTH -- number of loops + control_flow_graph + looptable ; +.TE +.DE diff --git a/doc/ego/ic/ic5 b/doc/ego/ic/ic5 new file mode 100644 index 0000000..eb91bd5 --- /dev/null +++ b/doc/ego/ic/ic5 @@ -0,0 +1,166 @@ +.NH 2 +The Intermediate Code construction phase +.PP +The first phase of the global optimizer, +called +.UL IC, +constructs a major part of the intermediate code. 
+To be specific, it produces: +.IP - +the EM text +.IP - +the object table +.IP - +part of the procedure table +.LP +The calling, change and use attributes of a procedure +and all its flags except the external and bodyseen flags +are computed by the next phase (Control Flow phase). +.PP +As explained before, +the intermediate code does not contain +any names of variables or procedures. +The normal identifiers are replaced by identifying +numbers. +Yet, the output of the global optimizer must +contain normal identifiers, as this +output is in Compact Assembly Language format. +We certainly want all externally visible names +to be the same in the input as in the output, +because the optimized EM module may be a library unit, +used by other modules. +IC dumps the names of all procedures and data labels +on two files: +.IP - +the procedure dump file, containing tuples (P_ID, procedure name) +.IP - +the data dump file, containing tuples (D_ID, data label name) +.LP +The names of instruction labels are not dumped, +as they are not visible outside the procedure +in which they are defined. +.PP +The input to IC consists of one or more files. +Each file is either an EM module in Compact Assembly Language +format, or a Unix archive file (library) containing such modules. +IC only extracts those modules from a library that are +needed somehow, just as a linker does. +It is advisable to present as much code +of the EM program as possible to the optimizer, +although it is not required to present the whole program. +If a procedure is called somewhere in the EM text, +but its body (text) is not included in the input, +its bodyseen flag in the procedure table will still +be off. +Whenever such a procedure is called, +we assume the worst case for everything; +it will change and use all variables it has access to, +it will call every procedure etc. +.sp +Similarly, if a data label is used +but not defined, the PSEUDO attribute in its data block +will be set to UNKNOWN. 
+.NH 3 +Implementation +.PP +Part of the code for the EM Peephole Optimizer +.[ +staveren peephole toplass +.] +has been used for IC. +Especially the routines that read and unravel +Compact Assembly Language and the identifier +lookup mechanism have been used. +New code was added to recognize objects, +build the object and procedure tables and to +output the intermediate code. +.PP +IC uses singly linked linear lists for both the +procedure and object table. +Hence there are no limits on the size of such +a table (except for the trivial fact that it must fit +in main memory). +Both tables are written out after all EM code has +been processed. +IC reads the EM text of one entire procedure +at a time, +processes it and appends the modified code to +the EM text file. +EM code is represented internally as a doubly linked linear +list of EM instructions. +.PP +Objects are recognized by looking at the operands +of instructions that reference global data. +If we come across the instructions: +.DS +.TS +l l. +LDE X+6 -- Load Double External +LAE X+20 -- Load Address External +.TE +.DE +we conclude that the data block +preceded by the data label X contains an object +at offset 6 of size twice the word size, +and an object at offset 20 of unknown size. +.sp +A data block entry of the object table is allocated +at the first reference to a data label. +If this reference is a defining occurrence +or an INA pseudo instruction, +the label is not externally visible +.[~[ +keizer architecture +.], section 11.1.4.3] +In this case, the external flag of the data block +is turned off. +If the first reference is an applied occurrence +or an EXA pseudo instruction, the flag is set. +We record this information, because the +optimizer may change the order of defining and +applied occurrences. +The INA and EXA pseudos are removed from the EM text. +They may be regenerated by the last phase +of the optimizer. +.sp +Similar rules hold for the procedure table +and the INP and EXP pseudos.
+.NH 3 +Source files of IC +.PP +The source files of IC consist +of the files ic.c, ic.h and several packages. +.UL ic.h +contains type definitions, macros and +variable declarations that may be used by +ic.c and by every package. +.UL ic.c +contains the definitions of these variables, +the procedure +.UL main +and some high level I/O routines used by main. +.sp +Every package xxx consists of two files. +ic_xxx.h contains type definitions, +macros, variable declarations and +procedure declarations that may be used by +every .c file that includes this .h file. +The file ic_xxx.c provides the +definitions of these variables and +the implementation of the declared procedures. +IC uses the following packages: +.IP lookup: 18 +procedures that look up procedure, data label +and instruction label names; procedures to dump +the procedure and data label names. +.IP lib: +one procedure that gets the next useful input module; +while scanning archives, it skips unnecessary modules. +.IP aux: +several auxiliary routines. +.IP io: +low-level I/O routines that unravel the Compact +Assembly Language. +.IP put: +routines that output the intermediate code. +.LP diff --git a/doc/ego/il/il1 b/doc/ego/il/il1 new file mode 100644 index 0000000..5bc33e6 --- /dev/null +++ b/doc/ego/il/il1 @@ -0,0 +1,112 @@ +.bp +.NH 1 +Inline substitution +.NH 2 +Introduction +.PP +The Inline Substitution technique (IL) +tries to decrease the overhead associated +with procedure calls (invocations). +During a procedure call, several actions +must be undertaken to set up the right +environment for the called procedure. +.[ +johnson calling sequence +.] +On return from the procedure, most of these +effects must be undone. +This entire process introduces significant +costs in execution time as well as +in object code size. +.PP +The inline substitution technique replaces +some of the calls by the modified body of +the called procedure, hence eliminating +the overhead.
+Furthermore, as the calling and called procedure +are now integrated, they can be optimized +together, using other techniques of the optimizer. +This often leads to extra opportunities for +optimization +.[ +ball predicting effects +.] +.[ +carter code generation cacm +.] +.[ +scheifler inline cacm +.] +.PP +An inline substitution of a call to a procedure P increases +the size of the program, unless P is very small or P is +called only once. +In the latter case, P can be eliminated. +In practice, procedures that are called only once occur +quite frequently, due to the +introduction of structured programming. +(Carter +.[ +carter umi ann arbor +.] +states that almost 50% of the Pascal procedures +he analyzed were called just once). +.PP +Scheifler +.[ +scheifler inline cacm +.] +has a more general view of inline substitution. +In his model, the program under consideration is +allowed to grow by a certain amount, +i.e. code size is sacrificed to speed up the program. +The above two cases are just special cases of +his model, obtained by setting the size-change to +(approximately) zero. +He formulates the substitution problem as follows: +.IP +"Given a program, a subset of all invocations, +a maximum program size, and a maximum procedure size, +find a sequence of substitutions that minimizes +the expected execution time." +.LP +Scheifler shows that this problem is NP-complete +.[~[ +aho hopcroft ullman analysis algorithms +.], chapter 10] +by reduction to the Knapsack Problem. +Heuristics will have to be used to find a near-optimal +solution. +.PP +In the following chapters we will extend +Scheifler's view and adapt it to the EM Global Optimizer. +We will first describe the transformations that have +to be applied to the EM text when a call is substituted +in line. +Next we will examine in which cases inline substitution +is not possible or desirable. +Heuristics will be developed for +choosing a good sequence of substitutions.
+These heuristics make no demand on the user +(such as making profiles +.[ +scheifler inline cacm +.] +or giving pragmats +.[~[ +ichbiah ada military standard +.], section 6.3.2]), +although the model could easily be extended +to use such information. +Finally, we will discuss the implementation +of the IL phase of the optimizer. +.PP +We will often use the term inline expansion +as a synonym of inline substitution. +.sp 0 +The inverse technique of procedure abstraction +(automatic subroutine generation) +.[ +shaffer subroutine generation +.] +will not be discussed in this report. diff --git a/doc/ego/il/il2 b/doc/ego/il/il2 new file mode 100644 index 0000000..ea69b35 --- /dev/null +++ b/doc/ego/il/il2 @@ -0,0 +1,93 @@ +.NH 2 +Parameters and local variables. +.PP +In the EM calling sequence, the calling procedure +pushes its parameters on the stack +before doing the CAL. +The called routine first saves some +status information on the stack and then +allocates space for its own locals +(also on the stack). +Usually, one special purpose register, +the Local Base (LB) register, +is used to access both the locals and the +parameters. +If memory is highly segmented, +the stack frames of the caller and the callee +may be allocated in different fragments; +an extra Argument Base (AB) register is used +in this case to access the actual parameters. +See 4.2 of +.[ +keizer architecture +.] +for further details. +.PP +If a procedure call is expanded in line, +there are two problems: +.IP 1. 3 +No stack frame will be allocated for the called procedure; +we must find another place to put its locals. +.IP 2. +The LB register cannot be used to access the actual +parameters; +as the CAL instruction is deleted, the LB will +still point to the local base of the \fIcalling\fR procedure. +.LP +The local variables of the called procedure will +be put in the stack frame of the calling procedure, +just after its own locals. 
+The size of the stack frame of the +calling procedure will be increased +during its entire lifetime. +Therefore our model will allow a +limit to be set on the number of bytes +for locals that the called procedure may have +(see next section). +.PP +There are several alternatives to access the parameters. +An actual parameter may be any auxiliary expression, +which we will refer to as +the \fIactual parameter expression\fR. +The value of this expression is stored +in a location on the stack (see above), +the \fIparameter location\fR. +.sp 0 +The alternatives for accessing parameters are: +.IP - +save the value of the stackpointer at the point of the CAL +in a temporary variable X; +this variable can be used to simulate the AB register, i.e. +parameter locations are accessed via an offset to +the value of X. +.IP - +create a new temporary local variable T for +the parameter (in the stack frame of the caller); +every access to the parameter location must be changed +into an access to T. +.IP - +do not evaluate the actual parameter expression before the call; +instead, substitute this expression for every use of the +parameter location. +.LP +The first method may be expensive if X is not +put in a register. +We will not use this method. +The time required to evaluate and access the +parameters when the second method is used +will not differ much from the normal +calling sequence (i.e. not in line call). +It is not expensive, but there are no +extra savings either. +The third method is essentially the 'by name' +parameter mechanism of Algol60. +If the actual parameter is just a numeric constant, +it is advantageous to use it. +Yet, there are several circumstances +under which it cannot or should not be used. +We will deal with this in the next section. +.sp 0 +In general we will use the third method, +if it is possible and desirable. +Such parameters will be called \fIin line parameters\fR. +In all other cases we will use the second method. 
diff --git a/doc/ego/il/il3 b/doc/ego/il/il3 new file mode 100644 index 0000000..398b4c8 --- /dev/null +++ b/doc/ego/il/il3 @@ -0,0 +1,164 @@ +.NH 2 +Feasibility and desirability analysis +.PP +Feasibility and desirability analysis +of in line substitution differ +somewhat from most other techniques. +Usually, much effort is needed to find +a feasible opportunity for optimization +(e.g. a redundant subexpression). +Desirability analysis then checks +if it is really advantageous to do +the optimization. +For IL, opportunities are easy to find. +To see if an in line expansion is +desirable will not be hard either. +Yet, the main problem is to find the most +desirable ones. +We will deal with this problem later and +we will first attend to feasibility and +desirability analysis. +.PP +There are several reasons why a procedure invocation +cannot or should not be expanded in line. +.sp +A call to a procedure P cannot be expanded in line +in any of the following cases: +.IP 1. 3 +The body of P is not available as EM text. +Clearly, there is no way to do the substitution. +.IP 2. +P, or any procedure called by P (transitively), +follows the chain of statically enclosing +procedures (via an LXL or LXA instruction) +or follows the chain of dynamically enclosing +procedures (via a DCH). +If the call were expanded in line, +one level would be removed from the chains, +leading to total chaos. +This chaos could be solved by patching up +every LXL, LXA or DCH in all procedures +that could be part of the chains, +but this is hard to implement. +.IP 3. +P, or any procedure called by P (transitively), +calls a procedure whose body is not +available as EM text. +The unknown procedure may use an LXL, LXA or DCH. +However, in several languages a separately +compiled procedure has no access to the +static or dynamic chain. +In this case +this point does not apply. +.IP 4.
+P, or any procedure called by P (transitively), +uses the LPB instruction, which converts a +local base to an argument base; +as the locals and parameters are stored +in a non-standard way (differing from the +normal EM calling sequence) this instruction +would yield incorrect results. +.IP 5. +The total number of bytes of the parameters +of P is not known. +P may be a procedure with a variable number +of parameters or may have an array of dynamic size +as value parameter. +.LP +It is undesirable to expand a call to a procedure P in line +in any of the following cases: +.IP 1. 3 +P is large, i.e. the number of EM instructions +of P exceeds some threshold. +The expanded code would be large too. +Furthermore, several programs in ACK, +including the global optimizer itself, +may run out of memory if they have to run +in a small address space and are provided with +very large procedures. +The threshold may be set to infinity, +in which case this point does not apply. +.IP 2. +P has many local variables. +All these variables would have to be allocated +in the stack frame of the calling procedure. +.PP +If a call may be expanded in line, we have to +decide how to access its parameters. +In the previous section we stated that we would +use in line parameters whenever possible and desirable. +There are several reasons why a parameter +cannot or should not be expanded in line. +.sp +No parameter of a procedure P can be expanded in line, +in any of the following cases: +.IP 1. 3 +P, or any procedure called by P (transitively), +does a store-indirect or a use-indirect (i.e. through +a pointer). +However, if the front-end has generated messages +telling that certain parameters can not be accessed +indirectly, those parameters may be expanded in line. +.IP 2. +P, or any procedure called by P (transitively), +calls a procedure whose body is not available as EM text. +The unknown procedure may do a store-indirect +or a use-indirect.
+However, the same remark about front-end messages +as for 1. holds here. +.IP 3. +The address of a parameter location is taken (via a LAL). +In the normal calling sequence, all parameters +are stored sequentially. If the address of one +parameter location is taken, the address of any +other parameter location can be computed from it. +Hence we must put every parameter in a temporary location; +furthermore, all these locations must be in +the same order as for the normal calling sequence. +.IP 4. +P has overlapping parameters; for example, it uses +the parameter at offset 10 both as a 2 byte and as a 4 byte +parameter. +Such code may be produced by the front ends if +the formal parameter is of some record type +with variants. +.PP +Sometimes a specific parameter must not be expanded in line. +.sp 0 +An actual parameter expression cannot be expanded in line +in any of the following cases: +.IP 1. 3 +P stores into the parameter location. +Even if the actual parameter expression is a simple +variable, it is incorrect to change the 'store into +formal' into a 'store into actual', because of +the parameter mechanism used. +In Pascal, the following expansion is incorrect: +.DS +procedure p (x:integer); +begin + x := 20; +end; +\&... +a := 10; \kxa := 10; +p(a); ---> \h'|\nxu'a := 20; +write(a); \h'|\nxu'write(a); +.DE +.IP 2. +P changes any of the operands of the +actual parameter expression. +If the expression is expanded and evaluated +after the operand has been changed, +the wrong value will be used. +.IP 3. +The actual parameter expression has side effects. +It must be evaluated only once, +at the place of the call. +.LP +It is undesirable to expand an actual parameter in line +in the following case: +.IP 1. 3 +The parameter is used more than once +(dynamically) and the actual parameter expression +is not just a simple variable or constant. 
+.LP diff --git a/doc/ego/il/il4 b/doc/ego/il/il4 new file mode 100644 index 0000000..8ef6858 --- /dev/null +++ b/doc/ego/il/il4 @@ -0,0 +1,135 @@ +.NH 2 +Heuristic rules +.PP +Using the information described +in the previous section, +we can find all calls that can +be expanded in line, and for which +this expansion is desirable. +In general, we cannot expand all these calls, +so we have to choose the 'best' ones. +With every CAL instruction +that may be expanded, we associate +a \fIpay off\fR, +which expresses how desirable it is +to expand this specific CAL. +.sp +Let Tc denote the portion of EM text involved +in a specific call, i.e. the pushing of the actual +parameter expressions, the CAL itself, +the popping of the parameters and the +pushing of the result (if any, via an LFR). +Let Te denote the EM text that would be obtained +by expanding the call in line. +Let Pc be the original program and Pe the program +with Te substituted for Tc. +The pay off of the CAL depends on two factors: +.IP - +T = execution_time(Pe) - execution_time(Pc) +.IP - +S = code_size(Pe) - code_size(Pc) +.LP +The change in execution time (T) depends on: +.IP - +T1 = execution_time(Te) - execution_time(Tc) +.IP - +N = number of times Te or Tc get executed. +.LP +We assume that T1 will be the same every +time the code gets executed. +This is a reasonable assumption. +(Note that we are talking about one CAL, +not about different calls to the same procedure). +Hence +.DS +T = N * T1 +.DE +T1 can be estimated by a careful analysis +of the transformations that are performed. +Below, we list everything that will be +different when a call is expanded in line: +.IP - +The CAL instruction is not executed. +This saves a subroutine jump. +.IP - +The instructions in the procedure prolog +are not executed. +These instructions, generated from the PRO pseudo, +save some machine registers +(including the old LB), set the new LB and allocate space +for the locals of the called routine. 
+The savings may be less if there are no +locals to allocate. +.IP - +In line parameters are not evaluated before the call +and are not pushed on the stack. +.IP - +All remaining parameters are stored in local variables, +instead of being pushed on the stack. +.IP - +If the number of parameters is nonzero, +the ASP instruction after the CAL is not executed. +.IP - +Every reference to an in line parameter is +substituted by the parameter expression. +.IP - +RET (return) instructions are replaced by +BRA (branch) instructions. +If the called procedure 'falls through' +(i.e. it has only one RET, at the end of its code), +even the BRA is not needed. +.IP - +The LFR (fetch function result) is not executed. +.PP +Besides these changes, which are caused directly by IL, +other changes may occur as IL influences other optimization +techniques, such as Register Allocation and Constant Propagation. +Our heuristic rules do not take into account the quite +unpredictable effects on Register Allocation. +They do, however, favour calls that have numeric \fIconstants\fR +as parameter; especially the constant "0" as an inline +parameter gets high scores, +as further optimizations may often be possible. +.PP +It cannot be determined statically how often a CAL instruction gets +executed. +We will use \fIloop nesting\fR information here. +The nesting level of the loop in which +the CAL appears (if any) will be used as an +indication for the number of times it gets executed. +.PP +Based on all these facts, +the pay off of a call will be computed. +The following model was developed empirically. +Assume procedure P calls procedure Q. +The call takes place in basic block B. +.DS +.TS +l l l.
+ZP \&= # zero parameters +CP \&= # constant parameters - ZP +LN \&= Loop Nesting level (0 if outside any loop) +F \&= \fIif\fR # formal parameters of Q > 0 \fIthen\fR 1 \fIelse\fR 0 +FT \&= \fIif\fR Q falls through \fIthen\fR 1 \fIelse\fR 0 +S \&= size(Q) - 1 - # inline_parameters - F +L \&= \fIif\fR # local variables of P > 0 \fIthen\fR 0 \fIelse\fR -1 +A \&= CP + 2 * ZP +N \&= \fIif\fR LN=0 and P is never called from a loop \fIthen\fR 0 \fIelse\fR (LN+1)**2 +FM \&= \fIif\fR B is a firm block \fIthen\fR 2 \fIelse\fR 1 + +pay_off \&= (100/S + FT + F + L + A) * N * FM +.TE +.DE +S stands for the size increase of the program, +which is slightly less than the size of Q. +The size of a procedure is taken to be its number +of (non-pseudo) EM instructions. +The terms "loop nesting level" and "firm" were defined +in the chapter on the Intermediate Code (section "loop tables"). +If a call is not inside a loop and the calling procedure +is itself never called from a loop (transitively), +then the call will probably be executed at most once. +Such a call is never expanded in line (its pay off is zero). +If the calling procedure doesn't have local variables, a penalty (L) +is introduced, as it will most likely get local variables if the +call gets expanded. diff --git a/doc/ego/il/il5 b/doc/ego/il/il5 new file mode 100644 index 0000000..2c434de --- /dev/null +++ b/doc/ego/il/il5 @@ -0,0 +1,446 @@ +.NH 2 +Implementation +.PP +A major factor in the implementation +of Inline Substitution is the requirement +not to use an excessive amount of memory. +IL essentially analyzes the entire program; +it makes decisions based on which procedure calls +appear in the whole program. +Yet, because of the memory restriction, it is +not feasible to read the entire program +in main memory. +To solve this problem, the IL phase has been +split up into three subphases that are executed sequentially: +.IP 1. 
+analyze every procedure; see how it accesses its parameters; +simultaneously collect all calls +appearing in the whole program and put them +in a \fIcall-list\fR. +.IP 2. +use the call-list and decide which calls will be substituted +in line. +.IP 3. +take the decisions of subphase 2 and modify the +program accordingly. +.LP +Subphases 1 and 3 scan the input program; only +subphase 3 modifies it. +It is essential that the decisions can be made +in subphase 2 +without using the input program, +provided that subphase 1 puts enough information +in the call-list. +Subphase 2 keeps the entire call-list in main memory +and repeatedly scans it, to +find the next best candidate for expansion. +.PP +We will specify the +data structures used by IL before +describing the subphases. +.NH 3 +Data structures +.NH 4 +The procedure table +.PP +In subphase 1 information is gathered about every procedure +and added to the procedure table. +This information is used by the heuristic rules. +A proctable entry for procedure p has +the following extra information: +.IP - +is it allowed to substitute an invocation of p in line? +.IP - +is it allowed to put any parameter of such a call in line? +.IP - +the size of p (number of EM instructions) +.IP - +does p 'fall through'? +.IP - +a description of the formal parameters that p accesses; this information +is obtained by looking at the code of p. For every parameter f, +we record: +.RS +.IP - +the offset of f +.IP - +the type of f (word, double word, pointer) +.IP - +may the corresponding actual parameter be put in line? +.IP - +is f ever accessed indirectly? +.IP - +is f used: never, once or more than once? +.RE +.IP - +the number of times p is called (see below) +.IP - +the file address of its call-count information (see below). +.LP +.NH 4 +Call-count information +.PP +As a result of Inline Substitution, some procedures may +become useless, because all their invocations have been +substituted in line.
+One of the tasks of IL is to keep track of which +procedures are no longer called. +Note that IL is especially keen on procedures that are +called only once +(possibly as a result of expanding all other calls to it). +So we want to know how many times a procedure +is called \fIduring\fR Inline Substitution. +It is not good enough to compute this +information afterwards. +The task is rather complex, because +the number of times a procedure is called +varies during the entire process: +.IP 1. +If a call to p is substituted in line, +the number of calls to p gets decremented by 1. +.IP 2. +If a call to p is substituted in line, +and p contains n calls to q, then the number of calls to q +gets incremented by n. +.IP 3. +If a procedure p is removed (because it is no +longer called) and p contains n calls to q, +then the number of calls to q gets decremented by n. +.LP +(Note that p may be the same as q, if p is recursive). +.sp 0 +So we actually want to have the following information: +.DS +NRCALL(p,q) = number of calls to q appearing in p, + +for all procedures p and q that may be put in line. +.DE +This information, called \fIcall-count information\fR is +computed by the first subphase. +It is stored in a file. +It is represented as a number of lists, rather than as +a (very sparse) matrix. +Every procedure has a list of (proc,count) pairs, +telling which procedures it calls, and how many times. +The file address of its call-count list is stored +in its proctable entry. +Whenever this information is needed, it is fetched from +the file, using direct access. +The proctable entry also contains the number of times +a procedure is called, at any moment. +.NH 4 +The call-list +.PP +The call-list is the major data structure used by IL. +Every item of the list describes one procedure call.
+It contains the following attributes: +.IP - +the calling procedure (caller) +.IP - +the called procedure (callee) +.IP - +identification of the CAL instruction (sequence number) +.IP - +the loop nesting level; our heuristic rules appreciate +calls inside a loop (or even inside a loop nested inside +another loop, etc.) more than other calls +.IP - +the actual parameter expressions involved in the call; +for every actual, we record: +.RS +.IP - +the EM code of the expression +.IP - +the number of bytes of its result (size) +.IP - +an indication if the actual may be put in line +.RE +.LP +The structure of the call-list is rather complex. +Whenever a call is expanded in line, new calls +will suddenly appear in the program, +that were not contained in the original body +of the calling subroutine. +These calls are inherited from the called procedure. +We will refer to these invocations as \fInested calls\fR +(see Fig. 5.1). +.DS +.TS +lw(2.5i) l. +procedure p is +begin . + a(); . + b(); . +end; +.TE + +.TS +lw(2.5i) l. +procedure r is procedure r is +begin begin + x(); x(); + p(); -- in line a(); -- nested call + y(); b(); -- nested call +end; y(); + end; +.TE + +Fig. 5.1 Example of nested procedure calls +.DE +Nested calls may subsequently be put in line too +(probably resulting in a yet deeper nesting level, etc.). +So the call-list does not always reflect the source program, +but changes dynamically, as decisions are made. +If a call to p is expanded, all calls appearing in p +will be added to the call-list. +.sp 0 +A convenient and elegant way to represent +the call-list is to use a LISP-like list. +.[ +poel lisp trac +.] +Calls that appear at the same level +are linked in the CDR direction. If a call C +to a procedure p is expanded, +all calls appearing in p are put in a sub-list +of C, i.e. in its CAR. 
+In the example above, before the decision +to expand the call to p is made, the +call-list of procedure r looks like: +.DS +(call-to-x, call-to-p, call-to-y) +.DE +After the decision, it looks like: +.DS +(call-to-x, (call-to-p*, call-to-a, call-to-b), call-to-y) +.DE +The call to p is marked, because it has been +substituted. +Whenever IL wants to traverse the call-list of some procedure, +it uses the well-known LISP technique of +recursion in the CAR direction and +iteration in the CDR direction +(see page 1.19-2 of +.[ +poel lisp trac +.] +). +All list traversals look like: +.DS +traverse(list) +{ + for (c = first(list); c != 0; c = CDR(c)) { + if (c is marked) { + traverse(CAR(c)); + } else { + do something with c + } + } +} +.DE +The entire call-list consists of a number of LISP-like lists, +one for every procedure. +The proctable entry of a procedure contains a pointer +to the beginning of the list. +.NH 3 +The first subphase: procedure analysis +.PP +The tasks of the first subphase are to determine +several attributes of every procedure +and to construct the basic call-list, +i.e. without nested calls. +The size of a procedure is determined +by simply counting its EM instructions. +Pseudo instructions are skipped. +A procedure does not 'fall through' if its CFG +contains a basic block +that is not the last block of the CFG and +that ends on a RET instruction. +The formal parameters of a procedure are determined +by inspection of +its code. +.PP +The call-list is constructed by looking at all CAL instructions +appearing in the program. +The call-list should only contain calls to procedures +that may be put in line. +This fact is only known if the procedure was +analyzed earlier. +If a call to a procedure p appears in the program +before the body of p, +the call will always be put in the call-list. +If p is later found to be unsuitable, +the call will be removed from the list by the +second subphase.
+.PP +An important issue is the recognition +of the actual parameter expressions of the call. +The front ends produce messages telling how many +bytes of formal parameters every procedure accesses. +(If there is no such message for a procedure, it +cannot be put in line). +The actual parameters together must account for +the same number of bytes. A recursive descent parser is used +to parse side-effect free EM expressions. +It uses a table and some +auxiliary routines to determine +how many bytes every EM instruction pops from the stack +and how many bytes it pushes onto the stack. +These numbers depend on the EM instruction, its argument, +and the wordsize and pointersize of the target machine. +Initially, the parser has to recognize the +number of bytes specified in the formals-message, +say N. +Assume the first instruction before the CAL pops S bytes +and pushes R bytes. +If R > N, too many bytes are recognized +and the parser fails. +Else, it calls itself recursively to recognize the +S bytes used as operand of the instruction. +If it succeeds in doing so, it continues with the next instruction, +i.e. the first instruction before the code recognized by +the recursive call, to recognize N-R more bytes. +The result is a number of EM instructions that collectively push N bytes. +If an instruction is come across that has side-effects +(e.g. a store or a procedure call) or of which R and S cannot +be computed statically (e.g. a LOS), it fails. +.sp 0 +Note that the parser traverses the code backwards. +As EM code is essentially postfix code, the parser works top down. +.PP +If the parser fails to recognize the parameters, the call will not +be substituted in line. +If the parameters can be determined, they still have to +match the formal parameters of the called procedure. +This check is performed by the second subphase; it cannot be +done here, because it is possible that the called +procedure has not been analyzed yet.
+.PP +The entire call-list is written to a file, +to be processed by the second subphase. +.NH 3 +The second subphase: making decisions +.PP +The task of the second subphase is quite easy +to understand. +It reads the call-list file, +builds an incore call-list and deletes every +call that may not be expanded in line (either because the called +procedure may not be put in line, or because the actual parameters +of the call do not match the formal parameters of the called procedure). +It assigns a \fIpay-off\fR to every call, +indicating how desirable it is to expand it. +.PP +The subphase repeatedly scans the call-list and takes +the call with the highest ratio. +The chosen one gets marked, +and the call-list is extended with the nested calls, +as described above. +These nested calls are also assigned a ratio, +and will be considered too during the next scans. +.sp 0 +After every decision the number of times +every procedure is called is updated, using +the call-count information. +Meanwhile, the subphase keeps track of the amount of space left +available. +If all space is used, or if there are no more calls left to +be expanded, it exits this loop. +Finally, calls to procedures that are called only +once are also chosen. +.PP +The actual parameters of a call are only needed by +this subphase to assign a ratio to a call. +To save some space, these actuals are not kept in main memory. +They are removed after the call has been read and a ratio +has been assigned to it. +So this subphase works with \fIabstracts\fR of calls. +After all work has been done, +the actual parameters of the chosen calls are retrieved +from a file, +as they are needed by the transformation subphase. +.NH 3 +The third subphase: doing transformations +.PP +The third subphase makes the actual modifications to +the EM text. +It is directed by the decisions made in the previous subphase, +as expressed via the call-list. 
+The call-list read by this subphase contains +only calls that were selected for expansion. +The list is ordered in the same way as the EM text, +i.e. if a call C1 appears before a call C2 in the call-list, +C1 also appears before C2 in the EM text. +So the EM text is traversed linearly, +the calls that have to be substituted are determined +and the modifications are made. +If a procedure is come across that is no longer needed, +it is simply not written to the output EM file. +The substitution of a call takes place in distinct steps: +.IP "change the calling sequence" 7 +.sp 0 +The actual parameter expressions are changed. +Parameters that are put in line are removed. +All remaining ones must store their result in a +temporary local variable, rather than +push it on the stack. +The CAL instruction and any ASP (to pop actual parameters) +or LFR (to fetch the result of a function) +are deleted. +.IP "fetch the text of the called procedure" +.sp 0 +Direct disk access is used to read the text of the +called procedure. +The file offset is obtained from the proctable entry. +.IP "allocate bytes for locals and temporaries" +.sp 0 +The local variables of the called procedure will be put in the +stack frame of the calling procedure. +The same applies to any temporary variables +that hold the result of parameters +that were not put in line. +The proctable entry of the caller is updated. +.IP "put a label after the CAL" +.sp 0 +If the called procedure contains a RET (return) instruction +somewhere in the middle of its text (i.e. it does +not fall through), the RET must be changed into +a BRA (branch), to jump over the +remainder of the text. +This label is not needed if the called +procedure falls through. +.IP "copy the text of the called procedure and modify it" +.sp 0 +References to local variables of the called routine +and to parameters that are not put in line +are changed to refer to the +new local of the caller.
+References to in line parameters are replaced +by the actual parameter expression. +Returns (RETs) are either deleted or +replaced by a BRA. +Messages containing information about local +variables or parameters are changed. +Global data declarations and the PRO and END pseudos +are removed. +Instruction labels and references to them are +changed to make sure they do not have the +same identifying number as +labels in the calling procedure. +.IP "insert the modified text" +.sp 0 +The pseudos of the called procedure are put after the pseudos +of the calling procedure. +The real text of the callee is put at +the place where the CAL was. +.IP "take care of nested substitutions" +.sp 0 +The expanded procedure may contain calls that +have to be expanded too (nested calls). +If the descriptor of this call contains actual +parameter expressions, +the code of the expressions has to be changed +the same way as the code of the callee was changed. +Next, the entire process of finding CALs and doing +the substitutions is repeated recursively. +.LP diff --git a/doc/ego/il/il6 b/doc/ego/il/il6 new file mode 100644 index 0000000..a7e37a4 --- /dev/null +++ b/doc/ego/il/il6 @@ -0,0 +1,27 @@ +.NH 2 +Source files of IL +.PP +The sources of IL are in the following files +and packages (the prefixes 1_, 2_ and 3_ refer to the three subphases): +.IP il.h: 14 +declarations of global variables and +data structures +.IP il.c: +the routine main; the driving routines of the three subphases +.IP 1_anal: +contains a subroutine that analyzes a procedure +.IP 1_cal: +contains a subroutine that analyzes a call +.IP 1_aux: +implements auxiliary procedures used by subphase 1 +.IP 2_aux: +implements auxiliary procedures used by subphase 2 +.IP 3_subst: +the driving routine for doing the substitution +.IP 3_change: +lower level routines that do certain modifications +.IP 3_aux: +implements auxiliary procedures used by subphase 3 +.IP aux: +implements auxiliary procedures used by several subphases. 
+.LP diff --git a/doc/ego/intro/head b/doc/ego/intro/head new file mode 100644 index 0000000..ccc710b --- /dev/null +++ b/doc/ego/intro/head @@ -0,0 +1,10 @@ +.ND +.\".ll 80m +.\".nr LL 80m +.\".nr tl 78m +.tr ~ +.ds >. . +.ds >, , +.ds [. " [ +.ds .] ] +.cs 5 22 diff --git a/doc/ego/intro/intro1 b/doc/ego/intro/intro1 new file mode 100644 index 0000000..de7a5ae --- /dev/null +++ b/doc/ego/intro/intro1 @@ -0,0 +1,79 @@ +.TL +The design and implementation of +the EM Global Optimizer +.AU +H.E. Bal +.AI +Vrije Universiteit +Wiskundig Seminarium, Amsterdam +.AB +The EM Global Optimizer is part of the Amsterdam Compiler Kit, +a toolkit for making retargetable compilers. +It optimizes the intermediate code common to all compilers of +the toolkit (EM), +so it can be used for all programming languages and +all processors supported by the kit. +.PP +The optimizer is based on well-understood concepts like +control flow analysis and data flow analysis. +It performs the following optimizations: +Inline Substitution, Strength Reduction, Common Subexpression Elimination, +Stack Pollution, Cross Jumping, Branch Optimization, Copy Propagation, +Constant Propagation, Dead Code Elimination and Register Allocation. +.PP +This report describes the design of the optimizer and several +of its implementation issues. +.AE +.bp +.NH 1 +Introduction +.PP +.FS +This work was supported by the +Stichting Technische Wetenschappen (STW) +under grant VWI00.0001. +.FE +The EM Global Optimizer is part of a software toolkit +for making production-quality retargetable compilers. +This toolkit, +called the Amsterdam Compiler Kit +.[ +tanenbaum toolkit rapport +.] +.[ +tanenbaum toolkit cacm +.] +runs under the Unix* +.FS +*Unix is a Trademark of Bell Laboratories +.FE +operating system. +.sp 0 +The main design philosophy of the toolkit is to use +a language- and machine-independent +intermediate code, called EM. +.[ +keizer architecture +.] 
+The basic compilation process can be split up into +two parts. +A language-specific front end translates the source program into EM. +A machine-specific back end transforms EM to assembly code +of the target machine. +.PP +The global optimizer is an optional phase of the +compilation process, and can be used to obtain +machine code of a higher quality. +The optimizer transforms EM-code to better EM-code, +so it comes between the front end and the back end. +It can be used with any combination of languages +and machines, as far as they are supported by +the compiler kit. +.PP +This report describes the design of the +global optimizer and several of its +implementation issues. +Measurements can be found in. +.[ +bal tanenbaum global +.] diff --git a/doc/ego/intro/tail b/doc/ego/intro/tail new file mode 100644 index 0000000..46cced8 --- /dev/null +++ b/doc/ego/intro/tail @@ -0,0 +1,17 @@ +.SH +Acknowledgements +.PP +The author would like to thank Andy Tanenbaum for his guidance, +Duk Bekema for implementing the Common Subexpression Elimination phase +and writing the initial documentation of that phase, +Dick Grune for reading the manuscript of this report +and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren +and the members of the S.T.W. user's group for their +interest and assistance. +.bp +.SH +References +.LP +.[ +$LIST$ +.] diff --git a/doc/ego/lv/lv1 b/doc/ego/lv/lv1 new file mode 100644 index 0000000..7574ca6 --- /dev/null +++ b/doc/ego/lv/lv1 @@ -0,0 +1,95 @@ +.bp +.NH 1 +Live-Variable analysis +.NH 2 +Introduction +.PP +The "Live-Variable analysis" optimization technique (LV) +performs some code improvements and computes information that may be +used by subsequent optimizations. +The main task of this phase is the +computation of \fIlive-variable information\fR. +.[~[ +aho compiler design +.] 
section 14.4] +A variable A is said to be \fIdead\fR at some point p of the +program text, if on no path in the control flow graph +from p to a RET (return), A can be used before being changed; +else A is said to be \fIlive\fR. +.PP +A statement of the form +.DS +VARIABLE := EXPRESSION +.DE +is said to be dead if the left hand side variable is dead just after +the statement and the right hand side expression has no +side effects (i.e. it doesn't change any variable). +Such a statement can be eliminated entirely. +Dead code will seldom be present in the original program, +but it may be the result of earlier optimizations, +such as copy propagation. +.PP +Live-variable information is passed to other phases via +messages in the EM code. +Live/dead messages are generated at points in the EM text where +variables become dead or live. +This information is especially useful for the Register +Allocation phase. +.NH 2 +Implementation +.PP +The implementation uses algorithm 14.6 of. +.[ +aho compiler design +.] +First two sets DEF and USE are computed for every basic block b: +.IP DEF(b) 9 +the set of all variables that are assigned a value in b before +being used +.IP USE(b) 9 +the set of all variables that may be used in b before being changed. +.LP +(So variables that may, but need not, be used resp. changed via a procedure +call or through a pointer are included in USE but not in DEF). +The next step is to compute the sets IN and OUT : +.IP IN[b] 9 +the set of all variables that are live at the beginning of b +.IP OUT[b] 9 +the set of all variables that are live at the end of b +.LP +IN and OUT can be computed for all blocks simultaneously by solving the +data flow equations: +.DS +(1) IN[b] = OUT[b] - DEF[b] + USE[b] +(2) OUT[b] = IN[s1] + ... + IN[sn] ; + where SUCC[b] = {s1, ... , sn} +.DE +The equations are solved by a similar algorithm as for +the Use Definition equations (see previous chapter).
+.PP +Finally, each basic block is visited in turn to remove its dead code +and to emit the live/dead messages. +Every basic block b is traversed from its last +instruction backwards to the beginning of b. +Initially, all variables that are dead at the end +of b are marked dead. All others are marked live. +If we come across an assignment to a variable X that +was marked live, a live-message is put after the +assignment and X is marked dead; +if X was marked dead, the assignment may be removed, provided that +the right hand side expression contains no side effects. +If we come across a use of a variable X that +was marked dead, a dead-message is put after the +use and X is marked live. +So at any point, the mark of X tells whether X is +live or dead immediately before that point. +A message is also generated at the start of a basic block +for every variable that was live at the end of the (textually) +previous block, but dead at the entry of this block, or v.v. +.PP +Only local variables are considered. +This significantly reduces the memory needed by this phase, +eases the implementation and is hardly less efficient than +considering all variables. +(Note that it is very hard to prove that an assignment to +a global variable is dead). diff --git a/doc/ego/ov/ov1 b/doc/ego/ov/ov1 new file mode 100644 index 0000000..78d4326 --- /dev/null +++ b/doc/ego/ov/ov1 @@ -0,0 +1,374 @@ +.bp +.NH 1 +Overview of the global optimizer +.NH 2 +The ACK compilation process +.PP +The EM Global Optimizer is one of three optimizers that are +part of the Amsterdam Compiler Kit (ACK). +The phases of ACK are: +.IP 1. +A Front End translates a source program to EM +.IP 2. +The Peephole Optimizer +.[ +tanenbaum staveren peephole toplass +.] +reads EM code and produces 'better' EM code. +It performs a number of optimizations (mostly peephole +optimizations) +such as constant folding, strength reduction and unreachable code +elimination. +.IP 3. 
+The Global Optimizer further improves the EM code. +.IP 4. +The Code Generator transforms EM to assembly code +of the target computer. +.IP 5. +The Target Optimizer improves the assembly code. +.IP 6. +An Assembler/Loader generates an executable file. +.LP +For a more extensive overview of the ACK compilation process, +we refer to. +.[ +tanenbaum toolkit rapport +.] +.[ +tanenbaum toolkit cacm +.] +.PP +The input of the Global Optimizer may consist of files and +libraries. +Every file or module in the library must contain EM code in +Compact Assembly Language format. +.[~[ +tanenbaum machine architecture +.], section 11.2] +The output consists of one such EM file. +The input files and libraries together need not +constitute an entire program, +although as much of the program as possible should be supplied. +The more information about the program the optimizer +gets, the better its output code will be. +.PP +The Global Optimizer is language- and machine-independent, +i.e. it can be used for all languages and machines supported by ACK. +Yet, it puts some unavoidable restrictions on the EM code +produced by the Front End (see below). +It must have some knowledge of the target machine. +This knowledge is expressed in a machine description table +which is passed as argument to the optimizer. +This table does not contain very detailed information about the +target (such as its instruction set and addressing modes). +.NH 2 +The EM code +.PP +The definition of EM, the intermediate code of all ACK compilers, +is given in a separate document. +.[ +tanenbaum machine architecture +.] +We will only discuss some features of EM that are most relevant +to the Global Optimizer. +.PP +EM is the assembly code of a virtual \fIstack machine\fR. +All operations are performed on the top of the stack. +For example, the statement "A := B + 3" may be expressed in EM as: +.DS +.TS +l l. 
+LOL -4 -- push local variable B +LOC 3 -- push constant 3 +ADI 2 -- add two 2-byte items on top of + -- the stack and push the result +STL -2 -- pop A +.TE +.DE +So EM is essentially a \fIpostfix\fR code. +.PP +EM has a rich instruction set, containing several arithmetic +and logical operators. +It also contains special-case instructions (such as INCrement). +.PP +EM has \fIglobal\fR (\fIexternal\fR) variables, accessible +by all procedures and \fIlocal\fR variables, accessible by a few +(nested) procedures. +The local variables of a lexically enclosing procedure may +be accessed via a \fIstatic link\fR. +EM has instructions to follow the static chain. +There are EM instructions to allow a procedure +to access its local variables directly (such as LOL and STL above). +Local variables are referenced via an offset in the stack frame +of the procedure, rather than by their names (e.g. -2 and -4 above). +The EM code does not contain the (source language) type +of the variables. +.PP +All structured statements in the source program are expressed in +low level jump instructions. +Besides conditional and unconditional branch instructions, there are +two case instructions (CSA and CSB), +to allow efficient translation of case statements. +.NH 2 +Requirements on the EM input +.PP +As the optimizer should be useful for all languages, +it clearly should not put severe restrictions on the EM code +of the input. +There is, however, one immovable requirement: +it must be possible to determine the \fIflow of control\fR of the +input program. +As virtually all global optimizations are based on control flow information, +the optimizer would be totally powerless without it. +For this reason we restrict the usage of the case jump instructions (CSA/CSB) +of EM. +Such an instruction is always called with the address of a case descriptor +on top of the stack. +.[~[ +tanenbaum machine architecture +.]
section 7.4] +This descriptor contains the labels of all possible +destinations of the jump. +We demand that all case descriptors are allocated in a global +data fragment of type ROM, i.e. the case descriptors +may not be modifiable. +Furthermore, any case instruction should be immediately preceded by +a LAE (Load Address External) instruction, that loads the +address of the descriptor, +so the descriptor can be uniquely identified. +.PP +The optimizer will work improperly if the user deceives the control flow. +We will give two methods to do this. +.PP +In "C" the notorious library routines "setjmp" and "longjmp" +.[ +unix programmer's manual McIlroy +.] +may be used to jump out of a procedure, +but can also be used for a number of other stuffy purposes, +for example, to create an extra entry point in a loop. +.DS + while (condition) { + .... + setjmp(buf); + ... + } + ... + longjmp(buf); +.DE +The invocation of longjmp actually is a jump to the place of +the last call to setjmp with the same argument (buf). +As the calls to setjmp and longjmp are indistinguishable from +normal procedure calls, the optimizer will not see the danger. +Needless to say, several loop optimizations will behave +unexpectedly when presented with such pathological input. +.PP +Another way to deceive the flow of control is +by using exception handling routines. +Ada* +.FS +* Ada is a registered trademark of the U.S. Government +(Ada Joint Program Office). +.FE +has clearly recognized the dangers of exception handling, +but other languages (such as PL/I) have not. +.[ +ada rationale +.] +.PP +The optimizer will be more effective if the EM input contains +some extra information about the source program. +Especially the \fIregister message\fR is very important. +These messages indicate which local variables may never be +accessed indirectly. +Most optimizations benefit significantly from this information.
+.PP +The Inline Substitution technique needs to know how many bytes +of formal parameters every procedure accesses. +Only calls to procedures for which the EM code contains this information +will be substituted in line. +.NH 2 +Structure of the optimizer +.PP +The Global Optimizer is organized as a number of \fIphases\fR, +each one performing some task. +The main structure is as follows: +.IP IC 6 +the Intermediate Code construction phase transforms EM into the +intermediate code (ic) of the optimizer +.IP CF +the Control Flow phase extends the ic with control flow +information and interprocedural information +.IP OPTs +zero or more optimization phases, each one performing one or +more related optimizations +.IP CA +the Compact Assembly phase generates Compact Assembly Language EM code +out of ic. +.LP +.PP +An important issue in the design of a global optimizer is the +interaction between optimization techniques. +It is often advantageous to combine several techniques in +one algorithm that takes into account all interactions between them. +Ideally, one single algorithm should be developed that does +all optimizations simultaneously and deals with all possible interactions. +In practice, such an algorithm is still far out of reach. +Instead some rather ad hoc (albeit important) combinations are chosen, +such as Common Subexpression Elimination and Register Allocation. +.[ +prabhala sethi common subexpressions +.] +.[ +sethi ullman optimal code +.] +.PP +In the EM Global Optimizer there is one separate algorithm for +every technique. +Note that this does not mean that all techniques are independent +of each other. +.PP +In principle, the optimization phases can be run in any order; +a phase may even be run more than once. +However, the following rules should be obeyed: +.IP - +the Live Variable analysis phase (LV) must be run prior to +Register Allocation (RA), as RA uses information output by LV.
+.IP - +RA should be the last phase; this is a consequence of the way +the interface between RA and the Code Generator is defined. +.LP +The ordering of the phases has a significant impact on +the quality of the produced code. +In +.[ +wulf overview production quality carnegie-mellon +.] +two kinds of phase ordering problems are distinguished. +If two techniques A and B both take away opportunities of each other, +there is a "negative" ordering problem. +If, on the other hand, both A and B introduce new optimization +opportunities for each other, the problem is called "positive". +In the Global Optimizer the following interactions must be +taken into account: +.IP - +Inline Substitution (IL) may create new opportunities for most +other techniques, so it should be run as early as possible +.IP - +Use Definition analysis (UD) may introduce opportunities for LV. +.IP - +Strength Reduction may create opportunities for UD +.LP +The optimizer has a default phase ordering, which can +be changed by the user. +.NH 2 +Structure of this document +.PP +The remaining chapters of this document each describe one +phase of the optimizer. +For every phase, we describe its task, its design, +its implementation, and its source files. +The latter two sections are intended to aid the +maintenance of the optimizer and +can be skipped by the initial reader. +.NH 2 +References +.PP +There are very +few modern textbooks on optimization. +Chapters 12, 13, and 14 of +.[ +aho compiler design +.] +are a good introduction to the subject. +Wulf et al. +.[ +wulf optimizing compiler +.] +describe one specific optimizing (Bliss) compiler. +Anklam et al. +.[ +anklam vax-11 +.] +discuss code generation and optimization in +compilers for one specific machine (a Vax-11). +Kirchgaesner et al. +.[ +optimizing ada compiler +.] +present a brief description of many +optimizations; the report also contains a lengthy (over 60 pages) +bibliography.
+.PP +The number of articles on optimization is quite impressive. +The Lowry and Medlock paper on the Fortran H compiler +.[ +object code optimization Lowry Medlock +.] +is a classical one. +Other papers on global optimization are. +.[ +faiman optimizing pascal +.] +.[ +perkins sites +.] +.[ +harrison general purpose optimizing +.] +.[ +morel partial redundancies +.] +.[ +Mintz global optimizer +.] +Freudenberger +.[ +freudenberger setl optimizer +.] +describes an optimizer for a Very High Level Language (SETL). +The Production-Quality Compiler-Compiler (PQCC) project uses +very sophisticated compiler techniques, as described in. +.[ +wulf overview ieee +.] +.[ +wulf overview carnegie-mellon +.] +.[ +wulf machine-relative +.] +.PP +Several Ph.D. theses are dedicated to optimization. +Davidson +.[ +davidson simplifying +.] +outlines a machine-independent peephole optimizer that +improves assembly code. +Katkus +.[ +katkus +.] +describes how efficient programs can be obtained at little cost by +optimizing only a small part of a program. +Photopoulos +.[ +photopoulos mixed code +.] +discusses the idea of generating interpreted intermediate code as well +as assembly code, to obtain programs that are both small and fast. +Shaffer +.[ +shaffer automatic +.] +describes the theory of automatic subroutine generation. +.] +Leverett +.[ +leverett register allocation compilers +.] +deals with register allocation in the PQCC compilers. +.PP +References to articles about specific optimization techniques +will be given in later chapters. diff --git a/doc/ego/proto.make b/doc/ego/proto.make new file mode 100644 index 0000000..0537003 --- /dev/null +++ b/doc/ego/proto.make @@ -0,0 +1,64 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:04:23 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/doc/ego + +REFS=-p $(SRC_DIR)/refs.opt -p $(SRC_DIR)/refs.stat -p $(SRC_DIR)/refs.gen +REFFILES = $(SRC_DIR)/refs.opt $(SRC_DIR)/refs.stat $(SRC_DIR)/refs.gen +INTRO=$(SRC_DIR)/intro/intro? +OV=$(SRC_DIR)/ov/ov? +IC=$(SRC_DIR)/ic/ic? +CF=$(SRC_DIR)/cf/cf? +IL=$(SRC_DIR)/il/il? +SR=$(SRC_DIR)/sr/sr? +CS=$(SRC_DIR)/cs/cs? +SP=$(SRC_DIR)/sp/sp? +UD=$(SRC_DIR)/ud/ud? +LV=$(SRC_DIR)/lv/lv? +CJ=$(SRC_DIR)/cj/cj? +BO=$(SRC_DIR)/bo/bo? +RA=$(SRC_DIR)/ra/ra? +CA=$(SRC_DIR)/ca/ca? +EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \ + $(UD) $(LV) $(RA) $(CA) +REFER=refer +TROFF=troff +TBL=tbl +TARGET=-Tlp +HEAD = $(SRC_DIR)/intro/head +TAIL = $(SRC_DIR)/intro/tail + +$(TARGET_HOME)/doc/ego.doc: $(REFFILES) $(HEAD) $(TAIL) $(EGO) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(EGO) $(TAIL) | $(TBL) > $(TARGET_HOME)/doc/ego.doc + +ego.f: $(REFFILES) $(HEAD) $(TAIL) $(EGO) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(EGO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ego.f +intro.f: $(REFFILES) $(HEAD) $(TAIL) $(INTRO) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(INTRO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > intro.f +ov.f: $(REFFILES) $(HEAD) $(TAIL) $(OV) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(OV) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ov.f +ic.f: $(REFFILES) $(HEAD) $(TAIL) $(IC) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(IC) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ic.f +cf.f: $(REFFILES) $(HEAD) $(TAIL) $(CF) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CF) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cf.f +il.f: $(REFFILES) $(HEAD) $(TAIL) $(IL) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(IL) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > il.f +sr.f: $(REFFILES) $(HEAD) $(TAIL) $(SR) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(SR) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > sr.f +cs.f: $(REFFILES) $(HEAD) $(TAIL) $(CS) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CS) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cs.f +sp.f: $(REFFILES) $(HEAD) $(TAIL) 
$(SP) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(SP) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > sp.f +cj.f: $(REFFILES) $(HEAD) $(TAIL) $(CJ) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CJ) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > cj.f +bo.f: $(REFFILES) $(HEAD) $(TAIL) $(BO) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(BO) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > bo.f +ud.f: $(REFFILES) $(HEAD) $(TAIL) $(UD) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(UD) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ud.f +lv.f: $(REFFILES) $(HEAD) $(TAIL) $(LV) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(LV) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > lv.f +ra.f: $(REFFILES) $(HEAD) $(TAIL) $(RA) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(RA) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ra.f +ca.f: $(REFFILES) $(HEAD) $(TAIL) $(CA) + $(REFER) -sA+T -l4,2 $(REFS) $(HEAD) $(CA) $(TAIL) | $(TBL) | $(TROFF) $(TARGET) -ms > ca.f diff --git a/doc/ego/ra/ra1 b/doc/ego/ra/ra1 new file mode 100644 index 0000000..fb5343f --- /dev/null +++ b/doc/ego/ra/ra1 @@ -0,0 +1,33 @@ +.bp +.NH 1 +Register Allocation +.NH 2 +Introduction +.PP +The efficient usage of the general purpose registers +of the target machine plays a key role in any optimizing compiler. +This subject, often referred to as \fIRegister Allocation\fR, +has great impact on both the code generator and the +optimizing part of such a compiler. +The code generator needs registers for at least the evaluation of +arithmetic expressions; +the optimizer uses the registers to decrease the access costs +of frequently used entities (such as variables). +The design of an optimizing compiler must pay great +attention to the cooperation of optimization, register allocation +and code generation. +.PP +Register allocation has received much attention in literature (see +.[ +leverett register allocation compilers +.] +.[ +chaitin register coloring +.] +.[ +freiburghouse usage counts +.] +and +.[~[ +sites register +.]]). 
diff --git a/doc/ego/ra/ra2 b/doc/ego/ra/ra2 new file mode 100644 index 0000000..e6dfc13 --- /dev/null +++ b/doc/ego/ra/ra2 @@ -0,0 +1,139 @@ +.NH 2 +Usage of registers in ACK compilers +.PP +We will first describe the major design decisions +of the Amsterdam Compiler Kit, +as far as they concern register allocation. +Subsequently we will outline +the role of the Global Optimizer in the register +allocation process and the interface +between the code generator and the optimizer. +.NH 3 +Usage of registers without the intervention of the Global Optimizer +.PP +Registers are used for two purposes: +.IP 1. +for the evaluation of arithmetic expressions +.IP 2. +to hold local variables, for the duration of the procedure they +are local to. +.LP +It is essential to note that no translation part of the compilers, +except for the code generator, knows anything at all +about the register set of the target computer. +Hence all decisions about registers are ultimately made by +the code generator. +Earlier phases of a compiler can only \fIadvise\fR the code generator. +.PP +The code generator splits the register set into two: +a fixed part for the evaluation of expressions (called \fIscratch\fR +registers) and a fixed part to store local variables. +This partitioning, which depends only on the target computer, significantly +reduces the complexity of register allocation, at the penalty +of some loss of code quality. +.PP +The code generator has some (machine-dependent) knowledge of the access costs +of memory locations and registers and of the costs of saving and +restoring registers. (Registers are always saved by the \fIcalled\fR +procedure). +This knowledge is expressed in a set of procedures for each target machine. +The code generator also knows how many registers there are and of +which type they are. +A register can be of type \fIpointer\fR, \fIfloating point\fR +or \fIgeneral\fR. 
+.PP +The front ends of the compilers determine which local variables may +be put in a register; +such a variable may never be accessed indirectly (i.e. through a pointer). +The front end also determines the types and sizes of these variables. +The type can be any of the register types or the type \fIloop variable\fR, +which denotes a general-typed variable that is used as loop variable +in a for-statement. +All this information is collected in a \fIregister message\fR in +the EM code. +Such a message is a pseudo EM instruction. +This message also contains a \fIscore\fR field, +indicating how desirable it is to put this variable in a register. +A front end may assign a high score to a variable if it +was declared as a register variable (which is only possible in +some languages, such as "C"). +Any compiler phase before the code generator may change this score field, +if it has reason to do so. +The code generator bases its decisions on the information contained +in the register message, most notably on the score. +.PP +If the global optimizer is not used, +the score fields are set by the Peephole Optimizer. +This optimizer simply counts the number of occurrences +of every local (register) variable and adds this count +to the score provided by the front end. +In this way a simple, yet quite effective +register allocation scheme is achieved. +.NH 3 +The role of the Global Optimizer +.PP +The Global Optimizer essentially tries to improve the scheme +outlined above. +It uses the following principles for this purpose: +.IP - +Entities are not always assigned a register for the duration +of an entire procedure; smaller regions of the program text +may be considered too. +.IP - +several variables may be put in the same register simultaneously, +provided at most one of them is live at any point. +.IP - +besides local variables, other entities (such as constants and addresses of +variables and procedures) may be put in a register. 
+.IP - +more accurate cost estimates are used. +.LP +To perform its task, the optimizer must have some +knowledge of the target machine. +.NH 3 +The interface between the register allocator and the code generator +.PP +The RA phase of the optimizer must somehow be able to express its +decisions. +Such decisions may look like: 'put constant 1283 in a register from +line 12 to line 40'. +To be precise, RA must be able to tell the code generator to: +.IP - +initialize a register with some value +.IP - +update an entity from a register +.IP - +replace all occurrences of an entity in a certain region +of text by a reference to the register. +.LP +At least three problems occur here: the code generator is only used to +put local variables in registers, +it only assigns a register to a variable for the duration of an entire +procedure and it is not used to have some earlier compiler phase +make all the decisions. +.PP +All problems are solved by one mechanism, that involves no changes +to the code generator. +With every (non-scratch) register R that will be used in +a procedure P, we associate a new variable T, local to P. +The size of T is the same as the size of R. +A register message is generated for T with an exceptionally high score. +The scores of all original register messages are set to zero. +Consequently, the code generator will always assign precisely those new +variables to a register. +If the optimizer wants to put some entity, say the constant 1283, in +a register, it emits the code "T := 1283" and replaces all occurrences +of '1283' by T. +Similarly, it can put the address of a procedure in T and replace all +calls to that procedure by indirect calls. +Furthermore, it can put several different entities in T (and thus in R) +during the lifetime of P. +.PP +In principle, the code generated by the optimizer in this way would +always be valid EM code, even if the optimizer would be presented +a totally wrong description of the target computer register set. 
+In practice, it would be a waste of data as well as text space to +allocate memory for these new variables, as they will always be assigned +a register (in the correct order of events). +Hence, no memory locations are allocated for them. +For this reason they are called pseudo local variables. diff --git a/doc/ego/ra/ra3 b/doc/ego/ra/ra3 new file mode 100644 index 0000000..d02ed3b --- /dev/null +++ b/doc/ego/ra/ra3 @@ -0,0 +1,386 @@ +.NH 2 +The register allocation phase +.PP +.NH 3 +Overview +.PP +The RA phase deals with one procedure at a time. +For every procedure, it first determines which entities +may be put in a register. Such an entity +is called an \fIitem\fR. +For every item it decides during which parts of the procedure it +might be assigned a register. +Such a region is called a \fItimespan\fR. +For any item, several (possibly overlapping) timespans may +be considered. +A pair (item,timespan) is called an \fIallocation\fR. +If the items of two allocations are both live at some +point of time in the intersections of their timespans, +these allocations are said to be \fIrivals\fR of each other, +as they cannot be assigned the same register. +The rivals-set of every allocation is computed. +Next, the gains of assigning a register to an allocation are estimated, +for every allocation. +With all this information, decisions are made which allocations +to store in which registers (\fIpacking\fR). +Finally, the EM text is transformed to reflect these decisions. +.NH 3 +The item recognition subphase +.PP +RA tries to put the following entities in a register: +.IP - +a local variable for which a register message was found +.IP - +the address of a local variable for which no +register message was found +.IP - +the address of a global variable +.IP - +the address of a procedure +.IP - +a numeric constant. +.LP +Only the \fIaddress\fR of a global variable +may be put in a register, not the variable itself. 
+This approach avoids the very complex problems that would be +caused by procedure calls and indirect pointer references (see +.[~[ +aho design compiler +.] sections 14.7 and 14.8] +and +.[~[ +spillman side-effects +.]]). +Still, on most machines accessing a global variable using indirect +addressing through a register is much cheaper than +accessing it via its address. +Similarly, if the address of a procedure is put in a register, the +procedure can be called via an indirect call. +.PP +With every item we associate a register type. +This type is +.DS +for local variables: the type contained in the register message +for addresses of variables and procedures: the pointer type +for constants: the general type +.DE +An entity other than a local variable is not taken to be an item +if it is used only once within the current procedure. +.PP +An item is said to be \fIlive\fR at some point of the program text +if its value may be used before it is changed. +As addresses and constants are never changed, all items but local +variables are always live. +The region of text during which a local variable is live is +determined via the live/dead messages generated by the +Live Variable analysis phase of the Global Optimizer. +.NH 3 +The allocation determination subphase +.PP +If a procedure has more items than registers, +it may be advantageous to put an item in a register +only during those parts of the procedure where it is most +heavily used. +Such a part will be called a timespan. +With every item we may associate a set of timespans. +If two timespans of an item overlap, +at most one of them may be granted a register, +as there is no use in putting the same item in two +registers simultaneously. +If two timespans of an item are distinct, +both may be chosen; +the item will possibly be put in two +different registers during different parts of the procedure. +The timespan may also consist +of the whole procedure. 
+.PP +A list of (item,timespan) pairs (allocations) +is built, which will be the input to the decision making +subphase of RA (packing subphase). +This allocation list is the main data structure of RA. +The description of the remainder of RA will be in terms +of allocations rather than items. +The phrase "to assign a register to an allocation" means "to assign +a register to the item of the allocation for the duration of +the timespan of the allocation". +Subsequent subphases will add more information +to this list. +.PP +Several factors must be taken into account when a +timespan for an item is constructed: +.IP 1. +At any \fIentry point\fR of the timespan where the +item is live, +the register must be initialized with the item +.IP 2. +At any exit point of the timespan where the item is live, +the item must be updated. +.LP +In order to decrease these costs, we will only consider timespans with +one entry point +and no live exit points. +.NH 3 +The rivals computation subphase +.PP +As stated before, several different items may be put in the +same register, provided they are not live simultaneously. +For every allocation we determine the intersection +of its timespan and the lifetime of its item (i.e. the part of the +procedure during which the item is live). +The allocation is said to be busy during this intersection. +If two allocations are ever busy simultaneously they are +said to be rivals of each other. +The rivals information is added to the allocation list. +.NH 3 +The profits computation subphase +.PP +To make good decisions, the packing subphase needs to +know which allocations can be assigned the same register +(rivals information) and how much is gained by +granting an allocation a register. +.PP +Besides the gains of using a register instead of an +item, +two kinds of overhead costs must be +taken into account: +.IP - +the register must be initialized with the item +.IP - +the register must be saved at procedure entry +and restored at procedure exit.
+.LP +The latter costs should not be charged to a single +allocation, as several allocations can be assigned the same register. +These costs are dealt with after packing has been done. +They do not influence the decisions of the packing algorithm; +they may only undo them. +.PP +The actual profits consist of improvements +of execution time and code size. +As the former is far more difficult to estimate, we will +discuss code size improvements first. +.PP +The gains of putting a certain item in a register +depend on how the item is used. +Suppose the item is +a pointer variable. +On machines that do not have a +double-indirect addressing mode, +two instructions are needed to dereference the variable +if it is not in a register, but only one if it is put in a register. +If the variable is not dereferenced, but simply copied, one instruction +may be sufficient in both cases. +So the gains of putting a pointer variable in a register are higher +if the variable is dereferenced often. +.PP +To make accurate estimates, detailed knowledge of +the target machine and of the code generator +would be needed. +Therefore, a simplification has been made that substantially limits +the amount of target machine information that is needed. +The estimation of the number of bytes saved does +not take into account how an item is used. +Rather, an average number is used. +So these gains are computed as follows: +.DS +#bytes_saved = #occurrences * gains_per_occurrence +.DE +The number of occurrences is derived from +the EM code. +Note that this is not exact either, +as there is no one-to-one correspondence between occurrences in +the EM code and in the assembler code. +.PP +The gains of one occurrence depend on: +.IP 1. +the type of the item +.IP 2. +the size of the item +.IP 3. +the type of the register +.LP +and for local variables and addresses of local variables: +.IP 4. +the type of the local variable +.IP 5.
+the offset of the variable in the stackframe +.LP +For every allocation we try two types of registers: the register type +of the item and the general register type. +Only the type with the highest profits will subsequently be used. +This type is added to the allocation information. +.PP +To compute the gains, RA uses a machine-dependent table +that is read from a machine descriptor file. +By means of this table the number of bytes saved can be computed +as a function of the five properties. +.PP +The costs of initializing a register with an item +are determined in a similar way. +The cost of one initialization is also +obtained from the descriptor file. +Note that there can be at most one initialization for any +allocation. +.PP +To summarize, the number of bytes a certain allocation would +save is computed as follows: +.DS +.TS +l l. +net_bytes_saved = bytes_saved - init_cost +bytes_saved = #occurrences * gains_per_occ +init_cost = #initializations * costs_per_init +.TE +.DE +.PP +It is inherently more difficult to estimate the execution +time saved by putting an item in a register, +because it is impossible to predict how +many times an item will be used dynamically. +If an occurrence is part of a loop, +it may be executed many times. +If it is part of a conditional statement, +it may never be executed at all. +In the latter case, the speed of the program may even get +worse if an initialization is needed. +As a clear example, consider the piece of "C" code in Fig. 13.1. +.DS +switch(expr) { + case 1: p(); break; + case 2: p(); p(); break; + case 3: p(); break; + default: break; +} + +Fig. 13.1 A "C" switch statement +.DE +Lots of bytes may be saved by putting the address of procedure p +in a register, as p is called four times (statically). +Dynamically, p will be called zero, one or two times, +depending on the value of the expression. +.PP +The optimizer uses the following strategy for optimizing +execution time: +.IP 1.
+try to put items in registers during \fIloops\fR first +.IP 2. +always keep the initializing code outside the loop +.IP 3. +if an item is not used in a loop, do not put it in a register if +the initialization costs may be higher than the gains +.LP +The last condition can be checked by determining the +minimal number of usages (dynamically) of the item during the procedure, +via a shortest path algorithm. +In the example above, this minimal number is zero, so the address of +p is not put in a register. +.PP +The costs of one occurrence are estimated as described above for the +code size. +The number of dynamic occurrences is guessed by looking at the +loop nesting level of every occurrence. +If the item is never used in a loop, +the minimal number of occurrences is used. +From these facts, the execution time improvement is assessed +for every allocation. +.NH 3 +The packing subphase +.PP +The packing subphase takes as input the allocation +list and outputs a +description of which allocations should be put +in which registers. +So it is essentially the decision making part of RA. +.PP +The packing system tries to assign a register to allocations one +at a time, in some yet to be defined order. +For every allocation A, it first checks if there is a register +(of the right type) +that is already assigned to one or more allocations, +none of which are rivals of A. +In this case A is assigned the same register. +Else, A is assigned a new register, if one exists. +A table containing the number of free registers for every type +is maintained. +It is initialized with the number of non-scratch registers of +the target computer and updated whenever a +new register is handed out. +The packing algorithm stops when no more allocations can +or need be assigned a register. +.PP +After an allocation A has been packed, +all allocations with non-disjoint timespans (including +A itself) are removed from the allocation list.
+.PP +In case the number of items exceeds the number of registers, it +is important to choose the most profitable allocations. +Due to the possibility of having several allocations +occupying the same register, +this problem is quite complex. +Our packing algorithm uses simple heuristic rules +and avoids any combinatorial search. +It has distinct rules for different costs measures. +.PP +If object code size is the most important factor, +the algorithm is greedy and chooses allocations in +decreasing order of their profits attribute. +It does not take into account the fact that +other allocations may be passed over because of +this decision. +.PP +If execution time is at prime stake, the algorithm +first considers allocations whose timespans consist of loops. +After all these have been packed, it considers the remaining +allocations. +Within the two subclasses, it considers allocations +with the highest profits first. +When assigning a register to an allocation with a loop +as timespan, the algorithm checks if the item has +already been put in a register during another loop. +If so, it tries to use the same register for the +new allocation. +After all packing has been done, +it checks if the item has always been assigned the same +register (although not necessarily during all loops). +If so, it tries to put the item in that register during +the entire procedure. This is possible +if the allocation (item,whole_procedure) is not a rival +of any allocation with a different item that has been +assigned to the same register. +Note that this approach is essentially 'bottom up', +as registers are first assigned over small regions +of text which are later collapsed into larger regions. +The advantage of this approach is the fact that +the decisions for one loop can be made independently +of all other loops. 
+.PP +After the entire packing process has been completed, +we compute for each register how much is gained in using +this register, by simply adding the net profits +of all allocations assigned to it. +This total yield should outweigh the costs of +saving/restoring the register at procedure entry/exit. +As most modern processors (e.g. 68000, Vax) have special +instructions to save/restore several registers, +the differential costs of saving one extra register are by +no means constant. +The costs are read from the machine descriptor file and +compared to the total yields of the registers. +As a consequence of this analysis, some allocations +may have their registers taken away. +.NH 3 +The transformation subphase +.PP +The final subphase of RA transforms the EM text according to the +decisions made by the packing system. +It traverses the text of the currently optimized procedure and +changes all occurrences of items at points where +they are assigned a register. +It also clears the score field of the register messages for +normal local variables and emits register messages with a very +high score for the pseudo locals. +At points where registers have to be initialized with items, +it generates EM code to do so. +Finally it tries to decrease the size of the stackframe +of the procedure by looking at which local variables need not +be given memory locations. 
diff --git a/doc/ego/ra/ra4 b/doc/ego/ra/ra4 new file mode 100644 index 0000000..4bfeef7 --- /dev/null +++ b/doc/ego/ra/ra4 @@ -0,0 +1,28 @@ +.NH 2 +Source files of RA +.PP +The sources of RA are in the following files and packages: +.IP ra.h: 14 +declarations of global variables and data structures +.IP ra.c: +the routine main; initialization of target machine-dependent tables +.IP items: +a routine to build the list of items of one procedure; +routines to manipulate items +.IP lifetime: +contains a subroutine that determines when items are live/dead +.IP alloclist: +contains subroutines that build the initial allocations list +and that compute the rivals sets. +.IP profits: +contains a subroutine that computes the profits of the allocations +and a routine that determines the costs of saving/restoring registers +.IP pack: +contains the packing subphase +.IP xform: +contains the transformation subphase +.IP interval: +contains routines to manipulate intervals of time +.IP aux: +contains auxiliary routines +.LP diff --git a/doc/ego/refs.gen b/doc/ego/refs.gen new file mode 100644 index 0000000..408fc50 --- /dev/null +++ b/doc/ego/refs.gen @@ -0,0 +1,120 @@ +%T A Practical Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A H. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%I Vrije Universiteit, Amsterdam +%R Rapport nr IR-74 +%D October 1981 + +%T A Practical Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A H. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%J CACM +%V 26 +%N 9 +%P 654-660 +%D September 1983 + +%T A Unix Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A H. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%J Proceedings USENIX conf. +%C Toronto, Canada +%V 26 +%D July 1983 +%P 255-261 + +%T Using Peephole Optimization on Intermediate Code +%A A.S. Tanenbaum +%A H. van Staveren +%A J.W. 
Stevenson +%J TOPLAS +%V 4 +%N 1 +%P 21-36 +%D January 1982 + +%T Language- and Machine-independent Global Optimization on Intermediate Code +%A H.E. Bal +%A A.S. Tanenbaum +%J Computer Languages +%V 11 +%N 2 +%P 105-121 +%D April 1986 + +%T Description of a machine architecture for use with +block structured languages +%A A.S. Tanenbaum +%A H. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%I Vrije Universiteit, Amsterdam +%R Rapport nr IR-81 +%D August 1983 + +%T Amsterdam Compiler Kit documentation +%A A.S. Tanenbaum et. al. +%I Vrije Universiteit, Amsterdam +%R Rapport nr IR-90 +%D June 1984 + +%T The C Programming Language - Reference Manual +%A D.M. Ritchie +%I Bell Laboratories +%C Murray Hill, New Jersey +%D 1978 + +%T Unix programmer's manual, Seventh Edition +%A B.W. Kernighan +%A M.D. McIlroy +%I Bell Laboratories +%C Murray Hill, New Jersey +%V 1 +%D January 1979 + +%T A Tour Through the Portable C Compiler +%A S.C. Johnson +%I Bell Laboratories +%B Unix programmer's manual, Seventh Edition +%C Murray Hill, New Jersey +%D January 1979 + + +%T Ada Programming Language - MILITARY STANDARD +%A J.D. Ichbiah +%I U.S. Department of Defense +%R ANSI/MIL-STD-1815A +%D 22 January 1983 + +%T Rationale for the Design of the Ada Programming Language +%A J.D. Ichbiah +%J SIGPLAN Notices +%V 14 +%N 6 +%D June 1979 + +%T The Programming Languages LISP and TRAC +%A W.L. van der Poel +%I Technische Hogeschool Delft +%C Delft +%D 1972 + +%T Compiler construction +%A W.M. Waite +%A G. Goos +%I Springer-Verlag +%C New York +%D 1984 + +%T The C Programming Language +%A B.W. Kernighan +%A D.M. Ritchie +%I Prentice-Hall, Inc +%C Englewood Cliffs,NJ +%D 1978 diff --git a/doc/ego/refs.opt b/doc/ego/refs.opt new file mode 100644 index 0000000..6029c7b --- /dev/null +++ b/doc/ego/refs.opt @@ -0,0 +1,546 @@ +%T Principles of compiler design +%A A.V. Aho +%A J.D. 
Ullman +%I Addison-Wesley +%C Reading, Massachusetts +%D 1978 + +%T The Design and Analysis of Computer Algorithms +%A A.V. Aho +%A J.E. Hopcroft +%A J.D. Ullman +%I Addison-Wesley +%C Reading, Massachusetts +%D 1974 + +%T Code generation in a machine-independent compiler +%A R.G.G. Cattell +%A J.M. Newcomer +%A B.W. Leverett +%J SIGPLAN Notices +%V 14 +%N 8 +%P 65-75 +%D August 1979 + +%T An algorithm for Reduction of Operator Strength +%A J. Cocke +%A K. Kennedy +%J CACM +%V 20 +%N 11 +%P 850-856 +%D November 1977 + +%T Reduction of Operator Strength +%A F.E. Allen +%A J. Cocke +%A K. Kennedy +%B Program Flow Analysis +%E S.S. Muchnick and D. Jones +%I Prentice-Hall +%C Englewood Cliffs, N.J. +%D 1981 + +%T Simplifying Code Generation Through Peephole Optimization +%A J.W. Davidson +%R Ph.D. thesis +%I Dept. of Computer Science +%C Univ. of Arizona +%D December 1981 + +%T A study of selective optimization techniques +%A G.R. Katkus +%R Ph.D. Thesis +%C University of Southern California +%D 1973 + +%T Automatic subroutine generation in an optimizing compiler +%A J.B. Shaffer +%R Ph.D. Thesis +%C University of Maryland +%D 1978 + +%T Optimal mixed code generation for microcomputers +%A D.S. Photopoulos +%R Ph.D. Thesis +%C Northeastern University +%D 1981 + +%T The Design of an Optimizing Compiler +%A W.A. Wulf +%A R.K. Johnsson +%A C.B. Weinstock +%A S.O. Hobbs +%A C.M. Geschke +%I American Elsevier Publishing Company +%C New York +%D 1975 + +%T Retargetable Compiler Code Generation +%A M. Ganapathi +%A C.N. Fischer +%A J.L. Hennessy +%J ACM Computing Surveys +%V 14 +%N 4 +%P 573-592 +%D December 1982 + +%T An Optimizing Pascal Compiler +%A R.N. Faiman +%A A.A. Kortesoja +%J IEEE Trans. on Softw. Eng. +%V 6 +%N 6 +%P 512-518 +%D November 1980 + +%T Experience with the SETL Optimizer +%A S.M. Freudenberger +%A J.T. Schwartz +%J TOPLAS +%V 5 +%N 1 +%P 26-45 +%D Januari 1983 + +%T An Optimizing Ada Compiler +%A W. Kirchgaesner +%A J. Uhl +%A G. Winterstein +%A G. 
Goos +%A M. Dausmann +%A S. Drossopoulou +%I Institut fur Informatik II, Universitat Karlsruhe +%D February 1983 + +%T A Fast Algorithm for Finding Dominators +in a Flowgraph +%A T. Lengauer +%A R.E. Tarjan +%J TOPLAS +%V 1 +%N 1 +%P 121-141 +%D July 1979 + +%T Optimization of hierarchical directed graphs +%A M.T. Lepage +%A D.T. Barnard +%A A. Rudmik +%J Computer Languages +%V 6 +%N 1 +%P 19-34 +%D Januari 1981 + +%T Object Code Optimization +%A E.S. Lowry +%A C.W. Medlock +%J CACM +%V 12 +%N 1 +%P 13-22 +%D Januari 1969 + +%T Automatic Program Improvement: +Variable Usage Transformations +%A B. Maher +%A D.H. Sleeman +%J TOPLAS +%V 5 +%N 2 +%P 236-264 +%D April 1983 + +%T The design of a global optimizer +%A R.J. Mintz +%A G.A. Fisher +%A M. Sharir +%J SIGPLAN Notices +%V 14 +%N 9 +%P 226-234 +%D September 1979 + +%T Global Optimization by Suppression of Partial Redundancies +%A E. Morel +%A C. Renvoise +%J CACM +%V 22 +%N 2 +%P 96-103 +%D February 1979 + +%T Efficient Computation of Expressions with Common Subexpressions +%A B. Prabhala +%A R. Sethi +%J JACM +%V 27 +%N 1 +%P 146-163 +%D Januari 1980 + +%T An Analysis of Inline Substitution for a Structured +Programming Language +%A R.W. Scheifler +%J CACM +%V 20 +%N 9 +%P 647-654 +%D September 1977 + +%T Immediate Predominators in a Directed Graph +%A P.W. Purdom +%A E.F. Moore +%J CACM +%V 15 +%N 8 +%P 777-778 +%D August 1972 + +%T The Generation of Optimal Code for Arithmetic Expressions +%A R. Sethi +%A J.D. Ullman +%J JACM +%V 17 +%N 4 +%P 715-728 +%D October 1970 + +%T Exposing side-effects in a PL/I optimizing compiler +%A T.C. Spillman +%B Information Processing 1971 +%I North-Holland Publishing Company +%C Amsterdam +%P 376-381 +%D 1971 + +%T Inner Loops in Flowgraphs and Code Optimization +%A S. Vasudevan +%J Acta Informatica +%N 17 +%P 143-155 +%D 1982 + +%T A New Strategy for Code Generation - the General-Purpose +Optimizing Compiler +%A W.H. Harrison +%J IEEE Trans. on Softw. Eng. 
+%V 5 +%N 4 +%P 367-373 +%D July 1979 + +%T PQCC: A Machine-Relative Compiler Technology +%A W.M. Wulf +%R CMU-CS-80-144 +%I Carnegie-Mellon University +%C Pittsburgh +%D 25 september 1980 + +%T Machine-independent Pascal code optimization +%A D.R. Perkins +%A R.L. Sites +%J SIGPLAN Notices +%V 14 +%N 8 +%P 201-207 +%D August 1979 + +%T A Case Study of a New Code Generation Technique for Compilers +%A J.L. Carter +%J CACM +%V 20 +%N 12 +%P 914-920 +%D December 1977 + +%T Table-driven Code Generation +%A S.L. Graham +%J IEEE Computer +%V 13 +%N 8 +%P 25-33 +%D August 1980 + +%T Register Allocation in Optimizing Compilers +%A B.W. Leverett +%R Ph.D. Thesis, CMU-CS-81-103 +%I Carnegie-Mellon University +%C Pittsburgh +%D February 1981 + +%T Register Allocation via Coloring +%A G.J. Chaitin +%A M.A. Auslander +%A A.K. Chandra +%A J. Cocke +%A M.E. Hopkins +%A P.W. Markstein +%J Computer Languages +%V 6 +%N 1 +%P 47-57 +%D January 1981 + +%T How to Call Procedures, or Second Thoughts on +Ackermann's Function +%A B.A. Wichmann +%J Software - Practice and Experience +%V 7 +%P 317-329 +%D 1977 + +%T Register Allocation Via Usage Counts +%A R.A. Freiburghouse +%J CACM +%V 17 +%N 11 +%P 638-642 +%D November 1974 + +%T Machine-independent register allocation +%A R.L. Sites +%J SIGPLAN Notices +%V 14 +%N 8 +%P 221-225 +%D August 1979 + +%T An Overview of the Production-Quality Compiler-Compiler Project +%A B.W. Leverett +%A R.G.G Cattell +%A S.O. Hobbs +%A J.M. Newcomer +%A A.H. Reiner +%A B.R. Schatz +%A W.A. Wulf +%J IEEE Computer +%V 13 +%N 8 +%P 38-49 +%D August 1980 + +%T An Overview of the Production-Quality Compiler-Compiler Project +%A B.W. Leverett +%A R.G.G Cattell +%A S.O. Hobbs +%A J.M. Newcomer +%A A.H. Reiner +%A B.R. Schatz +%A W.A. Wulf +%R CMU-CS-79-105 +%I Carnegie-Mellon University +%C Pittsburgh +%D 1979 + +%T Topics in Code Generation and Register Allocation +%A B.W. 
Leverett +%R CMU-CS-82-130 +%I Carnegie-Mellon University +%C Pittsburgh +%D 28 July 1982 + +%T Predicting the Effects of Optimization on a Procedure Body +%A J.E. Ball +%J SIGPLAN Notices +%V 14 +%N 8 +%P 214-220 +%D August 1979 + +%T The C Language Calling Sequence +%A S.C. Johnson +%A D.M. Ritchie +%I Bell Laboratories +%C Murray Hill, New Jersey +%D September 1981 + +%T A Generalization of Two Code Ordering Optimizations +%A C.W. Fraser +%R TR 82-11 +%I Department of Computer Science +%C The University of Arizona, Tucson +%D October 1982 + +%T A Survey of Data Flow Analysis Techniques +%A K. Kennedy +%B Program Flow Analysis +%E S.S. Muchnick and D. Jones +%I Prentice-Hall +%C Englewood Cliffs +%D 1981 + +%T Delayed Binding in PQCC Generated Compilers +%A W.A. Wulf +%A K.V. Nori +%R CMU-CS-82-138 +%I Carnegie-Mellon University +%C Pittsburgh +%D 1982 + +%T Interprocedural Data Flow Analysis in the presence +of Pointers, Procedure Variables, and Label Variables +%A W.E. Weihl +%J Conf. Rec. of the 7th ACM Symp. on Principles of +Programming Languages +%C Las Vegas, Nevada +%P 83-94 +%D 1980 + +%T Low-Cost, High-Yield Code Optimization +%A D.R. Hanson +%R TR 82-17 +%I Department of Computer Science +%C The University of Arizona, Tucson +%D November 1982 + +%T Program Flow Analysis +%E S.S. Muchnick and D. Jones +%I Prentice-Hall +%C Englewood Cliffs +%D 1981 + +%T A machine independent algorithm for code generation and its +use in retargetable compilers +%A R. Glanville +%R Ph.D. thesis +%C University of California, Berkeley +%D December 1977 + +%T A formal framework for the derivation of machine-specific optimizers +%A R. Giegerich +%J TOPLAS +%V 5 +%N 3 +%P 478-498 +%D July 1983 + +%T Engineering a compiler: Vax-11 code generation and optimization +%A P. Anklam +%A D. Cutler +%A R. Heinen +%A M. MacLaren +%I Digital Equipment Corporation +%D 1982 + +%T Analyzing exotic instructions for a retargetable code generator +%A T.M. Morgan +%A L.A. 
Rowe +%J SIGPLAN Notices +%V 17 +%N 6 +%P 197-204 +%D June 1982 + +%T TCOLAda and the Middle End of the PQCC Ada Compiler +%A B.M. Brosgol +%J SIGPLAN Notices +%V 15 +%N 11 +%P 101-112 +%D November 1980 + +%T Implementation Implications of Ada Generics +%A G. Bray +%J Ada Letters +%V III +%N 2 +%P 62-71 +%D September 1983 + +%T Attributed Linear Intermediate Representations for Retargetable +Code Generators +%A M. Ganapathi +%A C.N. Fischer +%J Software-Practice and Experience +%V 14 +%N 4 +%P 347-364 +%D April 1984 + +%T UNCOL: The myth and the fact +%A T.B. Steel +%J Annu. Rev. Autom. Program. +%V 2 +%D 1960 +%P 325-344 + +%T Experience with a Graham-Glanville Style Code Generator +%A P. Aigrain +%A S.L. Graham +%A R.R. Henry +%A M.K. McKusick +%A E.P. Llopart +%J SIGPLAN Notices +%V 19 +%N 6 +%D June 1984 +%P 13-24 + +%T Using Dynamic Programming to generate Optimized Code in a +Graham-Glanville Style Code Generator +%A T.W. Christopher +%A P.J. Hatcher +%A R.C. Kukuk +%J SIGPLAN Notices +%V 19 +%N 6 +%D June 1984 +%P 25-36 + +%T Peep - An Architectural Description Driven Peephole Optimizer +%A R.R. Kessler +%J SIGPLAN Notices +%V 19 +%N 6 +%D June 1984 +%P 106-110 + +%T Automatic Generation of Peephole Optimizations +%A J.W. Davidson +%A C.W. Fraser +%J SIGPLAN Notices +%V 19 +%N 6 +%D June 1984 +%P 111-116 + +%T Analysing and Compressing Assembly Code +%A C.W. Fraser +%A E.W. Myers +%A A.L. Wendt +%J SIGPLAN Notices +%V 19 +%N 6 +%D June 1984 +%P 117-121 + +%T Register Allocation by Priority-based Coloring +%A F. Chow +%A J. Hennessy +%J SIGPLAN Notices +%V 19 +%N 6 +%D June 1984 +%P 222-232 +%V 19 +%N 6 +%D June 1984 +%P 117-121 + +%T Code Selection through Object Code Optimization +%A J.W. Davidson +%A C.W. Fraser +%I Dept. of Computer Science +%C Univ. of Arizona +%D November 1981 + +%T A Portable Machine-Independent Global Optimizer - Design +and Measurements +%A F.C. 
Chow +%I Computer Systems Laboratory +%C Stanford University +%D December 1983 diff --git a/doc/ego/refs.stat b/doc/ego/refs.stat new file mode 100644 index 0000000..56fcd7f --- /dev/null +++ b/doc/ego/refs.stat @@ -0,0 +1,29 @@ +%T An analysis of Pascal Programs +%A L.R. Carter +%I UMI Research Press +%C Ann Arbor, Michigan +%D 1982 + +%T An Empirical Study of FORTRAN Programs +%A D.E. Knuth +%J Software - Practice and Experience +%V 1 +%P 105-133 +%D 1971 + +%T F77 Performance +%A D.A. Mosher +%A R.P. Corbett +%J ;login: +%V 7 +%N 3 +%D June 1982 + +%T Ada Language Statistics for the iMAX 432 Operating System +%A S.F. Zeigler +%A R.P. Weicker +%J Ada LETTERS +%V 2 +%N 6 +%P 63-67 +%D May 1983 diff --git a/doc/ego/sp/sp1 b/doc/ego/sp/sp1 new file mode 100644 index 0000000..86df413 --- /dev/null +++ b/doc/ego/sp/sp1 @@ -0,0 +1,184 @@ +.bp +.NH 1 +Stack pollution +.NH 2 +Introduction +.PP +The "Stack Pollution" optimization technique (SP) decreases the costs +(time as well as space) of procedure calls. +In the EM calling sequence, the actual parameters are popped from +the stack by the \fIcalling\fR procedure. +The ASP (Adjust Stack Pointer) instruction is used for this purpose. +A call in EM is shown in Fig. 8.1. +.DS +.TS +l l. +Pascal: EM: + +f(a,2) LOC 2 + LOE A + CAL F + ASP 4 -- pop 4 bytes +.TE + +Fig. 8.1 An example procedure call in Pascal and EM +.DE +As procedure calls occur often in most programs, +the ASP is one of the most frequently used EM instructions. +.PP +The main intention of removing the actual parameters after a procedure call +is to prevent the stack size from increasing rapidly. +Yet, in some cases, it is possible to \fIdelay\fR or even \fIavoid\fR the +removal of the parameters without letting the stack grow +significantly. +In this way, considerable savings in code size and execution time may +be achieved, at the cost of a slightly increased stack size.
+.PP +A stack adjustment may be delayed if there is some other stack adjustment +later on in the same basic block. +The two ASPs can be combined into one. +.DS +.TS +l l l. +Pascal: EM: optimized EM: + +f(a,2) LOC 2 LOC 2 +g(3,b,c) LOE A LOE A + CAL F CAL F + ASP 4 LOE C + LOE C LOE B + LOE B LOC 3 + LOC 3 CAL G + CAL G ASP 10 + ASP 6 +.TE + +Fig. 8.2 An example of local Stack Pollution +.DE +The stacksize will be increased only temporarily. +If the basic block contains another ASP, the ASP 10 may subsequently be +combined with that next ASP, and so on. +.PP +For some back ends, a stack adjustment also takes place +at the point of a procedure return. +There is no need to specify the number of bytes to be popped at a +return. +This provides an opportunity to remove ASPs more globally. +If all ASPs outside any loop are removed, the increase of the +stack size will still only be small, as no such ASP is executed more +than once without an intervening return from the procedure it is part of. +.PP +This second approach is not generally applicable to all target machines, +as some back ends require the stack to be cleaned up at the point of +a procedure return. +.NH 2 +Implementation +.PP +There is one main problem the implementation has to solve. +In EM, the stack is not only used for passing parameters, +but also for evaluating expressions. +Hence, ASP instructions can only be combined or removed +if certain conditions are satisfied. +.PP +Two consecutive ASPs of one basic block can only be combined +(as described above) if: +.IP 1. +On no point of text in between the two ASPs, any item is popped from +the stack that was pushed onto it before the first ASP. +.IP 2. +The number of bytes popped from the stack by the second ASP must equal +the number of bytes pushed since the first ASP. +.LP +Condition 1. is not satisfied in Fig. 8.3. +.DS +.TS +l l. 
+Pascal: EM: + +5 + f(10) + g(30) LOC 5 + LOC 10 + CAL F + ASP 2 -- cannot be removed + LFR 2 -- push function result + ADI 2 + LOC 30 + CAL G + ASP 2 + LFR 2 + ADI 2 +.TE + +Fig. 8.3 An illegal transformation +.DE +If the first ASP were removed (delayed), the first ADI would add +10 and f(10), instead of 5 and f(10). +.sp +Condition 2. is not satisfied in Fig. 8.4. +.DS +.TS +l l. +Pascal: EM: + +f(10) + 5 * g(30) LOC 10 + CAL F + ASP 2 + LFR 2 + LOC 5 + LOC 30 + CAL G + ASP 2 + LFR 2 + MLI 2 -- 5 * g(30) + ADI 2 +.TE + +Fig. 8.4 A second illegal transformation +.DE +If the two ASPs were combined into one 'ASP 4', the constant 5 would +have been popped, rather than the parameter 10 (so '10 + f(10)*g(30)' +would have been computed). +.PP +The second approach to deleting ASPs (i.e. let the procedure return +do the stack clean-up) +is only applied to the last ASP of every basic block. +Any preceding ASPs are dealt with by the first approach. +The last ASP of a basic block B will only be removed if: +.IP - +on no path in the control flow graph from B to any block containing a +RET (return) there is a basic block that, at some point of its text, pops +items from the stack that it has not itself pushed earlier. +.LP +Clearly, if this condition is satisfied, no harm can be done; no +other basic block will ever access items that were pushed +on the stack before the ASP. +.PP +The number of bytes pushed onto or popped from the stack can be +easily encoded in a so called "pop-push table". +The numbers in general depend on the target machine word- and pointer +size and on the argument given to the instruction. +For example, an ADS instruction is described by: +.DS + -a-p+p +.DE +which means: an 'ADS n' first pops an n-byte value (n being the argument), +next pops a pointer-size value and finally pushes a pointer-size value. +For some infrequently used EM instructions the pop-push numbers +cannot be computed statically. 
+.PP +The stack pollution algorithm first performs a depth first search over +the control flow graph and marks all blocks that do not satisfy +the global condition. +Next it visits all basic blocks in turn. +For every pair of adjacent ASPs, it checks conditions 1. and 2. and +combines the ASPs if they are satisfied. +The new ASP may be used as first ASP in the next pair. +If a condition fails, it simply continues with the next ASP. +Finally, the last ASP is removed if: +.IP - +nothing has been popped from the stack after the last ASP that was +pushed before it +.IP - +the block was not marked by the depth first search +.IP - +the block is not in a loop +.LP diff --git a/doc/ego/sr/sr1 b/doc/ego/sr/sr1 new file mode 100644 index 0000000..7273d8f --- /dev/null +++ b/doc/ego/sr/sr1 @@ -0,0 +1,47 @@ +.bp +.NH 1 +Strength reduction +.NH 2 +Introduction +.PP +The Strength Reduction optimization technique (SR) +tries to replace expensive operators +by cheaper ones, +in order to decrease the execution time +of the program. +A classical example is replacing a 'multiplication by 2' +by an addition or a shift instruction. +These kinds of local transformations are already +done by the EM Peephole Optimizer. +Strength reduction can also be applied +more generally to operators used in a loop. +.DS +.TS +l l. +i := 1; i := 1; +while i < 100 loop\ \ \ \ \ \ \ --> TMP := i * 118; + put(i * 118); while i < 100 loop + i := i + 1; put(TMP); +end loop; i := i + 1; + TMP := TMP + 118; + end loop; +.TE + +Fig. 6.1 An example of Strength Reduction +.DE +In Fig. 6.1, a multiplication inside a loop is +replaced by an addition inside the loop and a multiplication +outside the loop. +Clearly, this is a global optimization; it cannot +be done by a peephole optimizer. +.PP +In some cases a related technique, \fItest replacement\fR, +can be used to eliminate the +loop variable i. +This technique will not be discussed in this report.
+.sp 0 +In the example above, the resulting code +can be further optimized by using +constant propagation. +Obviously, this is not the task of the +Strength Reduction phase. diff --git a/doc/ego/sr/sr2 b/doc/ego/sr/sr2 new file mode 100644 index 0000000..9f7f01e --- /dev/null +++ b/doc/ego/sr/sr2 @@ -0,0 +1,223 @@ +.NH 2 +The model of strength reduction +.PP +In this section we will describe +the transformations performed by +Strength Reduction (SR). +Before doing so, we will introduce the +central notion of an induction variable. +.NH 3 +Induction variables +.PP +SR looks for variables whose +values form an arithmetic progression +at the beginning of a loop. +These variables are called induction variables. +The most frequently occurring example of such +a variable is a loop-variable in a high-order +programming language. +Several quite sophisticated models of strength +reduction can be found in the literature. +.[ +cocke reduction strength cacm +.] +.[ +allen cocke kennedy reduction strength +.] +.[ +lowry medlock cacm +.] +.[ +aho compiler design +.] +In these models the notion of an induction variable +is far more general than the intuitive notion +of a loop-variable. +The definition of an induction variable we present here +is more restricted, +yielding a simpler model and simpler transformations. +We think the principal source for strength reduction lies in +expressions using a loop-variable, +i.e. a variable that is incremented or decremented +by the same amount after every loop iteration, +and that cannot be changed in any other way. +.PP +Of course, the EM code does not contain high level constructs +such as for-statements. +We will define an induction variable in terms +of the Intermediate Code of the optimizer. +Note that the notions of a loop in the +EM text and of a firm basic block +were defined in section 3.3.5.
+.sp +.UL definition +.sp 0 +An induction variable i of a loop L is a local variable +that is never accessed indirectly, +whose size is the word size of the target machine, and +that is assigned exactly once within L, +the assignment: +.IP - +being of the form i := i + c or i := c +i, +c is a constant +called the \fIstep value\fR of i. +.IP - +occurring in a firm block of L. +.LP +(Note that the first restriction on the assignment +is not described in terms of the Intermediate Code; +we will give such a description later; the current +definition is easier to understand however). +.NH 3 +Recognized expressions +.PP +SR recognizes certain expressions using +an induction variable and replaces +them by cheaper ones. +Two kinds of expensive operations are recognized: +multiplication and array address computations. +The expressions that are simplified must +use an induction variable +as an operand of +a multiplication or as index in an array expression. +.PP +Often a linear function of an induction variable is used, +rather than the variable itself. +In these cases optimization is still possible. +We call such expressions \fIiv-expressions\fR. +.sp +.UL definition: +.sp 0 +An iv-expression of an induction variable i of a loop L is +an expression that: +.IP - +uses only the operators + and - (unary as well as binary) +.IP - +uses i as operand exactly once +.IP - +uses (besides i) only constants or variables that are +never changed in L as operands. +.LP +.PP +The expressions recognized by SR are of the following forms: +.IP (1) +iv_expression * constant +.IP (2) +constant * iv_expression +.IP (3) +A[iv-expression] := \kx(assign to array element) +.IP (4) +A[iv-expression] \h'|\nxu'(use array element) +.IP (5) +& A[iv-expression] \h'|\nxu'(take address of array element) +.LP +(Note that EM has different instructions to use an array element, +store into one, or take the address of one, resp. LAR, SAR, and AAR). 
+.sp 0 +The size of the elements of A must +be known statically. +In cases (3) and (4) this size +must equal the word size of the +target machine. +.NH 3 +Transformations +.PP +With every recognized expression we associate +a new temporary local variable TMP, +allocated in the stack frame of the +procedure containing the expression. +At any program point within the loop, TMP will +contain the following value: +.IP multiplication: 18 +the current value of iv-expression * constant +.IP arrays: +the current value of &A[iv-expression]. +.LP +In the second case, TMP essentially is a pointer variable, +pointing to the element of A that is currently in use. +.sp 0 +If the same expression occurs several times in the loop, +the same temporary local is used each time. +.PP +Three transformations are applied to the EM text: +.IP (1) +TMP is initialized with the right value. +This initialization takes place just +before the loop. +.IP (2) +The recognized expression is simplified. +.IP (3) +TMP is incremented; this takes place just +after the induction variable is incremented. +.LP +For multiplication, the initial value of TMP +is the value of the recognized expression at +the program point immediately before the loop. +For arrays, TMP is initialized with the address +of the first array element that is accessed. +So the initialization code is: +.DS +TMP := iv-expression * constant; or +TMP := &A[iv-expression] +.DE +At the point immediately before the loop, +the induction variable will already have been +initialized, +so the value used in the code above will be the +value it has during the first iteration. +.PP +For multiplication, the recognized expression can simply be +replaced by TMP. +For array optimizations, the replacement +depends on the form: +.DS +.TS +l l l. +\fIform\fR \fIreplacement\fR +(3) A[iv-expr] := *TMP := (assign indirect) +(4) A[iv-expr] *TMP (use indirect) +(5) &A[iv-expr] TMP +.TE +.DE +The '*' denotes the indirect operator. 
(Note that +EM has different instructions to do +an assign-indirect and a use-indirect). +As the size of the array elements is restricted +to be the word size in case (3) and (4), +only one EM instruction needs to +be generated in all cases. +.PP +The amount by which TMP is incremented is: +.IP multiplication: 18 +step value * constant +.IP arrays: +step value * element size +.LP +Note that the step value (see definition of induction variable above), +the constant, and the element size (see previous section) can all +be determined statically. +If the sign of the induction variable in the +iv-expression is negative, the amount +must be negated. +.PP +The transformations are demonstrated by an example. +.DS +.TS +l l. +i := 100; i := 100; +while i > 1 loop TMP := (6-i) * 5; + X := (6-i) * 5 + 2; while i > 1 loop + Y := (6-i) * 5 - 8;\ \ \ \ \ \ \ --> X := TMP + 2; + i := i - 3; Y := TMP - 8; +end loop; i := i - 3; + TMP := TMP + 15; + end loop; +.TE + +Fig. 6.2 Example of complex Strength Reduction transformations +.DE +The expression '(6-i)*5' is recognized twice. The constant +is 5. +The step value is -3. +The sign of i in the recognized expression is '-'. +So the increment value of TMP is -(-3*5) = +15. diff --git a/doc/ego/sr/sr3 b/doc/ego/sr/sr3 new file mode 100644 index 0000000..10dbf64 --- /dev/null +++ b/doc/ego/sr/sr3 @@ -0,0 +1,244 @@ +.NH 2 +Implementation +.PP +Like most phases, SR deals with one procedure +at a time. +Within a procedure, SR works on one loop at a time. +Loops are processed in textual order. +If loops are nested inside each other, +SR starts with the outermost loop and proceeds in the +inwards direction. +This order is chosen, +because it enables the optimization +of multi-dimensional array address computations, +if the elements are accessed in the usual way +(i.e. row after row, rather than column after column). +For every loop, SR first detects all induction variables +and then tries to recognize +expressions that can be optimized. 
+.NH 3 +Finding induction variables +.PP +The process of finding induction variables +can conveniently be split up +into two parts. +First, the EM text of the loop is scanned to find +all \fIcandidate\fR induction variables, +which are word-sized local variables +that are assigned precisely once +in the loop, within a firm block. +Second, for every candidate, the single assignment +is inspected, to see if it has the form +required by the definition of an induction variable. +.PP +Candidates are found by scanning the EM code of the loop. +During this scan, two sets are maintained. +The set "cand" contains all variables that were +assigned exactly once so far, within a firm block. +The set "dismiss" contains all variables that +should not be made a candidate. +Initially, both sets are empty. +If a variable is assigned to, it is put +in the cand set, if three conditions are met: +.IP 1. +the variable was not in cand or dismiss already +.IP 2. +the assignment takes place in a firm block +.IP 3. +the assignment is not a ZRL instruction (assignment by zero) +or a SDL instruction (store double local). +.LP +If any condition fails, the variable is dismissed from cand +(if it was there already) and put in dismiss +(if it was not there already). +.sp 0 +All variables for which no register message was generated (i.e. those +variables that may be accessed indirectly) are assumed +to be changed in the loop. +.sp 0 +All variables that remain in cand are candidate induction variables. +.PP +From the set of candidates, the induction variables can +be determined, by inspecting the single assignment. +The assignment must match one of the EM patterns below. +('x' is the candidate. 'ws' is the word size of the target machine. +'n' is any number.) +.DS +.TS +l l. 
+\fIpattern\fR \fIstep size\fR +INL x | +1 +DEL x | -1 +LOL x ; (INC | DEC) ; STL x | +1 | -1 +LOL x ; LOC n ; (ADI ws | SBI ws) ; STL x | +n | -n +LOC n ; LOL x ; ADI ws ; STL x +n +.TE +.DE +From the patterns the step size of the induction variable +can also be determined. +These step sizes are displayed on the right hand side. +.sp +For every induction variable we maintain the following information: +.IP - +the offset of the variable in the stackframe of its procedure +.IP - +a pointer to the EM text of the assignment statement +.IP - +the step value +.LP +.NH 3 +Optimizing expressions +.PP +If any induction variables of the loop were found, +the EM text of the loop is scanned again, +to detect expressions that can be optimized. +SR scans for multiplication and array instructions. +Whenever it finds such an instruction, it analyses the +code in front of it. +If an expression is to be optimized, it must +be generated by the following syntax rules. +.DS +.TS +l l. +optimizable_expr: + iv_expr const mult | + const iv_expr mult | + address iv_expr address array_instr; +mult: + MLI ws | + MLU ws ; +array_instr: + LAR ws | + SAR ws | + AAR ws ; +const: + LOC n ; +.TE +.DE +An 'address' is an EM instruction that loads an +address on the stack. +An instruction like LOL may be an 'address', if +the size of an address (pointer size, =ps) is +the same as the word size. +If the pointer size is twice the word size, +instructions like LDL are an 'address'. +(The addresses in the third grammar rule +denote resp. the array address and the +array descriptor address). +.DS +.TS +l l. +address: + LAE | + LAL | + LOL if ps=ws | + LOE ,, | + LIL ,, | + LDL if ps=2*ws | + LDE ,, ; +.TE +.DE +The notion of an iv-expression was introduced earlier. +.DS +.TS +l l. 
+iv_expr: + iv_expr unair_op | + iv_expr iv_expr binary_op | + loopconst | + iv ; +unair_op: + NGI ws | + INC | + DEC ; +binary_op: + ADI ws | + ADU ws | + SBI ws | + SBU ws ; +loopconst: + const | + LOL x if x is not changed in loop ; +iv: + LOL x if x is an induction variable ; +.TE +.DE +An iv_expression must satisfy one additional constraint: +it must use exactly one operand that is an induction +variable. +A simple, hand written, top-down parser is used +to recognize an iv-expression. +It scans the EM code from right to left +(recall that EM is essentially postfix). +It uses semantic attributes (inherited as well as +derived) to check the additional constraint. +.PP +All information assembled during the recognition +process is put in a 'code_info' structure. +This structure contains the following information: +.IP - +the optimizable code itself +.IP - +the loop and basic block the code is part of +.IP - +the induction variable +.IP - +the iv-expression +.IP - +the sign of the induction variable in the +iv-expression +.IP - +the offset and size of the temporary local variable +.IP - +the expensive operator (MLI, LAR etc.) +.IP - +the instruction that loads the constant +(for multiplication) or the array descriptor +(for arrays). +.LP +The entire transformation process is driven +by this information. +As the EM text is represented internally +as a list, this process consists +mainly of straightforward list manipulations. +.sp 0 +The initialization code must be put +immediately before the loop entry. +For this purpose a \fIheader block\fR is +created that has the loop entry block as +its only successor and that dominates the +entry block. +The CFG and all relations (SUCC,PRED, IDOM, LOOPS etc.) +are updated. +.sp 0 +An EM instruction that will +replace the optimizable code +is created and put at the place of the old code. +The list representing the old optimizable code +is used to create a list for the initializing code, +as they are similar. 
+Only two modifications are required: +.IP - +if the expensive operator is a LAR or SAR, +it must be replaced by an AAR, as the initial value +of TMP is the \fIaddress\fR of the first +array element that is accessed. +.IP - +code must be appended to store the result of the +expression in TMP. +.LP +Finally, code to increment TMP is created and put after +the code of the single assignment to the +induction variable. +The generated code uses either an integer addition +(ADI) or an integer-to-pointer addition (ADS) +to do the increment. +.PP +SR maintains a set of all expressions that have already +been recognized in the present loop. +Such expressions are said to be \fIavailable\fR. +If an expression is recognized that is +already available, +no new temporary local variable is allocated for it, +and the code to initialize and increment the local +is not generated. diff --git a/doc/ego/sr/sr4 b/doc/ego/sr/sr4 new file mode 100644 index 0000000..ae87643 --- /dev/null +++ b/doc/ego/sr/sr4 @@ -0,0 +1,28 @@ +.NH 2 +Source files of SR +.PP +The sources of SR are in the following files +and packages: +.IP sr.h: 14 +declarations of global variables and +data structures +.IP sr.c: +the routine main; a driving routine to process +(possibly nested) loops in the right order +.IP iv +implements a procedure that finds the induction variables +of a loop +.IP reduce +implements a procedure that finds optimizable expressions +and that does the transformations +.IP cand +implements a procedure that finds the candidate induction +variables; used to implement iv +.IP xform +implements several useful routines that transform +lists of EM text or a CFG; used to implement reduce +.IP expr +implements a procedure that parses iv-expressions +.IP aux +implements several auxiliary procedures. 
+.LP diff --git a/doc/ego/ud/ud1 b/doc/ego/ud/ud1 new file mode 100644 index 0000000..8f2a12f --- /dev/null +++ b/doc/ego/ud/ud1 @@ -0,0 +1,58 @@ +.bp +.NH 1 +Use-Definition analysis +.NH 2 +Introduction +.PP +The "Use-Definition analysis" phase (UD) consists of two related optimization +techniques that both depend on "Use-Definition" information. +The techniques are Copy Propagation and Constant Propagation. +They are best explained via an example (see Figs. 11.1 and 11.2). +.DS + (1) A := B A := B + ... --> ... + (2) use(A) use(B) + +Fig. 11.1 An example of Copy Propagation +.DE +.DS + (1) A := 12 A := 12 + ... --> ... + (2) use(A) use(12) + +Fig. 11.2 An example of Constant Propagation +.DE +Both optimizations have to check that the value of A at line (2) +can only be obtained at line (1). +Copy Propagation also has to assure that the value of B is +the same at line (1) as at line (2). +.PP +One purpose of both transformations is to introduce +opportunities for the Dead Code Elimination optimization. +If the variable A is used nowhere else, the assignment A := B +becomes useless and can be eliminated. +.sp 0 +If B is less expensive to access than A (e.g. this is sometimes the case +if A is a local variable and B is a global variable), +Copy Propagation directly improves the code itself. +If A is cheaper to access the transformation will not be performed. +Likewise, a constant as operand may be cheaper than a variable. +Having a constant as operand may also facilitate other optimizations. +.PP +The design of UD is based on the theory described in sections +14.1 and 14.3 of. +.[ +aho compiler design +.] +As a main departure from that theory, +we do not demand the statement A := B to become redundant after +Copy Propagation. +If B is cheaper to access than A, the optimization is always performed; +if B is more expensive than A, we never do the transformation.
+If A and B are equally expensive UD uses the heuristic rule to +replace infrequently used variables by frequently used ones. +This rule increases the chances of the assignment to become useless. +.PP +In the next section we will give a brief outline of the data +flow theory used +for the implementation of UD. diff --git a/doc/ego/ud/ud2 b/doc/ego/ud/ud2 new file mode 100644 index 0000000..21174f4 --- /dev/null +++ b/doc/ego/ud/ud2 @@ -0,0 +1,64 @@ +.NH 2 +Data flow information +.PP +.NH 3 +Use-Definition information +.PP +A \fIdefinition\fR of a variable A is an assignment to A. +A definition is said to \fIreach\fR a point p if there is a +path in the control flow graph from the definition to p, such that +A is not redefined on that path. +.PP +For every basic block B, we define the following sets: +.IP GEN[b] 9 +the set of definitions in b that reach the end of b. +.IP KILL[b] +the set of definitions outside b that define a variable that +is changed in b. +.IP IN[b] +the set of all definitions reaching the beginning of b. +.IP OUT[b] +the set of all definitions reaching the end of b. +.LP +GEN and KILL can be determined by inspecting the code of the procedure. +IN and OUT are computed by solving the following data flow equations: +.DS +(1) OUT[b] = IN[b] - KILL[b] + GEN[b] +(2) IN[b] = OUT[p1] + ... + OUT[pn], + where PRED(b) = {p1, ... , pn} +.DE +.NH 3 +Copy information +.PP +A \fIcopy\fR is a definition of the form "A := B". +A copy is said to be \fIgenerated\fR in a basic block n if +it occurs in n and there is no subsequent assignment to B in n. +A copy is said to be \fIkilled\fR in n if: +.IP (i) +it occurs in n and there is a subsequent assignment to B within n, or +.IP (ii) +it occurs outside n, the definition A := B reaches the beginning of n +and B is changed in n (note that a copy also is a definition). +.LP +A copy \fIreaches\fR a point p, if there are no assignments to B +on any path in the control flow graph from the copy to p. 
+.PP +We define the following sets: +.IP C_GEN[b] 11 +the set of all copies in b generated in b. +.IP C_KILL[b] +the set of all copies killed in b. +.IP C_IN[b] +the set of all copies reaching the beginning of b. +.IP C_OUT[b] +the set of all copies reaching the end of b. +.LP +C_IN and C_OUT are computed by solving the following equations: +(root is the entry node of the current procedure; '*' denotes +set intersection) +.DS +(1) C_OUT[b] = C_IN[b] - C_KILL[b] + C_GEN[b] +(2) C_IN[b] = C_OUT[p1] * ... * C_OUT[pn], + where PRED(b) = {p1, ... , pn} and b /= root + C_IN[root] = {all copies} +.DE diff --git a/doc/ego/ud/ud3 b/doc/ego/ud/ud3 new file mode 100644 index 0000000..99bf2a0 --- /dev/null +++ b/doc/ego/ud/ud3 @@ -0,0 +1,26 @@ +.NH 2 +Pointers and subroutine calls +.PP +The theory outlined above assumes that variables can +only be changed by a direct assignment. +This condition does not hold for EM. +In case of an assignment through a pointer variable, +it is in general impossible to see which variable is affected +by the assignment. +Similar problems occur in the presence of procedure calls. +Therefore we distinguish two kinds of definitions: +.IP - +an \fIexplicit\fR definition is a direct assignment to one +specific variable +.IP - +an \fIimplicit\fR definition is the potential alteration of +a variable as a result of a procedure call or an indirect assignment. +.LP +An indirect assignment causes implicit definitions to +all variables that may be accessed indirectly, i.e. +all local variables for which no register message was generated +and all global variables. +If a procedure contains an indirect assignment it may change the +same set of variables, else it may change some global variables directly. +The KILL, GEN, IN and OUT sets contain explicit as well +as implicit definitions. 
diff --git a/doc/ego/ud/ud4 b/doc/ego/ud/ud4 new file mode 100644 index 0000000..c31ad64 --- /dev/null +++ b/doc/ego/ud/ud4 @@ -0,0 +1,78 @@ +.NH 2 +Implementation +.PP +UD first builds a number of tables: +.IP locals: 9 +contains information about the local variables of the +current procedure (offset,size,whether a register message was found +for it and, if so, the score field of that message) +.IP defs: +a table of all explicit definitions appearing in the +current procedure. +.IP copies: +a table of all copies appearing in the +current procedure. +.LP +Every variable (local as well as global), definition and copy +is identified by a unique number, which is the index +in the table. +All tables are constructed by traversing the EM code. +A fourth table, "vardefs" is used, indexed by a 'variable number', +which contains for every variable the set of explicit definitions of it. +Also, for each basic block b, the set CHGVARS containing all variables +changed by it is computed. +.PP +The GEN sets are obtained in one scan over the EM text, +by analyzing every EM instruction. +The KILL set of a basic block b is computed by looking at the +set of variables +changed by b (i.e. CHGVARS[b]). +For every such variable v, all explicit definitions to v +(i.e. vardefs[v]) that are not in GEN[b] are added to KILL[b]. +Also, the implicit definition of v is added to KILL[b]. +Next, the data flow equations for use-definition information +are solved, +using a straightforward, iterative algorithm. +All sets are represented as bitvectors, so the operations +on sets (union, difference) can be implemented efficiently. +.PP +The C_GEN and C_KILL sets are computed simultaneously in one scan +over the EM text. +For every copy A := B appearing in basic block b we do +the following: +.IP 1. +for every basic block n /= b that changes B, see if the definition A := B +reaches the beginning of n (i.e.
check if the index number of A := B in +the "defs" table is an element of IN[n]); +if so, add the copy to C_KILL[n] +.IP 2. +if B is redefined later on in b, add the copy to C_KILL[b], else +add it to C_GEN[b] +.LP +C_IN and C_OUT are computed from C_GEN and C_KILL via the second set of +data flow equations. +.PP +Finally, in one last scan all opportunities for optimization are +detected. +For every use u of a variable A, we check if +there is a unique explicit definition d reaching u. +.sp +If the definition is a copy A := B and B has the same value at d as +at u, then the use of A at u may be changed into B. +The latter condition can be verified as follows: +.IP - +if u and d are in the same basic block, see if there is +any assignment to B in between d and u +.IP - +if u and d are in different basic blocks, the condition is +satisfied if there is no assignment to B in the block of u prior to u +and d is in C_IN[b]. +.LP +Before the transformation is actually done, UD first makes sure the +alteration is really desirable, as described before. +The information needed for this purpose (access costs of local and +global variables) is read from a machine descriptor file. +.sp +If the only definition reaching u has the form "A := constant", the use +of A at u is replaced by the constant. 
+ diff --git a/doc/ego/ud/ud5 b/doc/ego/ud/ud5 new file mode 100644 index 0000000..1d617e1 --- /dev/null +++ b/doc/ego/ud/ud5 @@ -0,0 +1,19 @@ + +.NH 2 +Source files of UD +.PP +The sources of UD are in the following files and packages: +.IP ud.h: 14 +declarations of global variables and data structures +.IP ud.c: +the routine main; initialization of target machine dependent tables +.IP defs: +routines to compute the GEN and KILL sets and routines to analyse +EM instructions +.IP const: +routines involved in constant propagation +.IP copy: +routines involved in copy propagation +.IP aux: +contains auxiliary routines +.LP diff --git a/doc/em/READ_ME b/doc/em/READ_ME new file mode 100644 index 0000000..1d0ae71 --- /dev/null +++ b/doc/em/READ_ME @@ -0,0 +1,6 @@ +This is the text of IR-81, +DESCRIPTION OF A MACHINE ARCHITECTURE FOR USE WITH BLOCK STRUCTURED LANGUAGES + +The file em.i (text of the defining interpreter) was hand-edited from int/em.p + +The directory int contains the interpreter. diff --git a/doc/em/app.codes.nr b/doc/em/app.codes.nr new file mode 100644 index 0000000..256e8b2 --- /dev/null +++ b/doc/em/app.codes.nr @@ -0,0 +1,153 @@ +.bp +.AP "EM CODE TABLES" +The following table is used by the assembler for EM machine +language. +It specifies the opcodes used for each instruction and +how arguments are mapped to machine language arguments. +The table is presented in three columns, +each line in each column contains three or four fields. +Each line describes a range of interpreter opcodes by +specifying for which instruction the range is used, the type of the +opcodes (mini, shortie, etc..) and range for the instruction +argument. +.QQ +The first field on each line gives the EM instruction mnemonic, +the second field gives some flags. +If the opcodes are minis or shorties the third field specifies +how many minis/shorties are used. +The last field gives the number of the (first) interpreter +opcode.
+.LP +Flags : +.IP "" +Opcode type, only one of the following may be specified. +.RS +.IP \- +opcode without argument +.IP m +mini +.IP s +shortie +.IP 2 +opcode with 2-byte signed argument +.IP 4 +opcode with 4-byte signed argument +.IP 8 +opcode with 8-byte signed argument +.IP u +opcode with 2-byte unsigned argument +.RE +.IP "" +Secondary (escaped) opcodes. +.RS +.IP e +The opcode thus marked is in the secondary opcode group instead +of the primary +.RE +.IP "" +restrictions on arguments +.RS +.IP N +Negative arguments only +.IP P +Positive and zero arguments only +.RE +.IP "" +mapping of arguments +.RS +.IP w +argument must be divisible by the wordsize and is divided by the +wordsize before use as opcode argument. +.IP o +argument ( possibly after division ) must be >= 1 and is +decremented before use as opcode argument +.RE +.LP +If the opcode type is 2,4 or 8 the resulting argument is used as +opcode argument (least significant byte first). +If the opcode type is mini, the argument is added +to the first opcode \- if in range \- . +If the argument is negative, the absolute value minus one is +used in the algorithm above. +.br +For shorties with positive arguments the first opcode is used +for arguments in the range 0..255, the second for the range +256..511, etc.. +For shorties with negative arguments the first opcode is used +for arguments in the range \-1..\-256, the second for the range +\-257..\-512, etc.. +The byte following the opcode contains the least significant +byte of the argument. +First some examples of these specifications. +.IP "aar mwPo 1 34" +.br +Indicates that opcode 34 is used as a mini for Positive +instruction arguments only. +The w and o indicate division and decrementing of the +instruction argument. +Because the resulting argument must be zero ( only opcode 34 may be used), +this mini can only be used for instruction argument 2. +Conclusion: opcode 34 is for "AAR 2". 
+.IP "adp sP 1 41" +.br +Opcode 41 is used as shortie for ADP with arguments in the range +0..255. +.IP "bra sN 2 60" +.br +Opcode 60 is used as shortie for BRA with arguments \-1..\-256, +61 is used for arguments \-257..\-512. +.IP "zer e\- 145" +.br +Escaped opcode 145 is used for ZER. +.LP +The interpreter opcode table: +.DS +.so itables +.DE +.PP +The table above results in the following dispatch tables. +Dispatch tables are used by interpreters to jump to the +routines implementing the EM instructions, indexed by the next opcode. +Each line of the dispatch tables gives the routine names +of eight consecutive opcodes, preceded by the first opcode number +on that line. +Routine names consist of an EM mnemonic followed by a suffix. +The suffixes show the encoding used for each opcode. +.LP +The following suffixes exist: +.TS +tab(:); +l l. +.z:no arguments +.l:16-bit argument +.L:32-bit argument +.u:16-bit unsigned argument +.lw:16-bit argument divided by the wordsize +.Lw:32-bit argument divided by the wordsize +.p:positive 16-bit argument +.P:positive 32-bit argument +.pw:positive 16-bit argument divided by the wordsize +.Pw:positive 32-bit argument divided by the wordsize +.n:negative 16-bit argument +.N:negative 32-bit argument +.nw:negative 16-bit argument divided by the wordsize +.Nw:negative 32-bit argument divided by the wordsize +.s<num>:shortie with <num> as high order argument byte +.w<num>:shortie with argument divided by the wordsize +.<num>:mini with <num> as argument +.<num>W:mini with <num>*wordsize as argument +.TE +.LP +<num> is a possibly negative integer. +.LP +The dispatch table for the 256 primary opcodes: +.sp 1 +.so dispat1 +.sp 2 +The list of secondary opcodes (escape1): +.sp 1 +.so dispat2 +.sp 2 +Finally, the list of opcodes with four byte arguments (escape2).
+.sp 1 +.so dispat3 diff --git a/doc/em/app.exam.nr b/doc/em/app.exam.nr new file mode 100644 index 0000000..3080d6a --- /dev/null +++ b/doc/em/app.exam.nr @@ -0,0 +1,275 @@ +.bp +.AP "AN EXAMPLE PROGRAM" +.PP +.na +.ta 4n 8n 12n 16n 20n +.nf + 1 program example(output); + 2 {This program just demonstrates typical EM code.} + 3 type rec = record r1: integer; r2:real; r3: boolean end; + 4 var mi: integer; mx:real; r:rec; + 5 + 6 function sum(a,b:integer):integer; + 7 begin + 8 sum := a + b + 9 end; +10 +11 procedure test(var r: rec); +12 label 1; +13 var i,j: integer; +14 x,y: real; +15 b: boolean; +16 c: char; +17 a: array[1..100] of integer; +18 +19 begin +20 j := 1; +21 i := 3 * j + 6; +22 x := 4.8; +23 y := x/0.5; +24 b := true; +25 c := 'z'; +26 for i:= 1 to 100 do a[i] := i * i; +27 r.r1 := j+27; +28 r.r3 := b; +29 r.r2 := x+y; +30 i := sum(r.r1, a[j]); +31 while i > 0 do begin j := j + r.r1; i := i - 1 end; +32 with r do begin r3 := b; r2 := x+y; r1 := 0 end; +33 goto 1; +34 1: writeln(j, i:6, x:9:3, b) +35 end; {test} +36 begin {main program} +37 mx := 15.96; +38 mi := 99; +39 test(r) +40 end. +.fi +.ad +.bp +The EM code as produced by the Pascal-VU compiler is given below. Comments +have been added manually. Note that this code has already been optimized. 
+.LP +.na +.nf +.ta 1n 24n + mes 2,2,2 ; wordsize 2, pointersize 2 +\&.1 + rom 't.p\e000' ; the name of the source file + hol 552,\-32768,0 ; externals and buf occupy 552 bytes + exp $sum ; sum can be called from other modules + pro $sum,2 ; procedure sum ; 2 bytes local storage + lin 8 ; code from source line 8 + ldl 0 ; load two locals ( a and b ) + adi 2 ; add them + ret 2 ; return the result + end 2 ; end of procedure ( still two bytes local storage ) +\&.2 + rom 1,99,2 ; descriptor of array a[] + exp $test ; the compiler exports all level 0 procedures + pro $test,226 ; procedure test, 226 bytes local storage +\&.3 + rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in +\&.4 ; global storage + rom 0.5F8 ; same for 0.5 + mes 3,\-226,2,2 ; compiler temporary not referenced by address + mes 3,\-24,2,0 ; the same is true for i, j, b and c in test + mes 3,\-22,2,0 + mes 3,\-4,2,0 + mes 3,\-2,2,0 + mes 3,\-20,8,0 ; and for x and y + mes 3,\-12,8,0 + lin 20 ; maintain source line number + loc 1 + stl \-4 ; j := 1 + lni ; lin 21 prior to optimization + lol \-4 + loc 3 + mli 2 + loc 6 + adi 2 + stl \-2 ; i := 3 * j + 6 + lni ; lin 22 prior to optimization + lae .3 + loi 8 + lal \-12 + sti 8 ; x := 4.8 + lni ; lin 23 prior to optimization + lal \-12 + loi 8 + lae .4 + loi 8 + dvf 8 + lal \-20 + sti 8 ; y := x / 0.5 + lni ; lin 24 prior to optimization + loc 1 + stl \-22 ; b := true + lni ; lin 25 prior to optimization + loc 122 + stl \-24 ; c := 'z' + lni ; lin 26 prior to optimization + loc 1 + stl \-2 ; for i:= 1 +2 + lol \-2 + dup 2 + mli 2 ; i*i + lal \-224 + lol \-2 + lae .2 + sar 2 ; a[i] := + lol \-2 + loc 100 + beq *3 ; to 100 do + inl \-2 ; increment i and loop + bra *2 +3 + lin 27 + lol \-4 + loc 27 + adi 2 ; j + 27 + sil 0 ; r.r1 := + lni ; lin 28 prior to optimization + lol \-22 ; b + lol 0 + stf 10 ; r.r3 := + lni ; lin 29 prior to optimization + lal \-20 + loi 16 + adf 8 ; x + y + lol 0 + adp 2 + sti 8 ; r.r2 := + lni ; lin 30 prior to optimization + lal 
\-224 + lol \-4 + lae .2 + lar 2 ; a[j] + lil 0 ; r.r1 + cal $sum ; call now + asp 4 ; remove parameters from stack + lfr 2 ; get function result + stl \-2 ; i := +4 + lin 31 + lol \-2 + zle *5 ; while i > 0 do + lol \-4 + lil 0 + adi 2 + stl \-4 ; j := j + r.r1 + del \-2 ; i := i - 1 + bra *4 ; loop +5 + lin 32 + lol 0 + stl \-226 ; make copy of address of r + lol \-22 + lol \-226 + stf 10 ; r3 := b + lal \-20 + loi 16 + adf 8 + lol \-226 + adp 2 + sti 8 ; r2 := x + y + loc 0 + sil \-226 ; r1 := 0 + lin 34 ; note the absence of the unnecessary jump + lae 22 ; address of output structure + lol \-4 + cal $_wri ; write integer with default width + asp 4 ; pop parameters + lae 22 + lol \-2 + loc 6 + cal $_wsi ; write integer width 6 + asp 6 + lae 22 + lal \-12 + loi 8 + loc 9 + loc 3 + cal $_wrf ; write fixed format real, width 9, precision 3 + asp 14 + lae 22 + lol \-22 + cal $_wrb ; write boolean, default width + asp 4 + lae 22 + cal $_wln ; writeln + asp 2 + ret 0 ; return, no result + end 226 + exp $_main + pro $_main,0 ; main program +\&.6 + con 2,\-1,22 ; description of external files +\&.5 + rom 15.96F8 + fil .1 ; maintain source file name + lae .6 ; description of external files + lae 0 ; base of hol area to relocate buffer addresses + cal $_ini ; initialize files, etc... + asp 4 + lin 37 + lae .5 + loi 8 + lae 2 + sti 8 ; mx := 15.96 + lni ; lin 38 prior to optimization + loc 99 + ste 0 ; mi := 99 + lni ; lin 39 prior to optimization + lae 10 ; address of r + cal $test + asp 2 + loc 0 ; normal exit + cal $_hlt ; cleanup and finish + asp 2 + end 0 + mes 5 ; reals were used +.fi +.ad +.PP +The compact code corresponding to the above program is listed below. +Read it horizontally, line by line, not column by column. +Each number represents a byte of compact code, printed in decimal. +The first two bytes form the magic word. 
+.LP +.Dr 33 + 173 0 159 122 122 122 255 242 1 161 250 124 116 46 112 0 + 255 156 245 40 2 245 0 128 120 155 249 123 115 117 109 160 + 249 123 115 117 109 122 67 128 63 120 3 122 88 122 152 122 + 242 2 161 121 219 122 255 155 249 124 116 101 115 116 160 249 + 124 116 101 115 116 245 226 0 242 3 161 253 128 123 52 46 + 56 255 242 4 161 253 128 123 48 46 53 255 159 123 245 30 + 255 122 122 255 159 123 96 122 120 255 159 123 98 122 120 255 + 159 123 116 122 120 255 159 123 118 122 120 255 159 123 100 128 + 120 255 159 123 108 128 120 255 67 140 69 121 113 116 68 73 + 116 69 123 81 122 69 126 3 122 113 118 68 57 242 3 72 + 128 58 108 112 128 68 58 108 72 128 57 242 4 72 128 44 + 128 58 100 112 128 68 69 121 113 98 68 69 245 122 0 113 + 96 68 69 121 113 118 182 73 118 42 122 81 122 58 245 32 + 255 73 118 57 242 2 94 122 73 118 69 220 10 123 54 118 + 18 122 183 67 147 73 116 69 147 3 122 104 120 68 73 98 + 73 120 111 130 68 58 100 72 136 2 128 73 120 4 122 112 + 128 68 58 245 32 255 73 116 57 242 2 59 122 65 120 20 + 249 123 115 117 109 8 124 64 122 113 118 184 67 151 73 118 + 128 125 73 116 65 120 3 122 113 116 41 118 18 124 185 67 + 152 73 120 113 245 30 255 73 98 73 245 30 255 111 130 58 + 100 72 136 2 128 73 245 30 255 4 122 112 128 69 120 104 + 245 30 255 67 154 57 142 73 116 20 249 124 95 119 114 105 + 8 124 57 142 73 118 69 126 20 249 124 95 119 115 105 8 + 126 57 142 58 108 72 128 69 129 69 123 20 249 124 95 119 + 114 102 8 134 57 142 73 98 20 249 124 95 119 114 98 8 + 124 57 142 20 249 124 95 119 108 110 8 122 88 120 152 245 + 226 0 155 249 125 95 109 97 105 110 160 249 125 95 109 97 + 105 110 120 242 6 151 122 119 142 255 242 5 161 253 128 125 + 49 53 46 57 54 255 50 242 1 57 242 6 57 120 20 249 + 124 95 105 110 105 8 124 67 157 57 242 5 72 128 57 122 + 112 128 68 69 219 110 120 68 57 130 20 249 124 116 101 115 + 116 8 122 69 120 20 249 124 95 104 108 116 8 122 152 120 + 159 124 160 255 159 125 255 +.De diff --git a/doc/em/assem.nr b/doc/em/assem.nr new file 
mode 100644 index 0000000..59ea755 --- /dev/null +++ b/doc/em/assem.nr @@ -0,0 +1,802 @@ +.bp +.P1 "EM ASSEMBLY LANGUAGE" +.PP +We use two representations for assembly language programs, +one is in ASCII and the other is the compact assembly language. +The latter needs less space than the first for the same program +and therefore allows faster processing. +Our only program accepting ASCII assembly +language converts it to the compact form. +All other programs expect compact assembly input. +The first part of the chapter describes the ASCII assembly +language and its semantics. +The second part describes the syntax of the compact assembly +language. +The last part lists the EM instructions with the type of +arguments allowed and an indication of the function. +Appendix A gives a detailed description of the effect of all +instructions in the form of a Pascal program. +.P2 "ASCII assembly language" +.PP +An assembly language program consists of a series of lines, each +line may be blank, contain one (pseudo)instruction or contain one +label. +Input to the assembler is in lower case. +Upper case is used in this +document merely to distinguish keywords from the surrounding prose. +Comment is allowed at the end of each line and starts with a semicolon ";". +This kind of comment does not exist in the compact form. +.QQ +Labels must be placed all by themselves on a line and start in +column 1. +There are two kinds of labels, instruction and data labels. +Instruction labels are unsigned positive integers. +The scope of an instruction label is its procedure. +.QQ +The pseudoinstructions CON, ROM and BSS may be preceded by a +line containing a +1\-8 character data label, the first character of which is a +letter, period or underscore. +The period may only be followed by +digits, the others may be followed by letters, digits and underscores. +The use of the character "." followed by a constant, +which must be in the range 1 to 32767 (e.g.
".40") is recommended +for compiler +generated programs. +These labels are considered as a special case and handled +more efficiently in compact assembly language (see below). +Note that a data label on its own or two consecutive labels are not +allowed. +.PP +Each statement may contain an instruction mnemonic or pseudoinstruction. +These must begin in column 2 or later (not column 1) and must be followed +by a space, tab, semicolon or LF. +Everything on the line following a semicolon is +taken as a comment. +.PP +Each input file contains one module. +A module may contain many procedures, +which may be nested. +A procedure consists of +a PRO statement, a (possibly empty) +collection of instructions and pseudoinstructions and finally an END +statement. +Pseudoinstructions are also allowed between procedures. +They do not belong to a specific procedure. +.PP +All constants in EM are interpreted in the decimal base. +The ASCII assembly language accepts constant expressions +wherever constants are allowed. +The operators recognized are: +, \-, *, % and / with the usual +precedence order. +Use of the parentheses ( and ) to alter the precedence order is allowed. +.P3 "Instruction arguments" +.PP +Unlike many other assembly languages, the EM assembly +language requires all arguments of normal and pseudoinstructions +to be either a constant or an identifier, but not a combination +of these two. +There is one exception to this rule: when a data label is used +for initialization or as an instruction argument, +expressions of the form 'label+constant' and 'label-constant' +are allowed. +This makes it possible to address, for example, the +third word of a ten word BSS block +directly. +Thus LOE LABEL+4 is permitted and so is CON LABEL+3. +The resulting address must be in the same fragment as the label. +It is not allowed to add or subtract from instruction labels or procedure +identifiers, +which certainly is not a severe restriction and greatly aids +optimization.
+.PP +Instruction arguments can be constants, +data labels, data labels offsetted by a constant, instruction +labels and procedure identifiers. +The range of integers allowed depends on the instruction. +Most instructions allow only integers +(signed or unsigned) +that fit in a word. +Arguments used as offsets to pointers should fit in a +pointer-sized integer. +Finally, arguments to LDC should fit in a double-word integer. +.PP +Several instructions have two possible forms: +with an explicit argument and with an implicit argument on top of the stack. +The size of the implicit argument is the wordsize. +The implicit argument is always popped before all other operands. +For example: 'CMI 4' specifies that two four-byte signed +integers on top of the stack are to be compared. +\&'CMI' without an argument expects a wordsized integer +on top of the stack that specifies the size of the integers to +be compared. +Thus the following two sequences are equivalent: +.KS +.TS +center, tab(:) ; +l r 30 l r. +LDL:\-10:LDL:\-10 +LDL:\-14:LDL:\-14 +::LOC:4 +CMI:4:CMI: +ZEQ:*1:ZEQ:*1 +.TE +.KE +Section 11.1.6 shows the arguments allowed for each instruction. +.P3 "Pseudoinstruction arguments" +.PP +Pseudoinstruction arguments can be divided in two classes: +Initializers and others. +The following initializers are allowed: signed integer constants, +unsigned integer constants, floating-point constants, strings, +data labels, data labels offsetted by a constant, instruction +labels and procedure identifiers. +.PP +Constant initializers in BSS, HOL, CON and ROM pseudoinstructions +can be followed by a letter I, U or F. +This indicator +specifies the type of the initializer: Integer, Unsigned or Float. +If no indicator is present I is assumed. +The size of the initializer is the wordsize unless +the indicator is followed by an integer specifying the +initializer's size. +This integer is governed by the same restrictions as for +transfer of objects to/from memory. 
+As in instruction arguments, initializers include expressions of the form: +\&"LABEL+offset" and "LABEL\-offset". +The offset must be an unsigned decimal constant. +The 'IUF' indicators cannot be used in the offsets. +.PP +Data labels are referred to by their name. +.PP +Strings are surrounded by double quotes ("). +Semicolons in a string do not indicate the start of a comment. +In the ASCII representation the escape character \e (backslash) +alters the meaning of subsequent character(s). +This feature allows inclusion of zeroes, graphic characters and +the double quote in the string. +The following escape sequences exist: +.TS +center, tab(:); +l l l. +newline:NL\|(LF):\en +horizontal tab:HT:\et +backspace:BS:\eb +carriage return:CR:\er +form feed:FF:\ef +backslash:\e:\e\e +double quote:":\e" +bit pattern:\fBddd\fP:\e\fBddd\fP +.TE +The escape \fB\eddd\fP consists of the backslash followed by 1, +2, or 3 octal digits specifying the value of +the desired character. +If the character following a backslash is not one of those +specified, +the backslash is ignored. +Example: CON "hello\e012\e0". +Each string element initializes a single byte. +The ASCII character set is used to map characters onto values. +.PP +Instruction labels are referred to as *1, *2, etc. in both branch +instructions and as initializers. +.PP +The notation $procname means the identifier for the procedure +with the specified name. +This identifier has the size of a pointer. +.P3 Notation +.PP +First, the notation used for the arguments, classes of +instructions and pseudoinstructions. +.DS +.TS +tab(:); +l l l. +<cst>:\&=:integer constant (current range \-2**31..2**31\-1) +<dlb>:\&=:data label +<arg>:\&=:<cst> or <dlb> or <dlb>+<cst> or <dlb>\-<cst> +<con>:\&=:integer constant, unsigned constant, floating-point constant +<str>:\&=:string constant (surrounded by double quotes), +<ilb>:\&=:instruction label +::'*' followed by an integer in the range 0..32767. +<pro>:\&=:procedure number ('$' followed by a procedure name) +<val>:\&=:<arg>, <con>, <pro> or <ilb>.
+<par>:\&=:<val> or <str> +<...>*:\&=:zero or more of <...> +<...>+:\&=:one or more of <...> +[...]:\&=:optional ... +.TE +.DE +.P3 "Pseudoinstructions" +.P4 "Storage declaration" +.PP +Initialized global data is allocated by the pseudoinstruction CON, +which needs at least one argument. +Each argument is used to allocate and initialize a number of +consecutive bytes in data memory. +The number of bytes to be allocated and the alignment depend on the type +of the argument. +For each argument, an integral number of words, +determined by the argument type, is allocated and initialized. +.PP +The pseudoinstruction ROM is the same as CON, +except that it guarantees that the initialized words +will not change during the execution of the program. +This information allows optimizers to do +certain calculations such as array indexing and +subrange checking at compile time instead +of at run time. +.PP +The pseudoinstruction BSS allocates +uninitialized global data or large blocks of data initialized +by the same value. +The first argument to this pseudo is the number +of bytes required, which must be a multiple of the wordsize. +The other arguments specify the value used for initialization and +whether the initialization is only for convenience or a strict necessity. +The pseudoinstruction HOL is similar to BSS in that it requests an +(un)initialized global data block. +Addressing of a HOL block, however, is quasi absolute. +The first byte is addressed by 0, +the second byte by 1 etc. in assembly language. +The assembler/loader adds the base address of +the HOL block to these numbers to obtain the +absolute address in the machine language. +.PP +The scope of a HOL block starts at the HOL pseudo and +ends at the next HOL pseudo or at the end of a module +whichever comes first. +Each instruction falls in the scope of at most one +HOL block, the current HOL block. +It is not allowed to have more than one HOL block per procedure.
+.PP +The alignment restrictions are enforced by the +pseudoinstructions. +All initializers are aligned on a multiple of their size or the wordsize +whichever is smaller. +Strings form an exception, they are to be seen as a sequence of initializers +each for one byte, i.e. strings are not padded with zero bytes. +Switching to another type of fragment or placing a label forces +word-alignment. +There are three types of fragments in global data space: CON, ROM and +BSS/HOL. +.IP "BSS <cst1>,<val>,<cst2>" +.br +Reserve <cst1> bytes. +<val> is the value used to initialize the area. +<cst1> must be a multiple of the size of <val>. +<cst2> is 0 if the initialization is not strictly necessary, +1 if it is. +.IP "HOL <cst1>,<val>,<cst2>" +.br +Idem, but all following absolute global data references will +refer to this block. +Only one HOL is allowed per procedure, +it has to be placed before the first instruction. +.IP "CON <val>+" +.br +Assemble global data words initialized with the <val> constants. +.IP "ROM <val>+" +.br +Idem, but the initialized data will never be changed by the program. +.P4 "Partitioning" +.PP +Two pseudoinstructions partition the input into procedures: +.IP "PRO <pro>[,<cst>]" +.br +Start of procedure. +<pro> is the procedure name. +<cst> is the number of bytes for locals. +The number of bytes for locals must be specified in the PRO or +END pseudoinstruction. +When specified in both, they must be identical. +.IP "END [<cst>]" +.br +End of Procedure. +<cst> is the number of bytes for locals. +The number of bytes for locals must be specified in either the PRO or +END pseudoinstruction or both. +.P4 "Visibility" +.PP +Names of data and procedures in an EM module can either be +internal or external. +External names are known outside the module and are used to link +several pieces of a program. +Internal names are not known outside the modules they are used in. +Other modules will not 'see' an internal name. +.QQ +To reduce the number of passes needed, +it must be known at the first occurrence whether +a name is internal or external.
+If the first occurrence of a name is in a definition, +the name is considered to be internal. +If the first occurrence of a name is a reference, +the name is considered to be external. +If the first occurrence is in one of the following pseudoinstructions, +the effect of the pseudo has precedence. +.IP "EXA <dlb>" +.br +External name. +<dlb> is known, possibly defined, outside this module. +Note that <dlb> may be defined in the same module. +.IP "EXP <pro>" +.br +External procedure identifier. +Note that <pro> may be defined in the same module. +.IP "INA <dlb>" +.br +Internal name. +<dlb> is internal to this module and must be defined in this module. +.IP "INP <pro>" +.br +Internal procedure. +<pro> is internal to this module and must be defined in this module. +.P4 "Miscellaneous" +.PP +Two other pseudoinstructions provide miscellaneous features: +.IP "EXC <cst1>,<cst2>" +.br +Two blocks of instructions preceding this one are +interchanged before being processed. +<cst1> gives the number of lines of the first block. +<cst2> gives the number of lines of the second one. +Blank and pure comment lines do not count. +This instruction is obsolete. Its use is strongly discouraged. +.IP "MES <cst>[,<par>]*" +.br +A special type of comment. +Used by compilers to communicate with the +optimizer, assembler, etc. as follows: +.RS +.IP "MES 0" +.br +An error has occurred, stop further processing. +.IP "MES 1" +.br +Suppress optimization. +.IP "MES 2,<cst1>,<cst2>" +.br +Use wordsize <cst1> and pointer size <cst2>. +.IP "MES 3,<cst1>,<cst2>,<cst3>,<cst4>" +.br +Indicates that a local variable is never referenced indirectly. +Used to indicate that a register may be used for a specific +variable. +<cst1> is offset in bytes from AB if positive +and offset from LB if negative. +<cst2> gives the size of the variable. +<cst3> indicates the class of the variable. +The following values are currently recognized: +.br +0\0\0\0The variable can be used for anything. +.br +1\0\0\0The variable is used as a loopindex. +.br +2\0\0\0The variable is used as a pointer. +.br +3\0\0\0The variable is used as a floating point number.
+.br +<cst4> gives the priority of the variable, +higher numbers indicate better candidates. +.IP "MES 4,<cst>,<str>" +.br +Number of source lines in file <str> (for profiler). +.IP "MES 5" +.br +Floating point used. +.IP "MES 6,<val>*" +.br +Comment. Used to provide comments in compact assembly language. +.IP "MES 7,....." +.br +Reserved. +.IP "MES 8,<pro>[,<dlb>]..." +.br +Library module. Indicates that the module may only be loaded +if it is useful, that is, if it can satisfy any unresolved +references during the loading process. +May not be preceded by any other pseudo, except MES's. +.IP "MES 9,<cst>" +.br +Guarantees that no more than <cst> bytes of parameters are +accessed, either directly or indirectly. +.IP "MES 10,<cst>[,<par>]*" +.br +This message number is reserved for the global optimizer. +It inserts these messages in its output as hints to backends. +<cst> indicates the type of hint. +.IP "MES 11" +.br +Procedures containing this message are possible destinations of +non-local goto's with the GTO instruction. +Some backends keep locals in registers, +the locals in this procedure should not be kept in registers and +all registers containing locals of other procedures should be +saved upon entry to this procedure. +.RE +.IP "" +Each backend is free to skip irrelevant MES pseudos. +.P2 "The Compact Assembly Language" +.PP +The assembler accepts input in a highly encoded form. +This +form is intended to reduce the amount of file transport between the +front ends, optimizers +and back ends, and also reduces the amount of storage required for storing +libraries. +Libraries are stored as archived compact assembly language, not machine +language. +.PP +When beginning to read the input, the assembler is in neutral state, and +expects either a label or an instruction (including the pseudoinstructions). +The meaning of the next byte(s) when in neutral state is as follows, where +b1, b2 +etc. represent the succeeding bytes. +.TS +tab(:); +rw17 4 l.
+0:Reserved for future use +1\-129:Machine instructions, see Appendix A, alphabetical list +130\-149:Reserved for future use +150\-161:BSS,CON,END,EXA,EXC,EXP,HOL,INA,INP,MES,PRO,ROM +162\-179:Reserved for future pseudoinstructions +180\-239:Instruction labels 0 \- 59 (180 is local label 0 etc.) +240\-244:See the Common Table below +245\-255:Not used +.TE +After a label, the assembler is back in neutral state; it can immediately +accept another label or an instruction in the next byte. +No linefeeds are used to separate lines. +.PP +If an opcode expects no arguments, +the assembler is back in neutral state after +reading the one byte containing the instruction number. +If it has one or +more arguments (only pseudos have more than 1), the arguments follow directly, +encoded as follows: +.TS +tab(:); +r l. +0\-239:Offsets from \-120 to 119 +240\-255:See the Common Table below +.TE +Absence of an optional argument is indicated by a special +byte. +.TS +tab(:); +c s s s +c c s c +l4 l l4 l. +Common Table for Neutral State and Arguments +class:bytes:description + +:240:b1:Instruction label b1 (Not used for branches) +:241:b1 b2:16 bit instruction label (256*b2 + b1) +:242:b1:Global label .0\-.255, with b1 being the label +:243:b1 b2:Global label .0\-.32767 +:::with 256*b2+b1 being the label +:244::Global symbol not of the form .nnn +:245:b1 b2:16 bit constant +:246:b1 b2 b3 b4:32 bit constant +:247:b1 .. b8:64 bit constant +:248::Global label + (possibly negative) constant +:249::Procedure name (not including $) +:250::String used in CON or ROM (no quotes-no escapes) +:251::Integer constant, size bytes +:252::Unsigned constant, size bytes +:253::Floating constant, size bytes +:254::unused +:255::Delimiter for argument lists or +:::indicates absence of optional argument +.TE 1 +.PP +The bytes specifying the value of a 16, 32 or 64 bit constant +are presented in two's complement notation, with the least +significant byte first. 
For example: the value of a 32 bit +constant is ((s4*256+b3)*256+b2)*256+b1, where s4 is b4\-256 if +b4 is greater than or equal to 128 else s4 takes the value of b4. +A <string> consists of a <cst> immediately followed by +a sequence of bytes with length <cst>. +.PP +.ne 8 +The pseudoinstructions fall into several categories, depending on their +arguments: +.DS +Group 1 \- EXC, BSS, HOL have a known number of arguments +Group 2 \- EXA, EXP, INA, INP have a string as argument +Group 3 \- CON, MES, ROM have a variable number of various things +Group 4 \- END, PRO have a trailing optional argument. +.DE +Groups 1 and 2 +use the encoding described above. +Group 3 also uses the encoding listed above, with an <end> byte after the +last argument to indicate the end of the list. +Group 4 uses +an <end> byte if the trailing argument is not present. +.TS +tab(|); +l s l +l s s +l 2 lw(30) l. +Example ASCII|Example compact +(LOC = 69, BRA = 18 here): + +2||182 +1||181 +\0LOC|10|69 130 +\0LOC|\-10|69 110 +\0LOC|300|69 245 44 1 +\0BRA|*19|18 139 +300||241 44 1 +.3||242 3 +\0CON|4,9,*2,$foo|151 124 129 240 2 249 123 102 111 111 255 +\0CON|.35|151 242 35 255 +.TE +.P2 "Assembly language instruction list" +.PP +For each instruction in the list the range of argument values +in the assembly language is given. +The column headed \fIassem\fP contains the mnemonics defined +in 11.1.3. +The following column specifies restrictions of the argument +value. +Addresses have to obey the restrictions mentioned in chapter 2. +The classes of arguments +are indicated by letters: +.ds b \fBb\fP +.ds c \fBc\fP +.ds d \fBd\fP +.ds g \fBg\fP +.ds f \fBf\fP +.ds l \fBl\fP +.ds n \fBn\fP +.ds w \fBw\fP +.ds p \fBp\fP +.ds r \fBr\fP +.ds s \fBs\fP +.ds z \fBz\fP +.ds o \fBo\fP +.ds - \fB\-\fP +.sp +.TS +tab(:); +c s l l +l l 15 l l.
+\fIassem\fP:constraints:rationale + +\&\*c:cst:fits word:constant +\&\*d:cst:fits double word:constant +\&\*l:cst::local offset +\&\*g:arg:>= 0:global offset +\&\*f:cst::fragment offset +\&\*n:cst:>= 0:counter +\&\*s:cst:>0 , word multiple:object size +\&\*z:cst:>= 0 , zero or word multiple:object size +\&\*o:cst:> 0 , word multiple or fraction:object size +\&\*w:cst:> 0 , word multiple:object size * +\&\*p:pro::pro identifier +\&\*b:ilb:>= 0:label number +\&\*r:cst:0,1,2:register number +\&\*-:::no argument +.TE +.PP +The * at the rationale for \*w indicates that the argument +can either be given as argument or on top of the stack. +If the argument is omitted, the argument is fetched from the +stack; +it is assumed to be a wordsized unsigned integer. +Instructions that check for undefined integer or floating-point +values and underflow or overflow +are indicated below by (*). +.sp 1 +.DS +.ta 12n +GROUP 1 \- LOAD + + LOC \*c : Load constant (i.e. push one word onto the stack) + LDC \*d : Load double constant ( push two words ) + LOL \*l : Load word at \*l-th local (\*l<0) or parameter (\*l>=0) + LOE \*g : Load external word \*g + LIL \*l : Load word pointed to by \*l-th local or parameter + LOF \*f : Load offsetted (top of stack + \*f yield address) + LAL \*l : Load address of local or parameter + LAE \*g : Load address of external + LXL \*n : Load lexical (address of LB \*n static levels back) + LXA \*n : Load lexical (address of AB \*n static levels back) + LOI \*o : Load indirect \*o bytes (address is popped from the stack) + LOS \*w : Load indirect, \*w-byte integer on top of stack gives object size + LDL \*l : Load double local or parameter (two consecutive words are stacked) + LDE \*g : Load double external (two consecutive externals are stacked) + LDF \*f : Load double offsetted (top of stack + \*f yield address) + LPI \*p : Load procedure identifier +.DE + +.DS +GROUP 2 \- STORE + + STL \*l : Store local or parameter + STE \*g : Store external + SIL \*l : 
Store into word pointed to by \*l-th local or parameter + STF \*f : Store offsetted + STI \*o : Store indirect \*o bytes (pop address, then data) + STS \*w : Store indirect, \*w-byte integer on top of stack gives object size + SDL \*l : Store double local or parameter + SDE \*g : Store double external + SDF \*f : Store double offsetted +.DE + +.DS +GROUP 3 \- INTEGER ARITHMETIC + + ADI \*w : Addition (*) + SBI \*w : Subtraction (*) + MLI \*w : Multiplication (*) + DVI \*w : Division (*) + RMI \*w : Remainder (*) + NGI \*w : Negate (two's complement) (*) + SLI \*w : Shift left (*) + SRI \*w : Shift right (*) +.DE + +.DS +GROUP 4 \- UNSIGNED ARITHMETIC + + ADU \*w : Addition + SBU \*w : Subtraction + MLU \*w : Multiplication + DVU \*w : Division + RMU \*w : Remainder + SLU \*w : Shift left + SRU \*w : Shift right +.DE + +.DS +GROUP 5 \- FLOATING POINT ARITHMETIC + + ADF \*w : Floating add (*) + SBF \*w : Floating subtract (*) + MLF \*w : Floating multiply (*) + DVF \*w : Floating divide (*) + NGF \*w : Floating negate (*) + FIF \*w : Floating multiply and split integer and fraction part (*) + FEF \*w : Split floating number in exponent and fraction part (*) +.DE + +.DS +GROUP 6 \- POINTER ARITHMETIC + + ADP \*f : Add \*f to pointer on top of stack + ADS \*w : Add \*w-byte value and pointer + SBS \*w : Subtract pointers in same fragment and push diff as size \*w integer +.DE + +.DS +GROUP 7 \- INCREMENT/DECREMENT/ZERO + + INC \*- : Increment word on top of stack by 1 (*) + INL \*l : Increment local or parameter (*) + INE \*g : Increment external (*) + DEC \*- : Decrement word on top of stack by 1 (*) + DEL \*l : Decrement local or parameter (*) + DEE \*g : Decrement external (*) + ZRL \*l : Zero local or parameter + ZRE \*g : Zero external + ZRF \*w : Load a floating zero of size \*w + ZER \*w : Load \*w zero bytes +.DE + +.DS +GROUP 8 \- CONVERT (stack: source, source size, dest. 
size (top)) + + CII \*- : Convert integer to integer (*) + CUI \*- : Convert unsigned to integer (*) + CFI \*- : Convert floating to integer (*) + CIF \*- : Convert integer to floating (*) + CUF \*- : Convert unsigned to floating (*) + CFF \*- : Convert floating to floating (*) + CIU \*- : Convert integer to unsigned + CUU \*- : Convert unsigned to unsigned + CFU \*- : Convert floating to unsigned +.DE + +.DS +GROUP 9 \- LOGICAL + + AND \*w : Boolean and on two groups of \*w bytes + IOR \*w : Boolean inclusive or on two groups of \*w bytes + XOR \*w : Boolean exclusive or on two groups of \*w bytes + COM \*w : Complement (one's complement of top \*w bytes) + ROL \*w : Rotate left a group of \*w bytes + ROR \*w : Rotate right a group of \*w bytes +.DE + +.DS +GROUP 10 \- SETS + + INN \*w : Bit test on \*w byte set (bit number on top of stack) + SET \*w : Create singleton \*w byte set with bit n on (n is top of stack) +.DE + +.DS +GROUP 11 \- ARRAY + + LAR \*w : Load array element, descriptor contains integers of size \*w + SAR \*w : Store array element + AAR \*w : Load address of array element +.DE + +.DS +GROUP 12 \- COMPARE + + CMI \*w : Compare \*w byte integers, Push negative, zero, positive for <, = or > + CMF \*w : Compare \*w byte reals + CMU \*w : Compare \*w byte unsigneds + CMS \*w : Compare \*w byte values, can only be used for bit for bit equality test + CMP \*- : Compare pointers + + TLT \*- : True if less, i.e. iff top of stack < 0 + TLE \*- : True if less or equal, i.e. iff top of stack <= 0 + TEQ \*- : True if equal, i.e. iff top of stack = 0 + TNE \*- : True if not equal, i.e. iff top of stack non zero + TGE \*- : True if greater or equal, i.e. iff top of stack >= 0 + TGT \*- : True if greater, i.e. 
iff top of stack > 0 +.DE + +.DS +GROUP 13 \- BRANCH + + BRA \*b : Branch unconditionally to label \*b + + BLT \*b : Branch less (pop 2 words, branch if top > second) + BLE \*b : Branch less or equal + BEQ \*b : Branch equal + BNE \*b : Branch not equal + BGE \*b : Branch greater or equal + BGT \*b : Branch greater + + ZLT \*b : Branch less than zero (pop 1 word, branch negative) + ZLE \*b : Branch less or equal to zero + ZEQ \*b : Branch equal zero + ZNE \*b : Branch not zero + ZGE \*b : Branch greater or equal zero + ZGT \*b : Branch greater than zero +.DE + +.DS +GROUP 14 \- PROCEDURE CALL + + CAI \*- : Call procedure (procedure identifier on stack) + CAL \*p : Call procedure (with identifier \*p) + LFR \*s : Load function result + RET \*z : Return (function result consists of top \*z bytes) +.DE + +.DS +GROUP 15 \- MISCELLANEOUS + + ASP \*f : Adjust the stack pointer by \*f + ASS \*w : Adjust the stack pointer by \*w-byte integer + BLM \*z : Block move \*z bytes; first pop destination addr, then source addr + BLS \*w : Block move, size is in \*w-byte integer on top of stack + CSA \*w : Case jump; address of jump table at top of stack + CSB \*w : Table lookup jump; address of jump table at top of stack + DCH \*- : Follow dynamic chain, convert LB to LB of caller + DUP \*s : Duplicate top \*s bytes + DUS \*w : Duplicate top \*w bytes + EXG \*w : Exchange top \*w bytes + FIL \*g : File name (external 4 := \*g) + GTO \*g : Non-local goto, descriptor at \*g + LIM \*- : Load 16 bit ignore mask + LIN \*n : Line number (external 0 := \*n) + LNI \*- : Line number increment + LOR \*r : Load register (0=LB, 1=SP, 2=HP) + LPB \*- : Convert local base to argument base + MON \*- : Monitor call + NOP \*- : No operation + RCK \*w : Range check; trap on error + RTT \*- : Return from trap + SIG \*- : Trap errors to proc identifier on top of stack, \-2 resets default + SIM \*- : Store 16 bit ignore mask + STR \*r : Store register (0=LB, 1=SP, 2=HP) + TRP \*- : Cause trap to occur 
(Error number on stack) +.DE diff --git a/doc/em/cont.nr b/doc/em/cont.nr new file mode 100644 index 0000000..e61369f --- /dev/null +++ b/doc/em/cont.nr @@ -0,0 +1,4 @@ +.de PT +.. +.bp +.Ct diff --git a/doc/em/descr.nr b/doc/em/descr.nr new file mode 100644 index 0000000..7035a6e --- /dev/null +++ b/doc/em/descr.nr @@ -0,0 +1,153 @@ +.bp +.P1 "DESCRIPTORS" +.PP +Several instructions use descriptors, notably the range check instruction, +the array instructions, the goto instruction and the case jump instructions. +Descriptors reside in data space. +They may be constructed at run time, but +more often they are fixed and allocated in ROM data. +.PP +All instructions using descriptors, except GTO, have as argument +the size of the integers in the descriptor. +All implementations have to allow integers of the size of a +word in descriptors. +All integers popped from the stack and used for indexing or comparing +must have the same size as the integers in the descriptor. +.P2 "Range check descriptors" +.PP +Range check descriptors consist of two integers: +.IP 1. +lower bound signed +.IP 2. +upper bound signed +.LP +The range check instruction checks an integer on the stack against +these bounds and causes a trap if the value is outside the interval. +The value itself is neither changed nor removed from the stack. +.P2 "Array descriptors" +.PP +Each array descriptor describes a single dimension. +For multi-dimensional arrays, several array instructions are +needed to access a single element. +Array descriptors contain the following three integers: +.IP 1. +lower bound signed +.IP 2. +upper bound \- lower bound unsigned +.IP 3. +number of bytes per element unsigned +.LP +The array instructions LAR, SAR and AAR have the pointer to the start +of the descriptor as operand on the stack. +.LP +The element A[I] is fetched as follows: +.IP 1. +Stack the address of A (e.g., using LAE or LAL) +.IP 2. +Stack the value of I (n-byte integer) +.IP 3. 
+Stack the pointer to the descriptor (e.g., using LAE) +.IP 4. +LAR n (n is the size of the integers in the descriptor and I) +.LP +All array instructions first pop the address of the descriptor +and the index. +If the index is not within the bounds specified, a trap occurs. +If ok, (I~\-~lower bound) is multiplied +by the number of bytes per element (the third word). The result is added +to the address of A and replaces A on the stack. +.QQ +At this point LAR, SAR and AAR diverge. +AAR is finished. LAR pops the address and fetches the data +item, +the size being specified by the descriptor. +The usual restrictions for memory access must be obeyed. +SAR pops the address and stores the +data item now exposed. +.P2 "Non-local goto descriptors" +.PP +The GTO instruction provides a way of returning directly to any +active procedure invocation. +The argument of the instruction is the address of a descriptor +containing three pointers: +.IP 1. +value of PC after the jump +.IP 2. +value of SP after the jump +.IP 3. +value of LB after the jump +.LP +GTO loads PC, SP and LB from the descriptor, +thereby jumping to a procedure +and removing zero or more frames from the stack. +The LB, SP and PC in the descriptor must belong to a +dynamically enclosing procedure, +because some EM implementations will need to backtrack through +the dynamic chain and use the implementation dependent data +in frames to restore registers etc. +.P2 "Case descriptors" +.PP +The case jump instructions CSA and CSB both +provide multiway branches selected by a case index. +Both fetch two operands from the stack: +first a pointer to the low address of the case descriptor +and then the case index. +CSA uses the case index as index in the descriptor table, but CSB searches +the table for an occurrence of the case index. +Therefore, the descriptors for CSA and CSB, +as shown in figure 4, are different.
+All pointers in the table must be addresses of instructions in the +procedure executing the case instruction. +.PP +CSA selects the new PC by indexing. +If the index, a signed integer, is greater than or equal to +the lower bound and less than or equal to the upper bound, +then fetch the new PC from the list of instruction pointers by indexing with +index-lower. +The table does not contain the value of the upper bound, +but the value of upper-lower as an unsigned integer. +The default instruction pointer is used when the index is out of bounds. +If the resulting PC is 0, then trap. +.PP +CSB selects the new PC by searching. +The table is searched for an entry with index value equal to the case index. +That entry or, if none is found, the default entry contains the +new PC. +When the resulting PC is 0, a trap is performed. +.PP +The choice of which case instruction to use for +each source language case statement +is up to the front end. +If the range of the index value is dense, i.e +.DS +(highest value \- lowest value) / number of cases +.DE +is less than some threshold, then CSA is the obvious choice. +If the range is sparse, CSB is better. +.Dr 30 + |--------------------| |--------------------| high address + | pointer for upb | | pointer n-1 | + |--------------------| |- - - - - - - | + | . | | index n-1 | + | . | |--------------------| + | . | | . | + | . | | . | + | . | | . | + | . | |--------------------| + | . | | pointer 1 | + |--------------------| |- - - - - - - | + | pointer for lwb+1 | | index 1 | + |--------------------| |--------------------| + | pointer for lwb | | pointer 0 | + |--------------------| |- - - - - - - | + | upper - lower | | index 0 | + |--------------------| |--------------------| + | lower bound | | number of entries | + |--------------------| |--------------------| + | default pointer | | default pointer | low address + |--------------------| |--------------------| + + CSA descriptor CSB descriptor +.Df +Figure 4. 
Descriptor layout for CSA and CSB +.De diff --git a/doc/em/dispat1.sed b/doc/em/dispat1.sed new file mode 100644 index 0000000..c459211 --- /dev/null +++ b/doc/em/dispat1.sed @@ -0,0 +1,6 @@ +1c\ +.TS\ +r l l l l l l l l. +s/-/\\-/g +/DISPATCH2/,$c\ +.TE diff --git a/doc/em/dispat2.sed b/doc/em/dispat2.sed new file mode 100644 index 0000000..8955df5 --- /dev/null +++ b/doc/em/dispat2.sed @@ -0,0 +1,6 @@ +1,/DISPATCH2/c\ +.TS\ +r l l l l l l l l. +s/-/\\-/g +/DISPATCH3/,$c\ +.TE diff --git a/doc/em/dispat3.sed b/doc/em/dispat3.sed new file mode 100644 index 0000000..881881e --- /dev/null +++ b/doc/em/dispat3.sed @@ -0,0 +1,6 @@ +1,/DISPATCH3/c\ +.TS\ +r l l l l l l l l. +s/-/\\-/g +$a\ +.TE diff --git a/doc/em/dspace.nr b/doc/em/dspace.nr new file mode 100644 index 0000000..810520d --- /dev/null +++ b/doc/em/dspace.nr @@ -0,0 +1,376 @@ +.bp +.P1 "DATA ADDRESS SPACE" +.PP +The data address space is divided into three parts, called 'areas', +each with its own addressing method: +global data area, +local data area (including the stack), +and heap data area. +These data areas must be part of the same +address space because all data is accessed by +the same type of pointers. +.PP +Space for global data is reserved using several pseudoinstructions in the +assembly language, as described in +the next paragraph and chapter 11. +The size of the global data area is fixed per program. +.QQ +Global data is addressed absolutely in the machine language. +Many instructions are available to address global data. +They all have an absolute address as argument. +Examples are LOE, LAE and STE. +.PP +Part of the global data area is initialized by the +compiler, the +rest is not initialized at all or is initialized +with a value, typically \-32768 or 0. +Part of the initialized global data may be made read-only +if the implementation supports protection. 
+.PP +The local data area is used as a stack, +which grows from high to low addresses +and contains some data for each active procedure +invocation, called a 'frame'. +The size of the local data area varies dynamically during +execution. +Below the current procedure frame resides the operand stack. +The stack pointer SP always points to the bottom of +the local data area. +Local data is addressed by offsetting from the local base pointer LB. +LB always points to the frame of the current procedure. +Only the words of the current frame and the parameters +can be addressed directly. +Variables in other active procedures are addressed by following +the chain of statically enclosing procedures using the LXL or LXA instruction. +The variables in dynamically enclosing procedures can be +addressed with the use of the DCH instruction. +.QQ +Many instructions have offsets to LB as argument, +for instance LOL, LAL and STL. +The arguments of these instructions range from \-1 to some +(negative) minimum +for the access of local storage and from 0 to some (positive) +maximum for parameter access. +.PP +The procedure call instructions CAL and CAI each create a new frame +on the stack. +Each procedure has an assembly-time parameter specifying +the number of bytes needed for local storage. +This storage is allocated each time the procedure is called and +must be a multiple of the wordsize. +Each procedure, therefore, starts with a stack with the local variables +already allocated. +The return instructions RET and RTT remove a frame. +The actual parameters must be removed by the calling procedure. +.PP +RET may copy some words from the stack of +the returning procedure to an unnamed 'function return area'. +This area is available for 'READ-ONCE' access using the LFR instruction. +The result of a LFR is only defined if the size used to fetch +is identical to the size used in the last return. 
+The instruction ASP, used to remove the parameters from the +stack, the branch instruction BRA and the non-local goto +instruction GTO are the only ones that leave the contents of +the 'function return area' intact. +All other instructions are allowed to destroy the function +return area. +Thus parameters can be popped before fetching the function result. +The maximum size of all function return areas is +implementation dependent, +but should allow procedure instance identifiers and all +implemented objects of type integer, unsigned, float +and pointer to be returned. +In most implementations +the maximum size of the function return +area is twice the pointer size, +because we want to be able to handle 'procedure instance +identifiers' which consist of a procedure identifier and the LB +of a frame belonging to that procedure. +.PP +The heap data area grows upwards, to higher numbered +addresses. +It is initially empty. +The initial value of the heap pointer HP +marks the low end. +The heap pointer may be manipulated +by the LOR and STR instructions. +The heap can only be addressed indirectly, +by pointers derived from previous values of HP. +.P2 "Global data area" +.PP +The initial size of the global data area is determined at assembly time. +Global data is allocated by several +pseudoinstructions in the EM assembly +language. +Each pseudoinstruction allocates one or more bytes. +The bytes allocated for a single pseudo form +a 'block'. +A block differs from a fragment, because, +under certain conditions, several blocks are allocated +in a single fragment. +This guarantees that the bytes of these blocks +are consecutive. +.PP +Global data is addressed absolutely in binary +machine language. +Most compilers, however, +cannot assign absolute addresses to their global variables, +especially not if the language +allows programs to be composed of several separately compiled modules. 
+The assembly language therefore allows the compiler to name +the first address of a global data block with an alphanumeric label. +Moreover, the only way to address such a named global data block +in the assembly language is by using its name. +It is the task of the assembler/loader to +translate these labels into absolute addresses. +These labels may also be used +in CON and ROM pseudoinstructions to initialize pointers. +.PP +The pseudoinstruction CON allocates initialized data. +ROM acts like CON but indicates that the initialized data will +not change during execution of the program. +The pseudoinstruction BSS allocates a block of uninitialized +or identically initialized +data. +The pseudoinstruction HOL is similar to BSS, +but it alters the meaning of subsequent absolute addressing in +the assembly language. +.PP +Another type of global data is a small block, +called the ABS block, with an implementation defined size. +Storage in this type of block can only be addressed +absolutely in assembly language. +The first word has address 0 and is used to maintain the +source line number. +Special instructions LIN and LNI are provided to +update this counter. +A pointer at location 4 points to a string containing the +current source file name. +The instruction FIL can be used to update the pointer. +.PP +All numeric arguments of the instructions that address +the global data area refer to locations in the +ABS block unless +they are preceded by at least one HOL pseudo in the same +module, +in which case they refer to the storage area allocated by the +last HOL pseudoinstruction. +Thus LOE 0 loads the zeroth word of the most recent HOL, unless no HOL has +appeared in the current file so +far, in which case it loads the zeroth word of the +ABS fragment. +.PP +The global data area is highly fragmented. +The ABS block and each HOL and BSS block are separate fragments. +The way fragments are formed from CON and ROM blocks is more complex. 
+The assemblers group several blocks into a single fragment. +A fragment only contains blocks of the same type: CON or ROM. +It is guaranteed that the bytes allocated for two consecutive CON pseudos are +allocated consecutively in a single fragment, unless +these CON pseudos are separated in the assembly language program +by a data label definition or one or more of the following pseudos: +.DS +ROM, BSS, HOL and END +.DE +An analogous rule holds for ROM pseudos. +.P2 "Local data area" +.PP +The local data area consists of a sequence of frames, one for +each active procedure. +Below the frame of the current procedure resides the +expression stack. +Frames are generated by procedure calls and are +removed by procedure returns. +A procedure frame consists of six 'zones': +.DS +1. The return status block +2. The local variables and compiler temporaries +3. The register save block +4. The dynamic local generators +5. The operand stack. +6. The parameters of a procedure one level deeper +.DE +A sample frame is shown in Figure 1. +.PP +Before a procedure call is performed the actual +parameters are pushed onto the stack of the calling procedure. +The exact details are compiler dependent. +EM allows procedures to be called with a variable number of +parameters. +The implementation of the C-language almost forces its runtime +system to push the parameters in reverse order, that is, +the first positional parameter last. +Most compilers use the C calling convention to be compatible. +The parameters of a procedure belong to the frame of the +calling procedure. +Note that the evaluation of the actual parameters may imply +the calling of procedures. +The parameters can be accessed with certain instructions using +offsets of 0 and greater. +The first byte of the last parameter pushed has offset 0. +Note that the parameter at offset 0 has a special use in the +instructions following the static chain (LXL and LXA). 
+These instructions assume that this parameter contains the LB of +the statically enclosing procedure. +Procedures that do not have a statically enclosing procedure +do not need a static link at offset 0. +.PP +Two instructions are available to perform procedure calls, CAL +and CAI. +Several tasks are performed by these call instructions. +.QQ +First, a part of the status of the calling procedure is +saved on the stack in the return status block. +This block should contain the return address of the calling +procedure, its LB and other implementation dependent data. +The size of this block is fixed for any given implementation +because the lexical instructions LPB, LXL and LXA must be able to +obtain the base addresses of the procedure parameters \fBand\fP local +variables. +An alternative solution can be used on machines with a highly +segmented address space. +The stack frames need not be contiguous then and the first +status save area can contain the parameter base AB, +which has the value of SP just after the last parameter has +been pushed. +.QQ +Second, the LB is changed to point to the +first word above the local variables. +The new LB is a copy of the SP after the return status +block has been pushed. +.QQ +Third, the amount of local storage needed by the procedure is +reserved. +The parameters and local storage are accessed by the same instructions. +Negative offsets are used for access to local variables. +The highest byte, that is the byte nearest +to LB, has to be accessed with offset \-1. +The pseudoinstruction specifying the entry point of a +procedure has an argument that specifies the amount of local +storage needed. +The local variables allocated by the CAI or CAL instructions +are the only ones that can be accessed with a fixed negative offset. +The initial value of the allocated words is +not defined, but implementations that check for undefined +values will probably initialize them with a +special 'undefined' pattern, typically \-32768.
+.QQ +Fourth, any EM implementation is allowed to reserve a variable size +block beneath the local variables. +This block could, for example, be used to save a variable number +of registers. +.QQ +Finally, the address of the entry point of the called procedure +is loaded into the Program Counter. +.PP +The ASP instruction can be used to allocate further (dynamic) +local storage. +The base address of such storage must be obtained with a LOR~SP +instruction. +This same instruction ASP may also be used +to remove some words from the stack. +.PP +There is a version of ASP, called ASS, which fetches the number +of bytes to allocate from the stack. +It can be used to allocate space for local +objects whose size is unknown at compile time, +so called 'dynamic local generators'. +.PP +Control is returned to the calling procedure with a RET instruction. +Any return value is then copied to the 'function return area'. +The frame created by the call is deallocated and the status of +the calling procedure is restored. +The value of SP just after the return value has been popped must +be the same as the +value of SP just before executing the first instruction of this +invocation. +This means that when a RET is executed the operand stack can +only contain the return value and all dynamically generated locals must be +deallocated. +Violating this restriction might result in hard to detect +errors. +The calling procedure has to remove the parameters from the stack. +This can be done with the aforementioned ASP instruction. +.PP +Each procedure frame is a separate fragment. +Because any fragment may be placed anywhere in memory, +procedure frames need not be contiguous. +.Dr 47 + |===============================| + | actual parameter n-1 | + |-------------------------------| + | . | + | . | + | . 
| + |-------------------------------| + | actual parameter 0 | ( <\- AB ) + |===============================| + + + |===============================| + |///////////////////////////////| + |///// return status block /////| + |///////////////////////////////| <\- LB + |===============================| + | | + | local variables | + | | + |-------------------------------| + | | + | compiler temporaries | + | | + |===============================| + |///////////////////////////////| + |///// register save block /////| + |///////////////////////////////| + |===============================| + | | + | dynamic local generators | + | | + |===============================| + | operand | + |-------------------------------| + | operand | + |===============================| + | parameter m-1 | + |-------------------------------| + | . | + | . | + | . | + |-------------------------------| + | parameter 0 | <\- SP + |===============================| +.Df +Figure 1. A sample procedure frame and parameters. +.De +.P2 "Heap data area" +.PP +The heap area starts empty, with HP +pointing to the low end of it. +HP always contains a word address. +A copy of HP can always be obtained with the LOR instruction. +A new value may be stored in the heap pointer using the STR instruction. +If the new value is greater than the old one, +then the heap grows. +If it is smaller, then the heap shrinks. +HP may never point below its original value. +All words between the current HP and the original HP +are allocated to the heap. +The heap may not grow into a part of memory that is already allocated. +When this is attempted, the STR instruction will cause a trap to occur. +In this case, HP retains its old value. +.PP +The only way to address the heap is indirectly. +Whenever an object is allocated by increasing HP, +then the old HP value must be saved and can be used later to address +the allocated object. 
+If, in the meantime, HP is decreased so that the object +is no longer part of the heap, then an attempt to access +the object is not allowed. +Furthermore, if the heap pointer is increased again to above +the object address, then access to the old object gives undefined results. +.PP +The heap is a single fragment. +All bytes have consecutive addresses. +No limits are imposed on the size of the heap as long as it fits +in the available data address space. diff --git a/doc/em/em.i b/doc/em/em.i new file mode 100644 index 0000000..20b05fe --- /dev/null +++ b/doc/em/em.i @@ -0,0 +1,1678 @@ +.bp +.AP "EM INTERPRETER" +.nf +.ft CW +.lg 0 +.nr x \w' ' +.ta \nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu +\nxu + +{ This is an interpreter for EM. It serves as the official machine + definition. This interpreter must run on a machine which supports + arithmetic with words and memory offsets. + + Certain aspects of the definition are over specified. In particular: + + 1. The representation of an address on the stack need not be the + numerical value of the memory location. + + 2. The state of the stack is not defined after a trap has aborted + an instruction in the middle. For example, it is officially un- + defined whether the second operand of an ADD instruction has + been popped or not if the first one is undefined ( -32768 or + unsigned 32768). + + 3. The memory layout is implementation dependent. Only the most + basic checks are performed whenever memory is accessed. + + 4. The representation of an integer or set on the stack is not fixed + in bit order. + + 5. The format and existence of the procedure descriptors depends on + the implementation. + + 6. The result of the compare operators CMI etc. are -1, 0 and 1 + here, but other negative and positive values will do and they + need not be the same each time. + + 7. The shift count for SHL, SHR, ROL and ROR must be in the range 0 + to object size in bits - 1. The effect of a count not in this + range is undefined. 
+} +.bp +{$i256} {$d+} +program em(tables,prog,input,output); + +label 8888,9999; + +const + t15 = 32768; { 2**15 } + t15m1 = 32767; { 2**15 -1 } + t16 = 65536; { 2**16 } + t16m1 = 65535; { 2**16 -1 } + t31m1 = 2147483647; { 2**31 -1 } + + wsize = 2; { number of bytes in a word } + asize = 2; { number of bytes in an address } + fsize = 4; { number of bytes in a floating point number } + maxret =4; { number of words in the return value area } + + signbit = t15; { the power of two indicating the sign bit } + negoff = t16; { the next power of two } + maxsint = t15m1; { the maximum signed integer } + maxuint = t16m1; { the maximum unsigned integer } + maxdbl = t31m1; { the maximum double signed integer } + maxadr = t16m1; { the maximum address } + maxoffs = t15m1; { the maximum offset from an address } + maxbitnr= 15; { the number of the highest bit } + + lineadr = 0; { address of the line number } + fileadr = 4; { address of the file name } + maxcode = 8191; { highest byte in code address space } + maxdata = 8191; { highest byte in data address space } + + { format of status save area } + statd = 4; { how far is static link from lb } + dynd = 2; { how far is dynamic link from lb } + reta = 0; { how far is the return address from lb } + savsize = 4; { size of save area in bytes } + + { procedure descriptor format } + pdlocs = 0; { offset for size of local variables in bytes } + pdbase = asize; { offset for the procedure base } + pdsize = 4; { size of procedure descriptor in bytes = 2*asize } + + { header words } + NTEXT = 1; + NDATA = 2; + NPROC = 3; + ENTRY = 4; + NLINE = 5; + SZDATA = 6; + + escape1 = 254; { escape to secondary opcodes } + escape2 = 255; { escape to tertiary opcodes } + undef = signbit; { the range of integers is -32767 to +32767 } + + { error codes } + EARRAY = 0; ERANGE = 1; ESET = 2; EIOVFL = 3; EFOVFL = 4; + EFUNFL = 5; EIDIVZ = 6; EFDIVZ = 7; EIUND = 8; EFUND = 9; + ECONV = 10; ESTACK = 16; EHEAP = 17; EILLINS = 18; EODDZ = 19; + ECASE = 20; 
EMEMFLT = 21; EBADPTR = 22; EBADPC = 23; EBADLAE = 24; + EBADMON = 25; EBADLIN = 26; EBADGTO = 27; +.ne 20 +.bp +{---------------------------------------------------------------------------} +{ Declarations } +{---------------------------------------------------------------------------} + +type + bitval= 0..1; { one bit } + bitnr= 0..maxbitnr; { bits in machine words are numbered 0 to 15 } + byte= 0..255; { memory is an array of bytes } + adr= {0..maxadr} long; { the range of addresses } + word= {0..maxuint} long;{ the range of unsigned integers } + offs= -maxoffs..maxoffs; { the range of signed offsets from addresses } + size= 0..maxoffs; { the range of sizes is the positive offsets } + sword= {-signbit..maxsint} long; { the range of signed integers } + full= {-maxuint..maxuint} long; { intermediate results need this range } + double={-maxdbl..maxdbl} long; { double precision range } + bftype= (andf,iorf,xorf); { tells which boolean operator needed } + insclass=(prim,second,tert); { tells which opcode table is in use } + instype=(implic,explic); { does opcode have implicit or explicit operand } + iflags= (mini,short,sbit,wbit,zbit,ibit); + ifset= set of iflags; + + mnem = ( NON, + AAR, ADF, ADI, ADP, ADS, ADU,XAND, ASP, ASS, BEQ, + BGE, BGT, BLE, BLM, BLS, BLT, BNE, BRA, CAI, CAL, + CFF, CFI, CFU, CIF, CII, CIU, CMF, CMI, CMP, CMS, + CMU, COM, CSA, CSB, CUF, CUI, CUU, DCH, DEC, DEE, + DEL, DUP, DUS, DVF, DVI, DVU, EXG, FEF, FIF, FIL, + GTO, INC, INE, INL, INN, IOR, LAE, LAL, LAR, LDC, + LDE, LDF, LDL, LFR, LIL, LIM, LIN, LNI, LOC, LOE, + LOF, LOI, LOL, LOR, LOS, LPB, LPI, LXA, LXL, MLF, + MLI, MLU, MON, NGF, NGI, NOP, RCK, RET, RMI, RMU, + ROL, ROR, RTT, SAR, SBF, SBI, SBS, SBU, SDE, SDF, + SDL,XSET, SIG, SIL, SIM, SLI, SLU, SRI, SRU, STE, + STF, STI, STL, STR, STS, TEQ, TGE, TGT, TLE, TLT, + TNE, TRP, XOR, ZEQ, ZER, ZGE, ZGT, ZLE, ZLT, ZNE, + ZRE, ZRF, ZRL); + + dispatch = record + iflag: ifset; + instr: mnem; + case instype of + implic: (implicit:sword); + 
explic: (ilength:byte); + end; + + +var + code: packed array[0..maxcode] of byte; { code space } + data: packed array[0..maxdata] of byte; { data space } + retarea: array[1..maxret ] of word; { return area } + pc,lb,sp,hp,pd: adr; { internal machine registers } + i: integer; { integer scratch variable } + s,t :word; { scratch variables } + sz:size; { scratch variables } + ss,st: sword; { scratch variables } + k :double; { scratch variables } + j:size; { scratch variable used as index } + a,b:adr; { scratch variable used for addresses } + dt,ds:double; { scratch variables for double precision } + rt,rs,x,y:real; { scratch variables for real } + found:boolean; { scratch } + opcode: byte; { holds the opcode during execution } + iclass: insclass; { true for escaped opcodes } + dispat: array[insclass,byte] of dispatch; + retsize:size; { holds size of last LFR } + insr: mnem; { holds the instruction number } + halted: boolean; { normally false } + exitstatus:word; { parameter of MON 1 } + ignmask:word; { ignore mask for traps } + uerrorproc:adr; { number of user defined error procedure } + intrap:boolean; { Set when executing trap(), to catch recursive calls} + trapval:byte; { Set to number of last trap } + header: array[1..8] of adr; + + tables: text; { description of EM instructions } + prog: file of byte; { program and initialized data } +.ne 20 +.sp 2 +{---------------------------------------------------------------------------} +{ Various check routines } +{---------------------------------------------------------------------------} + +{ Only the most basic checks are performed. These routines are inherently + implementation dependent. 
} + +procedure trap(n:byte); forward; + +{ memadr traps with EMEMFLT when a lies above maxdata or in the unallocated gap between the heap top (hp) and the stack top (sp) } +procedure memadr(a:adr); +begin if (a>maxdata) or ((a<sp) and (a>=hp)) then trap(EMEMFLT) end; + +procedure wordadr(a:adr); +begin memadr(a); if (a mod wsize<>0) then trap(EBADPTR) end; + +{ chkadr: objects smaller than a word must be aligned on their own size, all other objects on a word boundary } +procedure chkadr(a:adr; s:size); +begin memadr(a); memadr(a+s-1); { assumption: size is ok } + if s<wsize + then begin if a mod s<>0 then trap(EBADPTR) end + else if a mod wsize<>0 then trap(EBADPTR) +end; + +procedure newpc(a:double); +begin if (a<0) or (a>maxcode) then trap(EBADPC); pc:=a end; + +{ newsp: the new stack pointer must be word aligned and stay between hp and lb } +procedure newsp(a:adr); +begin if (a>lb) or (a<hp) or (a mod wsize<>0) then trap(ESTACK); sp:=a end; + +{ newlb: the new local base must be word aligned and may not lie below sp; a=sp is legal because call does newlb(sp) } +procedure newlb(a:adr); +begin if (a<sp) or (a mod wsize<>0) then trap(ESTACK); lb:=a end; + +procedure newhp(a:adr); +begin if (a>sp) or (a>maxdata+1) or (a mod wsize<>0) + then trap(EHEAP) + else hp:=a +end; + +function argc(a:double):sword; +begin if (a<-signbit) or (a>maxsint) then trap(EILLINS); argc:=a end; + +function argd(a:double):double; +begin if (a<-maxdbl) or (a>maxdbl) then trap(EILLINS); argd:=a end; + +function argl(a:double):offs; +begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argl:=a end; + +function argg(k:double):adr; +begin if (k<0) or (k>maxadr) then trap(EILLINS); argg:=k end; + +function argf(a:double):offs; +begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argf:=a end; + +function argn(a:double):word; +begin if (a<0) or (a>maxuint) then trap(EILLINS); argn:=a end; + +function args(a:double):size; +begin if (a<=0) or (a>maxoffs) + then trap(EODDZ) + else if (a mod wsize)<>0 then trap(EODDZ); + args:=a ; +end; + +function argz(a:double):size; +begin if (a<0) or (a>maxoffs) + then trap(EODDZ) + else if (a mod wsize)<>0 then trap(EODDZ); + argz:=a ; +end; + +function argo(a:double):size; +begin if (a<=0) or (a>maxoffs) + then trap(EODDZ) + else if (a mod wsize<>0) and (wsize mod a<>0) then trap(EODDZ); + argo:=a ; +end; + +function argw(a:double):size; +begin if (a<=0) or (a>maxoffs) or (a>maxuint) + then trap(EODDZ) + else if (a mod wsize)<>0 then trap(EODDZ); + argw:=a ; +end; + +function
argp(a:double):size; +begin if (a<0) or (a>=header[NPROC]) then trap(EILLINS); argp:=a end; + +function argr(a:double):word; +begin if (a<0) or (a>2) then trap(EILLINS); argr:=a end; + +procedure argwf(s:double); +begin if argw(s)<>fsize then trap(EILLINS) end; + +function szindex(s:double):integer; +begin s:=argw(s); if (s mod wsize <> 0) or (s>2*wsize) then trap(EILLINS); + szindex:=s div wsize +end; + +function locadr(l:double):adr; +begin l:=argl(l); if l<0 then locadr:=lb+l else locadr:=lb+l+savsize end; + +function signwd(w:word):sword; +begin if w = undef then trap(EIUND); + if w >= signbit then signwd:=w-negoff else signwd:=w +end; + +function dosign(w:word):sword; +begin if w >= signbit then dosign:=w-negoff else dosign:=w end; + +function unsign(w:sword):word; +begin if w<0 then unsign:=w+negoff else unsign:=w end; + +function chopw(dw:double):word; +begin chopw:=dw mod negoff end; + +function fitsw(w:full;trapno:byte):word; +{ checks whether value fits in signed word, returns unsigned representation} +begin + if (w>maxsint) or (w<-signbit) then + begin trap(trapno); + if w<0 then fitsw:=negoff- (-w)mod negoff + else fitsw:=w mod negoff; + end + else fitsw:=unsign(w) +end; + +function fitd(w:full):double; +begin + if abs(w) > maxdbl then trap(ECONV); + fitd:=w +end; +.ne 20 +.sp 2 +{---------------------------------------------------------------------------} +{ Memory access routines } +{---------------------------------------------------------------------------} + +{ memw returns a machine word as an unsigned integer + memb returns a single byte as a positive integer: 0 <= memb <= 255 + mems(a,s) fetches an object smaller than a word and returns a word + store(a,v) stores the word v at machine address a + storea(a,v) stores the address v at machine address a + storeb(a,b) stores the byte b at machine address a + stores(a,s,v) stores the s least significant bytes of a word at address a + memi returns an offset from the instruction space + Note that the 
procedure descriptors are part of instruction space. + nextpc returns the next byte addressed by pc, incrementing pc + + lino changes the line number word. + filna changes the pointer to the file name. + + All routines check to make sure the address is within range and valid for + the size of the object. If an addressing error is found, a trap occurs. +} + + +function memw(a:adr):word; +var b:word; i:integer; +begin wordadr(a); b:=0; + for i:=wsize-1 downto 0 do b:=256*b + data[a+i] ; + memw:=b +end; + +function memd(a:adr):double; { Always signed } +var b:double; i:integer; +begin wordadr(a); b:=data[a+2*wsize-1]; + if b>=128 then b:=b-256; + for i:=2*wsize-2 downto 0 do b:=256*b + data[a+i] ; + memd:=b +end; + +function mema(a:adr):adr; +var b:adr; i:integer; +begin wordadr(a); b:=0; + for i:=asize-1 downto 0 do b:=256*b + data[a+i] ; + mema:=b +end; + +function mems(a:adr;s:size):word; +var i:integer; b:word; +begin chkadr(a,s); b:=0; for i:=1 to s do b:=b*256+data[a+s-i]; mems:=b end; + +function memb(a:adr):byte; +begin memadr(a); memb:=data[a] end; + +procedure store(a:adr; x:word); +var i:integer; +begin wordadr(a); + for i:=0 to wsize-1 do + begin data[a+i]:=x mod 256; x:=x div 256 end +end; + +procedure storea(a:adr; x:adr); +var i:integer; +begin wordadr(a); + for i:=0 to asize-1 do + begin data[a+i]:=x mod 256; x:=x div 256 end +end; + +procedure stores(a:adr;s:size;v:word); +var i:integer; +begin chkadr(a,s); + for i:=0 to s-1 do begin data[a+i]:=v mod 256; v:=v div 256 end; +end; + +procedure storeb(a:adr; b:byte); +begin memadr(a); data[a]:=b end; + +function memi(a:adr):adr; +var b:adr; i:integer; +begin if (a mod wsize<>0) or (a+asize-1>maxcode) then trap(EBADPTR); b:=0; + for i:=asize-1 downto 0 do b:=256*b + code[a+i] ; + memi:=b +end; + +function nextpc:byte; +begin if pc>=pd then trap(EBADPC); nextpc:=code[pc]; newpc(pc+1) end; + +procedure lino(w:word); +begin store(lineadr,w) end; + +procedure filna(a:adr); +begin storea(fileadr,a) end; +.ne 
20 +.sp 2 +{---------------------------------------------------------------------------} +{ Stack Manipulation Routines } +{---------------------------------------------------------------------------} + +{ push puts a word on the stack + pushsw takes a signed one word integer and pushes it on the stack + pop removes a machine word from the stack and delivers it as a word + popsw removes a machine word from the stack and delivers a signed integer + pusha pushes an address on the stack + popa removes a machine word from the stack and delivers it as an address + pushd pushes a double precision number on the stack + popd removes two machine words and returns a double precision integer + pushr pushes a float (floating point) number on the stack + popr removes several machine words and returns a float number + pushx puts an object of arbitrary size on the stack + popx removes an object of arbitrary size + } + +procedure push(x:word); +begin newsp(sp-wsize); store(sp,x) end; + +procedure pushsw(x:sword); +begin newsp(sp-wsize); store(sp,unsign(x)) end; + +function pop:word; +begin pop:=memw(sp); newsp(sp+wsize) end; + +function popsw:sword; +begin popsw:=signwd(pop) end; + +procedure pusha(x:adr); +begin newsp(sp-asize); storea(sp,x) end; + +function popa:adr; +begin popa:=mema(sp); newsp(sp+asize) end; + +procedure pushd(y:double); +begin { push double integer onto the stack } newsp(sp-2*wsize) end; + +function popd:double; +begin { pop double integer from the stack } newsp(sp+2*wsize); popd:=0 end; + +procedure pushr(z:real); +begin { Push a float onto the stack } newsp(sp-fsize) end; + +function popr:real; +begin { pop float from the stack } newsp(sp+fsize); popr:=0.0 end; + +procedure pushx(objsize:size; a:adr); +var i:integer; +begin + if objsize= 0 then w := w div 2 else w := (w-1) div 2 end; + +procedure suright(var w:word); { 1 bit right shift without sign extension } +begin w := w div 2 end; + +procedure sdright(var d:double); { 1 bit right shift } +begin { shift 
two word signed integer } end; + +procedure rleft(var w:word); { 1 bit left rotate } +begin if w >= t15 + then w:=(w-t15)*2 + 1 + else w:=w*2 +end; + +procedure rright(var w:word); { 1 bit right rotate } +begin if w mod 2 = 1 + then w:=w div 2 + t15 + else w:=w div 2 +end; + +function sextend(w:word;s:size):word; +var i:size; +begin + for i:=1 to (wsize-s)*8 do rleft(w); + for i:=1 to (wsize-s)*8 do sright(w); + sextend:=w; +end; + +function bit(b:bitnr; w:word):bitval; { return bit b of the word w } +var i:bitnr; +begin for i:= 1 to b do rright(w); bit:= w mod 2 end; + +function bf(ty:bftype; w1,w2:word):word; { return boolean fcn of 2 words } +var i:bitnr; j:word; +begin j:=0; + for i:= maxbitnr downto 0 do + begin j := 2*j; + case ty of + andf: if bit(i,w1)+bit(i,w2) = 2 then j:=j+1; + iorf: if bit(i,w1)+bit(i,w2) > 0 then j:=j+1; + xorf: if bit(i,w1)+bit(i,w2) = 1 then j:=j+1 + end + end; + bf:=j +end; + +{---------------------------------------------------------------------------} +{ Array indexing } +{---------------------------------------------------------------------------} + +function arraycalc(c:adr):adr; { subscript calculation } +var j:full; objsize:size; a:adr; +begin j:= popsw - signwd(memw(c)); + if (j<0) or (j>memw(c+wsize)) then trap(EARRAY); + objsize := argo(memw(c+wsize+wsize)); + a := j*objsize+popa; chkadr(a,objsize); + arraycalc:=a +end; +.ne 20 +.sp 2 +{---------------------------------------------------------------------------} +{ Double and Real Arithmetic } +{---------------------------------------------------------------------------} + +{ All routines for doubles and floats are dummy routines, since the format of + doubles and floats is not defined in EM. 
+} + +function doadi(ds,dt:double):double; +begin { add two doubles } doadi:=0 end; + +function dosbi(ds,dt:double):double; +begin { subtract two doubles } dosbi:=0 end; + +function domli(ds,dt:double):double; +begin { multiply two doubles } domli:=0 end; + +function dodvi(ds,dt:double):double; +begin { divide two doubles } dodvi:=0 end; + +function dormi(ds,dt:double):double; +begin { modulo of two doubles } dormi:=0 end; + +function dongi(ds:double):double; +begin { negative of a double } dongi:=0 end; + +function doadf(x,y:real):real; +begin { add two floats } doadf:=0.0 end; + +function dosbf(x,y:real):real; +begin { subtract two floats } dosbf:=0.0 end; + +function domlf(x,y:real):real; +begin { multiply two floats } domlf:=0.0 end; + +function dodvf(x,y:real):real; +begin { divide two floats } dodvf:=0.0 end; + +function dongf(x:real):real; +begin { negate a float } dongf:=0.0 end; + +procedure dofif(x,y:real;var intpart,fraction:real); +begin { dismember x*y into integer and fractional parts } + intpart:=0.0; { integer part of x*y, same sign as x*y } + fraction:=0.0; + { fractional part of x*y, 0<=abs(fraction)<1 and same sign as x*y } +end; + +procedure dofef(x:real;var mantissa:real;var exponent:sword); +begin { dismember x into mantissa and exponent parts } + mantissa:=0.0; { mantissa of x , >= 1/2 and <1 } + exponent:=0; { base 2 exponent of x } +end; +.bp +{---------------------------------------------------------------------------} +{ Trap and Call } +{---------------------------------------------------------------------------} + +procedure call(p:adr); { Perform the call } +begin + pusha(lb);pusha(pc); + newlb(sp);newsp(sp - memi(pd + pdsize*p + pdlocs)); + newpc(memi(pd + pdsize*p+ pdbase)) +end; + +procedure dotrap(n:byte); +var i:size; +begin + if (uerrorproc=0) or intrap then + begin + if intrap then + writeln('Recursive trap, first trap number was ', trapval:1); + writeln('Error ', n:1); + writeln('With',ord(insr):4,' arg ',k:1); + goto 9999 + 
end; + { Deposit all interpreter variables that need to be saved on + the stack. This includes all scratch variables that can + be in use at the moment and ( not possible in this interpreter ) + the internal address of the interpreter where the error occurred. + This would make it possible to execute an RTT instruction totally + transparent to the user program. + It can, for example, occur within an ADD instruction that both + operands are undefined and that the result overflows. + Although this will generate 3 error traps it must be possible + to ignore them all. +} + intrap:=true; trapval:=n; + for i:=retsize div wsize downto 1 do push(retarea[i]); + push(retsize); { saved return area } + pusha(mema(fileadr)); { saved current file name pointer } + push(memw(lineadr)); { saved line number } + push(n); { push error number } + a:=argp(uerrorproc); + uerrorproc:=0; { reset signal } + call(a); { call the routine } + intrap:=false; { Don't catch recursive traps anymore } + goto 8888; { reenter main loop } +end; + +procedure trap; +{ This routine is invoked for overflow, and other run time errors. + For non-fatal errors, trap returns to the calling routine +} +begin + if n>=16 then dotrap(n) else if bit(n,ignmask)=0 then dotrap(n); +end; + +procedure dortt; +{ The restoration of file address and line number is not essential. + The restoration of the return save area is. 
+} +var i:size; + n:word; +begin + newsp(lb); lb:=maxdata+1 ; { to circumvent ESTACK for the popa + pop } + newpc(popa); newlb(popa); { So far a plain RET 0 } + n:=pop; if (n>=16) and (n<64) then goto 9999 ; + lino(pop); filna(popa); retsize:=pop; + for i:=1 to retsize div wsize do retarea[i]:=pop ; +end; +.sp 2 +{---------------------------------------------------------------------------} +{ monitor calls } +{---------------------------------------------------------------------------} + + +procedure domon(entry:word); +var index: 1..63; + dummy: double; + count,rwptr: adr; + token: byte; + i: integer; +begin + if (entry<=0) or (entry>63) then entry:=63 ; + index:=entry; + case index of + 1: begin { exit } exitstatus:=pop; halted:=true end; + 3: begin { read } dummy:=pop; { All input is from stdin } + rwptr:=popa; count:=popa; + i:=0 ; + while (not eof(input)) and (i0 then + begin i:=20; found:=false; + while (i<>0) and not found do + begin c:=memb(a); a:=a+1; found:=true; i:=i-1; + if (c>=48) and (c<=57) then + begin found:=false; write(chr(ord('0')+c-48)) end; + if (c>=65) and (c<=90) then + begin found:=false; write(chr(ord('A')+c-65)) end; + if (c>=97) and (c<=122) then + begin found:=false; write(chr(ord('a')+c-97)) end; + end; + end; + writeln; +end; + +procedure initialize; { start the ball rolling } +{ This is not part of the machine definition } +var cset:set of char; + f:ifset; + iclass:insclass; + insno:byte; + nops:integer; + opcode:byte; + i,j,n:integer; + wtemp:sword; + count:integer; + repc:adr; + nexta,firsta:adr; + elem:byte; + amount,ofst:size; + c:char; + + function readb(n:integer):double; + var b:byte; + begin read(prog,b); if n>1 then readb:=readb(n-1)*256+b else readb:=b end; + + function readbyte:byte; + begin readbyte:=readb(1) end; + + function readword:word; + begin readword:=readb(wsize) end; + + function readadr:adr; + begin readadr:=readb(asize) end; + + function ifind(ordinal:byte):mnem; + var loopvar:mnem; + found:boolean; + begin 
ifind:=NON; + loopvar:=insr; found:=false; + repeat + if ordinal=ord(loopvar) then + begin found:=true; ifind:=loopvar end; + if loopvar<>ZRL then loopvar:=succ(loopvar) else loopvar:=NON; + until found or (loopvar=insr) ; + end; + + procedure readhdr; + { reads eight 2-byte header words and halts with a message when a checked word has an unexpected value } + type hdrw=0..32767 ; { 16 bit header words } + var hdr: hdrw; + i: integer; + begin + for i:=0 to 7 do + begin hdr:=readb(2); + case i of + 0: if hdr<>3757 then { 07255 } + begin writeln('Not an em load file'); halt end; + 2: if hdr<>0 then + begin writeln('Unsolved references'); halt end; + 3: if hdr<>3 then + begin writeln('Incorrect load file version'); halt end; + 4: if hdr<>wsize then + begin writeln('Incorrect word size'); halt end; + 5: if hdr<>asize then + begin writeln('Incorrect pointer size'); halt end; + 1,6,7:; + end + end + end; + + procedure noinit; + begin writeln('Illegal initialization'); halt end; + + procedure readint(a:adr;s:size); + var i:size; + begin { construct integer out of byte sequence } + for i:=1 to s do { construct the value and initialize at a } + begin storeb(a,readbyte); a:=a+1 end + end; + + procedure readuns(a:adr;s:size); + begin { construct unsigned out of byte sequence } + readint(a,s) { identical to readint } + end; + + procedure readfloat(a:adr;s:size); + var i:size; b:byte; + begin { construct float out of string} + { only sizes 4 and 8 are accepted; as written the loop merely consumes bytes up to and including a zero byte and stores nothing at a } + if (s<>4) and (s<>8) then noinit; i:=0; + repeat { eat the bytes, construct the value and initialize at a } + b:=readbyte; i:=i+1; + until b=0 ; + end; + +begin + halted:=false; + exitstatus:=undef; + uerrorproc:=0; intrap:=false; + + { initialize tables } + for i:=0 to maxcode do code[i]:=0; + for i:=0 to maxdata do data[i]:=0; + for iclass:=prim to tert do + for i:=0 to 255 do + with dispat[iclass][i] do + begin instr:=NON; iflag:=[zbit] end; + + { read instruction table file.
see appendix B } + { The table read here is a simple transformation of the table on page xx } + { - instruction names were transformed to numbers } + { - the '-' flag was transformed to an 'i' flag for 'w' type instructions } + { - the 'S' flag was added for instructions having signed operands } + reset(tables); + insr:=NON; + repeat + read(tables,insno) ; cset:=[]; f:=[]; + insr:=ifind(insno); + if insr=NON then begin writeln('Incorrect table'); halt end; + repeat read(tables,c) until c<>' ' ; + repeat + cset:=cset+[c]; + read(tables,c) + until c=' ' ; + if 'm' in cset then f:=f+[mini]; + if 's' in cset then f:=f+[short]; + if '-' in cset then f:=f+[zbit]; + if 'i' in cset then f:=f+[ibit]; + if 'S' in cset then f:=f+[sbit]; + if 'w' in cset then f:=f+[wbit]; + if (mini in f) or (short in f) then read(tables,nops) else nops:=1 ; + readln(tables,opcode); + if ('4' in cset) or ('8' in cset) then + begin iclass:=tert end + else if 'e' in cset then + begin iclass:=second end + else iclass:=prim; + for i:=0 to nops-1 do + begin + with dispat[iclass,opcode+i] do + begin + iflag:=f; instr:=insr; + if '2' in cset then ilength:=2 + else if 'u' in cset then ilength:=2 + else if '4' in cset then ilength:=4 + else if '8' in cset then ilength:=8 + else if (mini in f) or (short in f) then + begin + if 'N' in cset then wtemp:=-1-i else wtemp:=i ; + if 'o' in cset then wtemp:=wtemp+1 ; + if short in f then wtemp:=wtemp*256 ; + implicit:=wtemp + end + end + end + until eof(tables); + + { read in program text, data and procedure descriptors } + reset(prog); + readhdr; { verify first header } + for i:=1 to 8 do header[i]:=readadr; { read second header } + hp:=maxdata+1; sp:=maxdata+1; lino(0); + { read program text } + if header[NTEXT]+header[NPROC]*pdsize>maxcode then + begin writeln('Text size too large'); halt end; + if header[SZDATA]>maxdata then + begin writeln('Data size too large'); halt end; + for i:=0 to header[NTEXT]-1 do code[i]:=readbyte; + { read data blocks } + 
nexta:=0; + for i:=1 to header[NDATA] do + begin + n:=readbyte; + if n<>0 then + begin + elem:=readbyte; firsta:=nexta; + case n of + 1: { uninitialized words } + for j:=1 to elem do + begin store(nexta,undef); nexta:=nexta+wsize end; + 2: { initialized bytes } + for j:=1 to elem do + begin storeb(nexta,readbyte); nexta:=nexta+1 end; + 3: { initialized words } + for j:=1 to elem do + begin store(nexta,readword); nexta:=nexta+wsize end; + 4,5: { instruction and data pointers } + for j:=1 to elem do + begin storea(nexta,readadr); nexta:=nexta+asize end; + 6: { signed integers } + begin readint(nexta,elem); nexta:=nexta+elem end; + 7: { unsigned integers } + begin readuns(nexta,elem); nexta:=nexta+elem end; + 8: { floating point numbers } + begin readfloat(nexta,elem); nexta:=nexta+elem end; + end + end + else + begin + repc:=readadr; amount:=nexta-firsta; + for count:=1 to repc do + begin + for ofst:=0 to amount-1 do data[nexta+ofst]:=data[firsta+ofst]; + nexta:=nexta+amount; + end + end + end; + if header[SZDATA]<>nexta then writeln('Data initialization error'); + hp:=nexta; + { read descriptor table } + pd:=header[NTEXT]; + for i:=1 to header[NPROC]*pdsize do code[pd+i-1]:=readbyte; + { call the entry point routine } + ignmask:=0; { catch all traps, higher numbered traps cannot be ignored} + retsize:=0; + lb:=maxdata; { illegal dynamic link } + pc:=maxcode; { illegal return address } + push(0); a:=sp; { No environment } + push(0); b:=sp; { No args } + pusha(a); { envp } + pusha(b); { argv } + push(0); { argc } + call(argp(header[ENTRY])); +end; +.bp +{---------------------------------------------------------------------------} +{ MAIN LOOP OF THE INTERPRETER } +{---------------------------------------------------------------------------} +{ It should be noted that the interpreter (microprogram) for an EM + machine can be written in two fundamentally different ways: (1) the + instruction operands are fetched in the main loop, or (2) the in- + struction operands are 
fetched after the 256 way branch, by the exe- + cution routines themselves. In this interpreter, method (1) is used + to simplify the description of execution routines. The dispatch + table dispat is used to determine how the operand is encoded. There + are 4 possibilities: + + 0. There is no operand + 1. The operand and instruction are together in 1 byte (mini) + 2. The operand is one byte long and follows the opcode byte(s) + 3. The operand is two bytes long and follows the opcode byte(s) + 4. The operand is four bytes long and follows the opcode byte(s) + + In this interpreter, the main loop determines the operand type, + fetches it, and leaves it in the global variable k for the execution + routines to use. Consequently, instructions such as LOL, which use + three different formats, need only be described once in the body of + the interpreter. + However, for a production interpreter, or a hardware EM + machine, it is probably better to use method (2), i.e. to let the + execution routines themselves fetch their own operands. The reason + for this is that each opcode uniquely determines the operand format, + so no table lookup in the dispatch table is needed. The whole table + is not needed. Method (2) therefore executes much faster. + However, separate execution routines will be needed for LOL with + a one byte offset, and LOL with a two byte offset. It is to avoid + this additional clutter that method (1) is used here. In a produc- + tion interpreter, it is envisioned that the main loop will fetch the + next instruction byte, and use it as an index into a 256 word table + to find the address of the interpreter routine to jump to. The + routine jumped to will begin by fetching its operand, if any, + without any table lookup, since it knows which format to expect. + After doing the work, it returns to the main loop by jumping in- + directly to a register that contains the address of the main loop. 
+ A slight variation on this idea is to have the register contain + the address of the branch table, rather than the address of the main + loop. + Another issue is whether the execution routines for LOL 0, LOL + 2, LOL 4, etc. should all be distinct execution routines. Doing + so provides for the maximum speed, since the operand is implicit in + the routine itself. The disadvantage is that many nearly identical + execution routines will then be needed. Another way of doing it is + to keep the instruction byte fetched from memory (LOL 0, LOL 2, LOL + 4, etc.) in some register, and have all the LOL mini format instruc- + tions branch to a common routine. This routine can then determine + the operand by subtracting the code for LOL 0 from the register, + leaving the true operand in the register (as a word quantity of + course). This method makes the interpreter smaller, but is a bit + slower. +.bp + To make this important point a little clearer, consider how a + production interpreter for the PDP-11 might appear. Let us assume the + following opcodes have been assigned: + + 31: LOL -2 (2 bytes, i.e. next word) + 32: LOL -4 + 33: LOL -6 + 34: LOL b (format with a one byte offset) + 35: LOL w (format with a one word, i.e. two byte offset) + + Further assume that each of the 5 opcodes will have its own execution + routine, i.e. we are making a tradeoff in favor of fast execution and + a slightly larger interpreter. + Register r5 is the em program counter. + Register r4 is the em LB register + Register r3 is the em SP register (the stack grows toward low core) + Register r2 contains the interpreter address of the main loop + + The main loop looks like this: + + movb (r5)+,r0 /fetch the opcode into r0 and increment r5 + asl r0 /shift r0 left 1 bit. Now: -256<=r0<=+254 + jmp *table(r0) /jump to execution routine + + Notice that no operand fetching has been done.
The execution routines for + the 5 sample instructions given above might be as follows: + + lol2: mov -2(r4),-(sp) /push local -2 onto stack + jmp (r2) /go back to main loop + lol4: mov -4(r4),-(sp) /push local -4 onto stack + jmp (r2) /go back to main loop + lol6: mov -6(r4),-(sp) /push local -6 onto stack + jmp (r2) /go back to main loop + lolb: mov $177400,r0 /prepare to fetch the 1 byte operand + bisb (r5)+,r0 /operand is now in r0 + asl r0 /r0 is now offset from LB in bytes, not words + add r4,r0 /r0 is now address of the needed local + mov (r0),-(sp) /push the local onto the stack + jmp (r2) + lolw: clr r0 /prepare to fetch the 2 byte operand + bisb (r5)+,r0 /fetch high order byte first !!! + swab r0 /insert high order byte in place + bisb (r5)+,r0 /insert low order byte in place + asl r0 /convert offset to bytes, from words + add r4,r0 /r0 is now address of needed local + mov (r0),-(sp) /stack the local + jmp (r2) /done + + The important thing to notice is where and how the operand fetch occurred: + lol2, lol4, and lol6, (the mini's) have implicit operands + lolb knew it had to fetch one byte, and did so without any table lookup + lolw knew it had to fetch a word, and did so, high order byte first } +.bp +.sp 4 +{---------------------------------------------------------------------------} +{ Routines for the individual instructions } +{---------------------------------------------------------------------------} +procedure loadops; +var j:integer; +begin + case insr of + { LOAD GROUP } + LDC: pushd(argd(k)); + LOC: pushsw(argc(k)); + LOL: push(memw(locadr(k))); + LOE: push(memw(argg(k))); + LIL: push(memw(mema(locadr(k)))); + LOF: push(memw(popa+argf(k))); + LAL: pusha(locadr(k)); + LAE: pusha(argg(k)); + LXL: begin a:=lb; for j:=1 to argn(k) do a:=mema(a+savsize); pusha(a) end; + LXA: begin a:=lb; + for j:=1 to argn(k) do a:= mema(a+savsize); + pusha(a+savsize) + end; + LOI: pushx(argo(k),popa); + LOS: begin k:=argw(k); if k<>wsize then trap(EILLINS); + 
k:=pop; pushx(argo(k),popa) + end; + LDL: begin a:=locadr(k); push(memw(a+wsize)); push(memw(a)) end; + LDE: begin k:=argg(k); push(memw(k+wsize)); push(memw(k)) end; + LDF: begin k:=argf(k); + a:=popa; push(memw(a+k+wsize)); push(memw(a+k)) + end; + LPI: push(argp(k)) + end +end; + +procedure storeops; +begin + case insr of + { STORE GROUP } + STL: store(locadr(k),pop); + STE: store(argg(k),pop); + SIL: store(mema(locadr(k)),pop); + STF: begin a:=popa; store(a+argf(k),pop) end; + STI: popx(argo(k),popa); + STS: begin k:=argw(k); if k<>wsize then trap(EILLINS); + k:=popa; popx(argo(k),popa) + end; + SDL: begin a:=locadr(k); store(a,pop); store(a+wsize,pop) end; + SDE: begin k:=argg(k); store(k,pop); store(k+wsize,pop) end; + SDF: begin k:=argf(k); a:=popa; store(a+k,pop); store(a+k+wsize,pop) end + end +end; + +procedure intarith; +var i:integer; +begin + case insr of + { SIGNED INTEGER ARITHMETIC } + ADI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:=popsw; push(fitsw(ss+st,EIOVFL)) end; + 2: begin dt:=popd; ds:=popd; pushd(doadi(ds,dt)) end; + end ; + SBI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:= popsw; push(fitsw(ss-st,EIOVFL)) end; + 2: begin dt:=popd; ds:=popd; pushd(dosbi(ds,dt)) end; + end ; + MLI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:= popsw; push(fitsw(ss*st,EIOVFL)) end; + 2: begin dt:=popd; ds:=popd; pushd(domli(ds,dt)) end; + end ; + DVI: case szindex(argw(k)) of + 1: begin st:= popsw; ss:= popsw; + if st=0 then trap(EIDIVZ) else pushsw(ss div st) + end; + 2: begin dt:=popd; ds:=popd; pushd(dodvi(ds,dt)) end; + end; + RMI: case szindex(argw(k)) of + 1: begin st:= popsw; ss:=popsw; + if st=0 then trap(EIDIVZ) else pushsw(ss - (ss div st)*st) + end; + 2: begin dt:=popd; ds:=popd; pushd(dormi(ds,dt)) end + end; + NGI: case szindex(argw(k)) of + 1: begin st:=popsw; pushsw(-st) end; + 2: begin ds:=popd; pushd(dongi(ds)) end + end; + SLI: begin t:=pop; + case szindex(argw(k)) of + 1: begin ss:=popsw; + for i:= 1 to t do sleft(ss); 
pushsw(ss) + end + end + end; + SRI: begin t:=pop; + case szindex(argw(k)) of + 1: begin ss:=popsw; + for i:= 1 to t do sright(ss); pushsw(ss) + end; + 2: begin ds:=popd; + for i:= 1 to t do sdright(ss); pushd(ss) + end + end + end + end +end; + +procedure unsarith; +var i:integer; +begin + case insr of + { UNSIGNED INTEGER ARITHMETIC } + ADU: case szindex(argw(k)) of + 1: begin t:=pop; s:= pop; push(chopw(s+t)) end; + 2: trap(EILLINS); + end ; + SBU: case szindex(argw(k)) of + 1: begin t:=pop; s:= pop; push(chopw(s-t)) end; + 2: trap(EILLINS); + end ; + MLU: case szindex(argw(k)) of + 1: begin t:=pop; s:= pop; push(chopw(s*t)) end; + 2: trap(EILLINS); + end ; + DVU: case szindex(argw(k)) of + 1: begin t:= pop; s:= pop; + if t=0 then trap(EIDIVZ) else push(s div t) + end; + 2: trap(EILLINS); + end; + RMU: case szindex(argw(k)) of + 1: begin t:= pop; s:=pop; + if t=0 then trap(EIDIVZ) else push(s - (s div t)*t) + end; + 2: trap(EILLINS); + end; + SLU: case szindex(argw(k)) of + 1: begin t:=pop; s:=pop; + for i:= 1 to t do suleft(s); push(s) + end; + 2: trap(EILLINS); + end; + SRU: case szindex(argw(k)) of + 1: begin t:=pop; s:=pop; + for i:= 1 to t do suright(s); push(s) + end; + 2: trap(EILLINS); + end + end +end; + +procedure fltarith; +begin + case insr of + { FLOATING POINT ARITHMETIC } + ADF: begin argwf(k); rt:=popr; rs:=popr; pushr(doadf(rs,rt)) end; + SBF: begin argwf(k); rt:=popr; rs:=popr; pushr(dosbf(rs,rt)) end; + MLF: begin argwf(k); rt:=popr; rs:=popr; pushr(domlf(rs,rt)) end; + DVF: begin argwf(k); rt:=popr; rs:=popr; pushr(dodvf(rs,rt)) end; + NGF: begin argwf(k); rt:=popr; pushr(dongf(rt)) end; + FIF: begin argwf(k); rt:=popr; rs:=popr; + dofif(rt,rs,x,y); pushr(y); pushr(x) + end; + FEF: begin argwf(k); rt:=popr; dofef(rt,x,ss); pushr(x); pushsw(ss) end + end +end; + +procedure ptrarith; +begin + case insr of + { POINTER ARITHMETIC } + ADP: pusha(popa+argf(k)); + ADS: case szindex(argw(k)) of + 1: begin st:=popsw; pusha(popa+st) end; + 2: begin 
dt:=popd; pusha(popa+dt) end; + end; + SBS: begin + a:=popa; b:=popa; + case szindex(argw(k)) of + 1: push(fitsw(b-a,EIOVFL)); + 2: pushd(b-a) + end + end + end +end; + +procedure incops; +var j:integer; +begin + case insr of + { INCREMENT/DECREMENT/ZERO } + INC: push(fitsw(popsw+1,EIOVFL)); + INL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; + INE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; + DEC: push(fitsw(popsw-1,EIOVFL)); + DEL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; + DEE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; + ZRL: store(locadr(k),0); + ZRE: store(argg(k),0); + ZER: for j:=1 to argw(k) div wsize do push(0); + ZRF: pushr(0); + end +end; + +procedure convops; +begin + case insr of + { CONVERT GROUP } + CII: begin s:=pop; t:=pop; + if tmaxsint then trap(ECONV); push(s) end; + 2: trap(EILLINS); + end; + 2: case szindex(argw(pop)) of + 1: pushd(pop); + 2: trap(EILLINS); + end; + end; + CUU: case szindex(argw(pop)) of + 1: if szindex(argw(pop))=2 then trap(EILLINS); + 2: trap(EILLINS); + end; + CUF: begin argwf(pop); + if szindex(argw(pop))=1 then pushr(pop) else trap(EILLINS) + end; + CFI: begin sz:=argw(pop); argwf(pop); rt:=popr; + case szindex(sz) of + 1: push(fitsw(trunc(rt),ECONV)); + 2: pushd(fitd(trunc(rt))); + end + end; + CFU: begin sz:=argw(pop); argwf(pop); rt:=popr; + case szindex(sz) of + 1: push( chopw(trunc(abs(rt)-0.5)) ); + 2: trap(EILLINS); + end + end; + CFF: begin argwf(pop); argwf(pop) end + end +end; + +procedure logops; +var i,j:integer; +begin + case insr of + { LOGICAL GROUP } + XAND: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin a:=sp+k; t:=pop; store(a,bf(andf,memw(a),t)) end; + end; + IOR: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin a:=sp+k; t:=pop; store(a,bf(iorf,memw(a),t)) end; + end; + XOR: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin a:=sp+k; t:=pop; store(a,bf(xorf,memw(a),t)) end; + end; + COM: 
+ begin k:=argw(k); + for j:= 1 to k div wsize do + begin + store(sp+k-wsize*j, bf(xorf,memw(sp+k-wsize*j), negoff-1)) + end + end; + ROL: begin k:=argw(k); if k<>wsize then trap(EILLINS); + t:=pop; s:=pop; for i:= 1 to t do rleft(s); push(s) + end; + ROR: begin k:=argw(k); if k<>wsize then trap(EILLINS); + t:=pop; s:=pop; for i:= 1 to t do rright(s); push(s) + end + end +end; + +procedure setops; +var i,j:integer; +begin + case insr of + { SET GROUP } + INN: + begin k:=argw(k); + t:=pop; + i:= t mod 8; t:= t div 8; + if t>=k then + begin trap(ESET); s:=0 end + else + begin s:=memb(sp+t) end; + newsp(sp+k); push(bit(i,s)); + end; + XSET: + begin k:=argw(k); + t:=pop; + i:= t mod 8; t:= t div 8; + for j:= 1 to k div wsize do push(0); + if t>=k then + trap(ESET) + else + begin s:=1; for j:= 1 to i do rleft(s); storeb(sp+t,s) end + end + end +end; + +procedure arrops; +begin + case insr of + { ARRAY GROUP } + LAR: + begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + pushx(argo(memw(a+2*k)),arraycalc(a)) + end; + SAR: + begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + popx(argo(memw(a+2*k)),arraycalc(a)) + end; + AAR: + begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + push(arraycalc(a)) + end + end +end; + +procedure cmpops; +begin + case insr of + { COMPARE GROUP } + CMI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:=popsw; + if ss memw(sp+k+j) then t:=1; + j:=j+wsize + end; + newsp(sp+wsize*k); push(t); + end; + + TLT: if popsw < 0 then push(1) else push(0); + TLE: if popsw <= 0 then push(1) else push(0); + TEQ: if pop = 0 then push(1) else push(0); + TNE: if pop <> 0 then push(1) else push(0); + TGE: if popsw >= 0 then push(1) else push(0); + TGT: if popsw > 0 then push(1) else push(0); + end +end; + +procedure branchops; +begin + case insr of + { BRANCH GROUP } + BRA: newpc(pc+k); + + BLT: begin st:=popsw; if popsw < st then newpc(pc+k) end; + BLE: begin st:=popsw; if popsw <= st then newpc(pc+k) end; + BEQ: begin t :=pop ; if 
pop = t then newpc(pc+k) end; + BNE: begin t :=pop ; if pop <> t then newpc(pc+k) end; + BGE: begin st:=popsw; if popsw >= st then newpc(pc+k) end; + BGT: begin st:=popsw; if popsw > st then newpc(pc+k) end; + + ZLT: if popsw < 0 then newpc(pc+k); + ZLE: if popsw <= 0 then newpc(pc+k); + ZEQ: if pop = 0 then newpc(pc+k); + ZNE: if pop <> 0 then newpc(pc+k); + ZGE: if popsw >= 0 then newpc(pc+k); + ZGT: if popsw > 0 then newpc(pc+k) + end +end; + +procedure callops; +var j:integer; +begin + case insr of + { PROCEDURE CALL GROUP } + CAL: call(argp(k)); + CAI: begin call(argp(popa)) end; + RET: begin k:=argz(k); if k div wsize>maxret then trap(EILLINS); + for j:= 1 to k div wsize do retarea[j]:=pop; retsize:=k; + newsp(lb); lb:=maxdata+1; { To circumvent stack overflow error } + newpc(popa); + if pc=maxcode then + begin + halted:=true; + if retsize=wsize then exitstatus:=retarea[1] + else exitstatus:=undef + end + else + newlb(popa); + end; + LFR: begin k:=args(k); if k<>retsize then trap(EILLINS); + for j:=k div wsize downto 1 do push(retarea[j]); + end + end +end; + +procedure miscops; +var i,j:integer; +begin + case insr of + { MISCELLANEOUS GROUP } + ASP,ASS: + begin if insr=ASS then + begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=popsw end; + k:=argf(k); + if k<0 + then for j:= 1 to -k div wsize do push(undef) + else newsp(sp+k); + end; + BLM,BLS: + begin if insr=BLS then + begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; + k:=argz(k); + b:=popa; a:=popa; + for j := 1 to k div wsize do + store(b-wsize+wsize*j,memw(a-wsize+wsize*j)) + end; + CSA: begin k:=argw(k); if k<>wsize then trap(EILLINS); + a:=popa; + st:= popsw - signwd(memw(a+asize)); + if (st>=0) and (st<=memw(a+wsize+asize)) then + b:=mema(a+2*wsize+asize+asize*st) else b:=mema(a); + if b=0 then trap(ECASE) else newpc(b) + end; + CSB: begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + t:=pop; i:=1; found:=false; + while (i<=memw(a+asize)) and not found do + if 
t=memw(a+(asize+wsize)*i) then found:=true else i:=i+1; + if found then b:=memw(a+(asize+wsize)*i+wsize) else b:=memw(a); + if b=0 then trap(ECASE) else newpc(b); + end; + DCH: begin pusha(mema(popa+dynd)) end; + DUP,DUS: + begin if insr=DUS then + begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; + k:=args(k); + for i:=1 to k div wsize do push(memw(sp+k-wsize)); + end; + EXG: begin + k:=argw(k); + for i:=1 to k div wsize do push(memw(sp+k-wsize)); + for i:=0 to k div wsize - 1 do + store(sp+k+i*wsize,memw(sp+k+k+i*wsize)); + for i:=1 to k div wsize do + begin t:=pop ; store(sp+k+k-wsize,t) end; + end; + FIL: filna(argg(k)); + GTO: begin k:=argg(k); + newlb(mema(k+2*asize)); newsp(mema(k+asize)); newpc(mema(k)) + end; + LIM: push(ignmask); + LIN: lino(argn(k)); + LNI: lino(memw(0)+1); + LOR: begin i:=argr(k); + case i of 0:pusha(lb); 1:pusha(sp); 2:pusha(hp) end; + end; + LPB: pusha(popa+statd); + MON: domon(pop); + NOP: writeln('NOP at line ',memw(0):5) ; + RCK: begin a:=popa; + case szindex(argw(k)) of + 1: if (signwd(memw(sp))signwd(memw(a+wsize))) then trap(ERANGE); + 2: if (memd(sp)memd(a+2*wsize)) then trap(ERANGE); + end + end; + RTT: dortt; + SIG: begin a:=popa; pusha(uerrorproc); uerrorproc:=a end; + SIM: ignmask:=pop; + STR: begin i:=argr(k); + case i of 0: newlb(popa); 1: newsp(popa); 2: newhp(popa) end; + end; + TRP: trap(pop) + end +end; +.bp +{---------------------------------------------------------------------------} +{ Main Loop } +{---------------------------------------------------------------------------} + +begin initialize; +8888: + repeat + opcode := nextpc; { fetch the first byte of the instruction } + if opcode=escape1 then iclass:=second + else if opcode=escape2 then iclass:=tert + else iclass:=prim; + if iclass<>prim then opcode := nextpc; + with dispat[iclass][opcode] do + begin insr:=instr; + if not (zbit in iflag) then + if ibit in iflag then k:=pop else + begin + if mini in iflag then k:=implicit else + begin + if short in 
iflag then k:=implicit+nextpc else + begin k:=nextpc; + if (sbit in iflag) and (k>=128) then k:=k-256; + for i:=2 to ilength do k:=256*k + nextpc + end + end; + if wbit in iflag then k:=k*wsize; + end + end; +case insr of + + NON: trap(EILLINS); + + { LOAD GROUP } + LDC,LOC,LOL,LOE,LIL,LOF,LAL,LAE,LXL,LXA,LOI,LOS,LDL,LDE,LDF,LPI: + loadops; + + { STORE GROUP } + STL,STE,SIL,STF,STI,STS,SDL,SDE,SDF: + storeops; + + { SIGNED INTEGER ARITHMETIC } + ADI,SBI,MLI,DVI,RMI,NGI,SLI,SRI: + intarith; + + { UNSIGNED INTEGER ARITHMETIC } + ADU,SBU,MLU,DVU,RMU,SLU,SRU: + unsarith; + + { FLOATING POINT ARITHMETIC } + ADF,SBF,MLF,DVF,NGF,FIF,FEF: + fltarith; + + { POINTER ARITHMETIC } + ADP,ADS,SBS: + ptrarith; + + { INCREMENT/DECREMENT/ZERO } + INC,INL,INE,DEC,DEL,DEE,ZRL,ZRE,ZER,ZRF: + incops; + + { CONVERT GROUP } + CII,CIU,CIF,CUI,CUU,CUF,CFI,CFU,CFF: + convops; + + { LOGICAL GROUP } + XAND,IOR,XOR,COM,ROL,ROR: + logops; + + { SET GROUP } + INN,XSET: + setops; + + { ARRAY GROUP } + LAR,SAR,AAR: + arrops; + + { COMPARE GROUP } + CMI,CMU,CMP,CMF,CMS, TLT,TLE,TEQ,TNE,TGE,TGT: + cmpops; + + { BRANCH GROUP } + BRA, BLT,BLE,BEQ,BNE,BGE,BGT, ZLT,ZLE,ZEQ,ZNE,ZGE,ZGT: + branchops; + + { PROCEDURE CALL GROUP } + CAL,CAI,RET,LFR: + callops; + + { MISCELLANEOUS GROUP } + ASP,ASS,BLM,BLS,CSA,CSB,DCH,DUP,DUS,EXG,FIL,GTO,LIM, + LIN,LNI,LOR,LPB,MON,NOP,RCK,RTT,SIG,SIM,STR,TRP: + miscops; + + end; { end of case statement } + if not ( (insr=RET) or (insr=ASP) or (insr=BRA) or (insr=GTO) ) then + retsize:=0 ; + until halted; +9999: + writeln('halt with exit status: ',exitstatus:1); + doident; +end. +.ft P +.lg 1 +.fi diff --git a/doc/em/env.nr b/doc/em/env.nr new file mode 100644 index 0000000..45ead04 --- /dev/null +++ b/doc/em/env.nr @@ -0,0 +1,193 @@ +.bp +.P1 "ENVIRONMENT INTERACTIONS" +.PP +EM programs can interact with their environment in three ways. +Two, starting/stopping and monitor calls, are dealt with in this chapter. 
+The remaining way to interact, interrupts, will be treated +together with traps in chapter 9. +.P2 "Program starting and stopping" +.PP +EM user programs start with a call to a procedure called +_m_a_i_n. +The assembler and backends look for the definition of a procedure +with this name in their input. +The call passes three parameters to the procedure. +The parameters are similar to the parameters supplied by the +.UX +operating system to C programs. +These parameters are often called \fBargc\fP, \fBargv\fP and \fBenvp\fP. +Argc is the parameter nearest to LB and is a wordsized integer. +The other two are pointers to the first element of an array of +string pointers. +The \fBargv\fP array contains \fBargc\fP +strings, the first of which contains the program call name. +The other strings in the \fBargv\fP +array are the program parameters. +.PP +The \fBenvp\fP +array contains strings in the form "name=string", where 'name' +is the name of an environment variable and string its value. +The \fBenvp\fP +is terminated by a zero pointer. +.PP +An EM user program stops if the program returns from the first +invocation of _m_a_i_n. +The contents of the function return area are used to procure a +wordsized program return code. +EM programs also stop when traps and interrupts occur that are +not caught and when the exit monitor call is executed. +.P2 "Input/Output and other monitor calls" +.PP +EM differs from most conventional machines in that it has high level i/o +instructions. +Typical instructions are OPEN FILE and READ FROM FILE instead +of low level instructions such as setting and clearing +bits in device registers. +By providing such high level i/o primitives, the task of implementing +EM on various non EM machines is made considerably easier. +.PP +I/O is initiated by the MON instruction, which expects an iocode on top +of the stack. +Often there are also parameters which are pushed on the +stack in reverse order, that is: last +parameter first. 
+Some i/o functions also provide results, which are returned on the stack. +In the list of monitor calls we use several types of parameters and results, +these types consist of integers and unsigneds of varying sizes, but never +smaller than the wordsize, and the two pointer types. +.LP +The names of the types used are: +.DS +.TS +tab(:); +l l. +int:an integer of wordsize +int2:an integer whose size is the maximum of the wordsize and 2 bytes +int4:an integer whose size is the maximum of the wordsize and 4 bytes +intp:an integer with the size of a pointer +uns2:an unsigned integer whose size is the maximum of the wordsize and 2 +unsp:an unsigned integer with the size of a pointer +ptr:a pointer into data space +.TE +.DE +.LP +The table below lists the i/o codes with their results and +parameters. +This list is similar to the system calls of the UNIX Version 7 +operating system. +.QQ +To execute a monitor call, proceed as follows: +.IP a) +Stack the parameters, in reverse order, last parameter first. +.IP b) +Push the monitor call number (iocode) onto the stack. +.IP c) +Execute the MON instruction. +.LP +An error code is present on the top of the stack after +execution of most monitor calls. +If this error code is zero, the call performed the action +requested and the results are available on top of the stack. +Non-zero error codes indicate a failure, in this case no +results are available and the error code has been pushed twice. +This construction enables programs to test for failure with a +single instruction (~TEQ or TNE~) and still find out the cause of +the failure. +The result name 'e' is reserved for the error code. +.ne 5 +.LP +List of monitor calls. 
+.LP +.nf +.na +.ta 4n 13n 29n 52n +nr name parameters results function + +1 Exit status:int Terminate this process +2 Fork e,flag,pid:int Spawn new process +3 Read fildes:int;buf:ptr;nbytes:unsp + e:int;rbytes:unsp Read from file +4 Write fildes:int;buf:ptr;nbytes:unsp + e:int;wbytes:unsp Write on a file +5 Open string:ptr;flag:int + e,fildes:int Open file for read and/or write +6 Close fildes:int e:int Close a file +7 Wait e:int;status,pid:int2 + Wait for child +8 Creat string:ptr;mode:int + e,fildes:int Create a new file +9 Link string1,string2:ptr + e:int Link to a file +10 Unlink string:ptr e:int Remove directory entry +12 Chdir string:ptr e:int Change default directory +14 Mknod string:ptr;mode,addr:int2 + e:int Make a special file +15 Chmod string:ptr;mode:int2 + e:int Change mode of file +16 Chown string:ptr;owner,group:int2 + e:int Change owner/group of a file +18 Stat string,statbuf:ptr + e:int Get file status +19 Lseek fildes:int;off:int4;whence:int + e:int;oldoff:int4 Move read/write pointer +20 Getpid pid:int2 Get process identification +21 Mount special,string:ptr;rwflag:int + e:int Mount file system +22 Umount special:ptr e:int Unmount file system +23 Setuid userid:int2 e:int Set user ID +24 Getuid e_uid,r_uid:int2 Get user ID +25 Stime time:int4 e:int Set time and date +26 Ptrace request:int;pid:int2;addr:ptr;data:int + e,value:int Process trace +27 Alarm seconds:uns2 previous:uns2 Schedule signal +28 Fstat fildes:int;statbuf:ptr + e:int Get file status +29 Pause Stop until signal +30 Utime string,timep:ptr + e:int Set file times +33 Access string:ptr;mode:int + e:int Determine file accessibility +34 Nice incr:int Set program priority +35 Ftime bufp:ptr e:int Get date and time +36 Sync Update filesystem +37 Kill pid:int2;sig:int + e:int Send signal to a process +41 Dup fildes,newfildes:int + e,fildes:int Duplicate a file descriptor +42 Pipe e,w_des,r_des:int Create a pipe +43 Times buffer:ptr Get process times +44 Profil 
buff:ptr;bufsiz,offset,scale:intp + Execution time profile +46 Setgid gid:int2 e:int Set group ID +47 Getgid e_gid,r_gid:int Get group ID +48 Sigtrp trapno,signo:int + e,prevtrap:int See below +51 Acct file:ptr e:int Turn accounting on or off +53 Lock flag:int e:int Lock a process +54 Ioctl fildes,request:int;argp:ptr + e:int Control device +56 Mpxcall cmd:int;vec:ptr e:int Multiplexed file handling +59 Exece name,argv,envp:ptr + e:int Execute a file +60 Umask mask:int2 oldmask:int2 Set file creation mode mask +61 Chroot string:ptr e:int Change root directory +.fi +.ad +.LP +Codes 0, 11, 13, 17, 31, 32, 38, 39, 40, 45, 49, 50, 52, +55, 57, 58, 62, and 63 are +not used. +.PP +All monitor calls, except fork and sigtrp +are the same as the UNIX version 7 system calls. +.PP +The sigtrp entry maps UNIX signals onto EM interrupts. +Normally, trapno is in the range 0 to 252. +In that case it requests that signal signo +will cause trap trapno to occur. +When given trap number \-2, default signal handling is reset, and when given +trap number \-3, the signal is ignored. +.PP +The flag returned by fork is 1 in the child process and 0 in +the parent. +The pid returned is the process-id of the other process. 
diff --git a/doc/em/even.c b/doc/em/even.c new file mode 100644 index 0000000..645d9b6 --- /dev/null +++ b/doc/em/even.c @@ -0,0 +1,9 @@ +main() { + register int l,j ; + + for ( j=0 ; (l=getchar()) != -1 ; j++ ) { + if ( j%16 == 15 ) printf("%3d\n",l&0377 ) ; + else printf("%3d ",l&0377 ) ; + } + printf("\n") ; +} diff --git a/doc/em/exam.e b/doc/em/exam.e new file mode 100644 index 0000000..ff5e210 --- /dev/null +++ b/doc/em/exam.e @@ -0,0 +1,178 @@ + mes 2,2,2 ; wordsize 2, pointersize 2 + .1 + rom 't.p\000' ; the name of the source file + hol 552,-32768,0 ; externals and buf occupy 552 bytes + exp $sum ; sum can be called from other modules + pro $sum,2 ; procedure sum; 2 bytes local storage + lin 8 ; code from source line 8 + ldl 0 ; load two locals ( a and b ) + adi 2 ; add them + ret 2 ; return the result + end 2 ; end of procedure ( still two bytes local storage ) + .2 + rom 1,99,2 ; descriptor of array a[] + exp $test ; the compiler exports all level 0 procedures + pro $test,226 ; procedure test, 226 bytes local storage + .3 + rom 4.8F8 ; assemble Floating point 4.8 (8 bytes) in + .4 ; global storage + rom 0.5F8 ; same for 0.5 + mes 3,-226,2,2 ; compiler temporary not referenced indirect + mes 3,-24,2,0 ; the same is true for i, j, b and c in test + mes 3,-22,2,0 + mes 3,-4,2,0 + mes 3,-2,2,0 + mes 3,-20,8,0 ; and for x and y + mes 3,-12,8,0 + lin 20 ; maintain source line number + loc 1 + stl -4 ; j := 1 + lni ; was lin 21 prior to optimization + lol -4 + loc 3 + mli 2 + loc 6 + adi 2 + stl -2 ; i := 3 * j + 6 + lni ; was lin 22 prior to optimization + lae .3 + loi 8 + lal -12 + sti 8 ; x := 4.8 + lni ; was lin 23 prior to optimization + lal -12 + loi 8 + lae .4 + loi 8 + dvf 8 + lal -20 + sti 8 ; y := x / 0.5 + lni ; was lin 24 prior to optimization + loc 1 + stl -22 ; b := true + lni ; was lin 25 prior to optimization + loc 122 + stl -24 ; c := 'z' + lni ; was lin 26 prior to optimization + loc 1 + stl -2 ; for i:= 1 + 2 + lol -2 + dup 2 + mli 2 ; i*i + 
lal -224 + lol -2 + lae .2 + sar 2 ; a[i] := + lol -2 + loc 100 + beq *3 ; to 100 do + inl -2 ; increment i and loop + bra *2 + 3 + lin 27 + lol -4 + loc 27 + adi 2 ; j + 27 + sil 0 ; r.r1 := + lni ; was lin 28 prior to optimization + lol -22 ; b + lol 0 + stf 10 ; r.r3 := + lni ; was lin 29 prior to optimization + lal -20 + loi 16 + adf 8 ; x + y + lol 0 + adp 2 + sti 8 ; r.r2 := + lni ; was lin 30 prior to optimization + lal -224 + lol -4 + lae .2 + lar 2 ; a[j] + lil 0 ; r.r1 + cal $sum ; call now + asp 4 ; remove parameters from stack + lfr 2 ; get function result + stl -2 ; i := + 4 + lin 31 + lol -2 + zle *5 ; while i > 0 do + lol -4 + lil 0 + adi 2 + stl -4 ; j := j + r.r1 + del -2 ; i := i - 1 + bra *4 ; loop + 5 + lin 32 + lol 0 + stl -226 ; make copy of address of r + lol -22 + lol -226 + stf 10 ; r3 := b + lal -20 + loi 16 + adf 8 + lol -226 + adp 2 + sti 8 ; r2 := x + y + loc 0 + sil -226 ; r1 := 0 + lin 34 ; note the absence of the unnecessary jump + lae 22 ; address of output structure + lol -4 + cal $_wri ; write integer with default width + asp 4 ; pop parameters + lae 22 + lol -2 + loc 6 + cal $_wsi ; write integer width 6 + asp 6 + lae 22 + lal -12 + loi 8 + loc 9 + loc 3 + cal $_wrf ; write fixed format real, width 9, precision 3 + asp 14 + lae 22 + lol -22 + cal $_wrb ; write boolean, default width + asp 4 + lae 22 + cal $_wln ; writeln + asp 2 + ret 0 ; return, no result + end 226 + exp $_main + pro $_main,0 ; main program + .6 + con 2,-1,22 ; description of external files + .5 + rom 15.96F8 + fil .1 ; maintain source file name + lae .6 ; description of external files + lae 0 ; base of hol area to relocate buffer addresses + cal $_ini ; initialize files, etc... 
+ asp 4 + lin 37 + lae .5 + loi 8 + lae 2 + sti 8 ; x := 15.9 + lni ; was lin 38 prior to optimization + loc 99 + ste 0 ; mi := 99 + lni ; was lin 39 prior to optimization + lae 10 ; address of r + cal $test + asp 2 + loc 0 ; normal exit + cal $_hlt ; cleanup and finish + asp 2 + end 0 + mes 4,40 ; length of source file is 40 lines + mes 5 ; reals were used diff --git a/doc/em/exam.p b/doc/em/exam.p new file mode 100644 index 0000000..5d2e985 --- /dev/null +++ b/doc/em/exam.p @@ -0,0 +1,40 @@ + program example(output); + {This program just demonstrates typical EM code.} + type rec = record r1: integer; r2:real; r3: boolean end; + var mi: integer; mx:real; r:rec; + + function sum(a,b:integer):integer; + begin + sum := a + b + end; + + procedure test(var r: rec); + label 1; + var i,j: integer; + x,y: real; + b: boolean; + c: char; + a: array[1..100] of integer; + + begin + j := 1; + i := 3 * j + 6; + x := 4.8; + y := x/0.5; + b := true; + c := 'z'; + for i:= 1 to 100 do a[i] := i * i; + r.r1 := j+27; + r.r3 := b; + r.r2 := x+y; + i := sum(r.r1, a[j]); + while i > 0 do begin j := j + r.r1; i := i - 1 end; + with r do begin r3 := b; r2 := x+y; r1 := 0 end; + goto 1; + 1: writeln(j, i:6, x:9:3, b) + end; {test} + begin {main program} + mx := 15.96; + mi := 99; + test(r) + end. diff --git a/doc/em/int/READ_ME b/doc/em/int/READ_ME new file mode 100644 index 0000000..bd14ade --- /dev/null +++ b/doc/em/int/READ_ME @@ -0,0 +1,5 @@ +This interpreter is meant for inclusion in the EM manual. +Although slow, it showed decent behaviour on several tests. +The only monitor calls implemented are exit, read(untested), +write and ioctl - just returns the correct code for telling it's +a terminal - diff --git a/doc/em/int/em.p b/doc/em/int/em.p new file mode 100644 index 0000000..1a6cbcf --- /dev/null +++ b/doc/em/int/em.p @@ -0,0 +1,1768 @@ +# +{ This is an interpreter for EM. It serves as a specification for the + EM machine.
This interpreter must run on a machine which supports + arithmetic with words and memory offsets. + + Certain aspects are over specified. In particular: + + 1. The representation of an address on the stack need not be the + numerical value of the memory location. + + 2. The state of the stack is not defined after a trap has aborted + an instruction in the middle. For example, it is officially un- + defined whether the second operand of an ADD instruction has + been popped or not if the first one is undefined ( -32768 or + unsigned 32768). + + 3. The memory layout is implementation dependent. Only the most + basic checks are performed whenever memory is accessed. + + 4. The representation of an integer or set on the stack is not fixed + in bit order. + + 5. The format and existence of the procedure descriptors depends on + the implementation. + + 6. The result of the compare operators CMI etc. are -1, 0 and 1 + here, but other negative and positive values will do and they + need not be the same each time. + + 7. The shift count for SHL, SHR, ROL and ROR must be in the range 0 + to object size in bits - 1. The effect of a count not in this + range is undefined. + + 8. This interpreter does not work for double word integers, although + any decent EM implementation will include double word arithmetic. 
+ } + + + + + + + + + + + + + + + + + + + + + + +{$i256} +{$d+} +#ifndef DOC +program em(tables,prog,core,input,output); +#else +program em(tables,prog,input,output); +#endif + + +label 8888,9999; + +const + t15 = 32768; { 2**15 } + t15m1 = 32767; { 2**15 -1 } + t16 = 65536; { 2**16 } + t16m1 = 65535; { 2**16 -1 } + t31m1 = 2147483647; { 2**31 -1 } + + { constants indicating the size of words and addresses } + wsize = 2; { number of bytes in a word } + asize = 2; { number of bytes in an address } + fsize = 4; { number of bytes in a floating point number } + maxret =4; { number of words in the return value area } + + signbit = t15; { the power of two indicating the sign bit } + negoff = t16; { the next power of two } + maxsint = t15m1; { the maximum signed integer } + maxuint = t16m1; { the maximum unsigned integer } + maxdbl = t31m1; { the maximum double signed integer } + maxadr = t16m1; { the maximum address } + maxoffs = t15m1; { the maximum offset from an address } + maxbitnr= 15; { the number of the highest bit } + + lineadr = 0; { address of the line number } + fileadr = 4; { address of the file name } + maxcode = 8191; { highest byte in code address space } + maxdata = 8191; { highest byte in data address space } + + { format of status save area } + statd = 4; { how far is static link from lb } + dynd = 2; { how far is dynamic link from lb } + reta = 0; { how far is the return address from lb } + savsize = 4; { size of save area in bytes } + + { procedure descriptor format } + pdlocs = 0; { offset for size of local variables in bytes } + pdbase = asize; { offset for the procedure base } + pdsize = 4; { size of procedure descriptor in bytes = 2*asize } + + { header words } + NTEXT = 1; + NDATA = 2; + NPROC = 3; + ENTRY = 4; + NLINE = 5; + SZDATA = 6; + + escape1 = 254; { escape to secondary opcodes } + escape2 = 255; { escape to tertiary opcodes } + undef = signbit; { the range of integers is -32767 to +32767 } + + { error codes } + EARRAY = 0; ERANGE = 1; 
ESET = 2; EIOVFL = 3; + EFOVFL = 4; EFUNFL = 5; EIDIVZ = 6; EFDIVZ = 7; + EIUND = 8; EFUND = 9; ECONV = 10; ESTACK = 16; + EHEAP = 17; EILLINS = 18; EODDZ = 19; ECASE = 20; + EMEMFLT = 21; EBADPTR = 22; EBADPC = 23; EBADLAE = 24; + EBADMON = 25; EBADLIN = 26; EBADGTO = 27; +{ +.ne 20 +.bp +----------------------------------------------------------------------------} +{ Declarations } +{---------------------------------------------------------------------------} + +type + bitval= 0..1; { one bit } + bitnr= 0..maxbitnr; { bits in machine words are numbered 0 to 15 } + byte= 0..255; { memory is an array of bytes } + adr= {0..maxadr} long; { the range of addresses } + word= {0..maxuint} long;{ the range of unsigned integers } + offs= -maxoffs..maxoffs; { the range of signed offsets from addresses } + size= 0..maxoffs; { the range of sizes is the positive offsets } + sword= {-signbit..maxsint} long; { the range of signed integers } + full= {-maxuint..maxuint} long; { intermediate results need this range } + double={-maxdbl..maxdbl} long; { double precision range } + bftype= (andf,iorf,xorf); { tells which boolean operator needed } + insclass=(prim,second,tert); { tells which opcode table is in use } + instype=(implic,explic); { does opcode have implicit or explicit operand } + iflags= (mini,short,sbit,wbit,zbit,ibit); + ifset= set of iflags; + + mnem = ( NON, + AAR, ADF, ADI, ADP, ADS, ADU,XAND, ASP, ASS, BEQ, + BGE, BGT, BLE, BLM, BLS, BLT, BNE, BRA, CAI, CAL, + CFF, CFI, CFU, CIF, CII, CIU, CMF, CMI, CMP, CMS, + CMU, COM, CSA, CSB, CUF, CUI, CUU, DCH, DEC, DEE, + DEL, DUP, DUS, DVF, DVI, DVU, EXG, FEF, FIF, FIL, + GTO, INC, INE, INL, INN, IOR, LAE, LAL, LAR, LDC, + LDE, LDF, LDL, LFR, LIL, LIM, LIN, LNI, LOC, LOE, + LOF, LOI, LOL, LOR, LOS, LPB, LPI, LXA, LXL, MLF, + MLI, MLU, MON, NGF, NGI, NOP, RCK, RET, RMI, RMU, + ROL, ROR, RTT, SAR, SBF, SBI, SBS, SBU, SDE, SDF, + SDL,XSET, SIG, SIL, SIM, SLI, SLU, SRI, SRU, STE, + STF, STI, STL, STR, STS, TEQ, TGE, TGT, TLE, 
TLT, + TNE, TRP, XOR, ZEQ, ZER, ZGE, ZGT, ZLE, ZLT, ZNE, + ZRE, ZRF, ZRL); + + dispatch = record + iflag: ifset; + instr: mnem; + case instype of + implic: (implicit:sword); + explic: (ilength:byte); + end; + + +var + code: packed array[0..maxcode] of byte; { code space } + data: packed array[0..maxdata] of byte; { data space } + retarea: array[1..maxret ] of word; { return area } + pc,lb,sp,hp,pd: adr; { internal machine registers } + i: integer; { integer scratch variable } + s,t :word; { scratch variables } + sz:size; { scratch variables } + ss,st: sword; { scratch variables } + k :double; { scratch variables } + j:size; { scratch variable used as index } + a,b:adr; { scratch variable used for addresses } + dt,ds:double; { scratch variables for double precision } + rt,rs,x,y:real; { scratch variables for real } + found:boolean; { scratch } + opcode: byte; { holds the opcode during execution } + iclass: insclass; { true for escaped opcodes } + dispat: array[insclass,byte] of dispatch; + retsize:size; { holds size of last LFR } + insr: mnem; { holds the instruction number } + halted: boolean; { normally false } + exitstatus:word; { parameter of MON 1 } + ignmask:word; { ignore mask for traps } + uerrorproc:adr; { number of user defined error procedure } + intrap:boolean; { Set when executing trap(), to catch recursive calls} + trapval:byte; { Set to number of last trap } + header: array[1..8] of adr; + + tables: text; { description of EM instructions } + prog: file of byte; { program and initialized data } +#ifndef DOC + core: file of byte; { post mortem dump } +#endif +{ +.ne 20 +.sp 5 +{---------------------------------------------------------------------------} +{ Various check routines } +{---------------------------------------------------------------------------} + +{ Only the most basic checks are performed. These routines are inherently + implementation dependent. 
} + +procedure trap(n:byte); forward; +#ifndef DOC +procedure writecore(n:byte); forward; +#endif + +procedure memadr(a:adr); +begin if (a>maxdata) or ((a=hp)) then trap(EMEMFLT) end; + +procedure wordadr(a:adr); +begin memadr(a); if (a mod wsize<>0) then trap(EBADPTR) end; + +procedure chkadr(a:adr; s:size); +begin memadr(a); memadr(a+s-1); { assumption: size is ok } + if s0 then trap(EBADPTR) end + else if a mod wsize<>0 then trap(EBADPTR) +end; + +procedure newpc(a:double); +begin if (a<0) or (a>maxcode) then trap(EBADPC); pc:=a end; + +procedure newsp(a:adr); +begin if (a>lb) or (a0) then trap(ESTACK); sp:=a end; + +procedure newlb(a:adr); +begin if (a0) then trap(ESTACK); lb:=a end; + +procedure newhp(a:adr); +begin if (a>sp) or (a>maxdata+1) or (a mod wsize<>0) + then trap(EHEAP) + else hp:=a +end; + +function argc(a:double):sword; +begin if (a<-signbit) or (a>maxsint) then trap(EILLINS); argc:=a end; + +function argd(a:double):double; +begin if (a<-maxdbl) or (a>maxdbl) then trap(EILLINS); argd:=a end; + +function argl(a:double):offs; +begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argl:=a end; + +function argg(k:double):adr; +begin if (k<0) or (k>maxadr) then trap(EILLINS); argg:=k end; + +function argf(a:double):offs; +begin if (a<-maxoffs) or (a>maxoffs) then trap(EILLINS); argf:=a end; + +function argn(a:double):word; +begin if (a<0) or (a>maxuint) then trap(EILLINS); argn:=a end; + +function args(a:double):size; +begin if (a<=0) or (a>maxoffs) + then trap(EODDZ) + else if (a mod wsize)<>0 then trap(EODDZ); + args:=a ; +end; + +function argz(a:double):size; +begin if (a<0) or (a>maxoffs) + then trap(EODDZ) + else if (a mod wsize)<>0 then trap(EODDZ); + argz:=a ; +end; + +function argo(a:double):size; +begin if (a<=0) or (a>maxoffs) + then trap(EODDZ) + else if (a mod wsize<>0) and (wsize mod a<>0) then trap(EODDZ); + argo:=a ; +end; + +function argw(a:double):size; +begin if (a<=0) or (a>maxoffs) or (a>maxuint) + then trap(EODDZ) + else if (a 
mod wsize)<>0 then trap(EODDZ); + argw:=a ; +end; + +function argp(a:double):size; +begin if (a<0) or (a>=header[NPROC]) then trap(EILLINS); argp:=a end; + +function argr(a:double):word; +begin if (a<0) or (a>2) then trap(EILLINS); argr:=a end; + +procedure argwf(s:double); +begin if argw(s)<>fsize then trap(EILLINS) end; + +function szindex(s:double):integer; +begin s:=argw(s); if (s mod wsize <> 0) or (s>2*wsize) then trap(EILLINS); + szindex:=s div wsize +end; + +function locadr(l:double):adr; +begin l:=argl(l); if l<0 then locadr:=lb+l else locadr:=lb+l+savsize end; + +function signwd(w:word):sword; +begin if w = undef then trap(EIUND); + if w >= signbit then signwd:=w-negoff else signwd:=w +end; + +function dosign(w:word):sword; +begin if w >= signbit then dosign:=w-negoff else dosign:=w end; + +function unsign(w:sword):word; +begin if w<0 then unsign:=w+negoff else unsign:=w end; + +function chopw(dw:double):word; +begin chopw:=dw mod negoff end; + +function fitsw(w:full;trapno:byte):word; +{ checks whether value fits in signed word, returns unsigned representation} +begin + if (w>maxsint) or (w<-signbit) then + begin trap(trapno); + if w<0 then fitsw:=negoff- (-w)mod negoff + else fitsw:=w mod negoff; + end + else fitsw:=unsign(w) +end; + +function fitd(w:full):double; +begin + if abs(w) > maxdbl then trap(ECONV); + fitd:=w +end; + +{ +.ne 20 +.sp 5 +{---------------------------------------------------------------------------} +{ Memory access routines } +{---------------------------------------------------------------------------} + +{ memw returns a machine word as an unsigned integer + memb returns a single byte as a positive integer: 0 <= memb <= 255 + mems(a,s) fetches an object smaller than a word and returns a word + store(a,v) stores the word v at machine address a + storea(a,v) stores the address v at machine address a + storeb(a,b) stores the byte b at machine address a + stores(a,s,v) stores the s least significant bytes of a word at address a + 
memi returns an offset from the instruction space + Note that the procedure descriptors are part of instruction space. + nextpc returns the next byte addressed by pc, incrementing pc + + lino changes the line number word. + filna changes the pointer to the file name. + + All routines check to make sure the address is within range and valid for + the size of the object. If an addressing error is found, a trap occurs. +} + + +function memw(a:adr):word; +var b:word; i:integer; +begin wordadr(a); b:=0; + for i:=wsize-1 downto 0 do b:=256*b + data[a+i] ; + memw:=b +end; + +function memd(a:adr):double; { Always signed } +var b:double; i:integer; +begin wordadr(a); b:=data[a+2*wsize-1]; + if b>=128 then b:=b-256; + for i:=2*wsize-2 downto 0 do b:=256*b + data[a+i] ; + memd:=b +end; + +function mema(a:adr):adr; +var b:adr; i:integer; +begin wordadr(a); b:=0; + for i:=asize-1 downto 0 do b:=256*b + data[a+i] ; + mema:=b +end; + +function mems(a:adr;s:size):word; +var i:integer; b:word; +begin chkadr(a,s); b:=0; for i:=1 to s do b:=b*256+data[a+s-i]; mems:=b end; + +function memb(a:adr):byte; +begin memadr(a); memb:=data[a] end; + +procedure store(a:adr; x:word); +var i:integer; +begin wordadr(a); + for i:=0 to wsize-1 do + begin data[a+i]:=x mod 256; x:=x div 256 end +end; + +procedure storea(a:adr; x:adr); +var i:integer; +begin wordadr(a); + for i:=0 to asize-1 do + begin data[a+i]:=x mod 256; x:=x div 256 end +end; + +procedure stores(a:adr;s:size;v:word); +var i:integer; +begin chkadr(a,s); + for i:=0 to s-1 do begin data[a+i]:=v mod 256; v:=v div 256 end; +end; + +procedure storeb(a:adr; b:byte); +begin memadr(a); data[a]:=b end; + +function memi(a:adr):adr; +var b:adr; i:integer; +begin if (a mod wsize<>0) or (a+asize-1>maxcode) then trap(EBADPTR); b:=0; + for i:=asize-1 downto 0 do b:=256*b + code[a+i] ; + memi:=b +end; + +function nextpc:byte; +begin if pc>=pd then trap(EBADPC); nextpc:=code[pc]; newpc(pc+1) end; + +procedure lino(w:word); +begin store(lineadr,w) 
end; + +procedure filna(a:adr); +begin storea(fileadr,a) end; +{ +.ne 20 +.sp 5 +{---------------------------------------------------------------------------} +{ Stack Manipulation Routines } +{---------------------------------------------------------------------------} + +{ push puts a word on the stack + pushsw takes a signed one word integer and pushes it on the stack + pop removes a machine word from the stack and delivers it as a word + popsw removes a machine word from the stack and delivers a signed integer + pusha pushes an address on the stack + popa removes a machine word from the stack and delivers it as an address + pushd pushes a double precision number on the stack + popd removes two machine words and returns a double precision integer + pushr pushes a float (floating point) number on the stack + popr removes several machine words and returns a float number + pushx puts an object of arbitrary size on the stack + popx removes an object of arbitrary size + } + +procedure push(x:word); +begin newsp(sp-wsize); store(sp,x) end; + +procedure pushsw(x:sword); +begin newsp(sp-wsize); store(sp,unsign(x)) end; + +function pop:word; +begin pop:=memw(sp); newsp(sp+wsize) end; + +function popsw:sword; +begin popsw:=signwd(pop) end; + +procedure pusha(x:adr); +begin newsp(sp-asize); storea(sp,x) end; + +function popa:adr; +begin popa:=mema(sp); newsp(sp+asize) end; + +procedure pushd(y:double); +begin { push double integer onto the stack } newsp(sp-2*wsize) end; + +function popd:double; +begin { pop double integer from the stack } newsp(sp+2*wsize); popd:=0 end; + +procedure pushr(z:real); +begin { Push a float onto the stack } newsp(sp-fsize) end; + +function popr:real; +begin { pop float from the stack } newsp(sp+fsize); popr:=0.0 end; + +procedure pushx(objsize:size; a:adr); +var i:integer; +begin + if objsize= 0 then w := w div 2 else w := (w-1) div 2 end; + +procedure suright(var w:word); { 1 bit right shift without sign extension } +begin w := w div 2 end; + 
+procedure sdright(var d:double); { 1 bit right shift } +begin { shift two word signed integer } end; + +procedure rleft(var w:word); { 1 bit left rotate } +begin if w >= t15 + then w:=(w-t15)*2 + 1 + else w:=w*2 +end; + +procedure rright(var w:word); { 1 bit right rotate } +begin if w mod 2 = 1 + then w:=w div 2 + t15 + else w:=w div 2 +end; + +function sextend(w:word;s:size):word; +var i:size; +begin + for i:=1 to (wsize-s)*8 do rleft(w); + for i:=1 to (wsize-s)*8 do sright(w); + sextend:=w; +end; + +function bit(b:bitnr; w:word):bitval; { return bit b of the word w } +var i:bitnr; +begin for i:= 1 to b do rright(w); bit:= w mod 2 end; + +function bf(ty:bftype; w1,w2:word):word; { return boolean fcn of 2 words } +var i:bitnr; j:word; +begin j:=0; + for i:= maxbitnr downto 0 do + begin j := 2*j; + case ty of + andf: if bit(i,w1)+bit(i,w2) = 2 then j:=j+1; + iorf: if bit(i,w1)+bit(i,w2) > 0 then j:=j+1; + xorf: if bit(i,w1)+bit(i,w2) = 1 then j:=j+1 + end + end; + bf:=j +end; + +{---------------------------------------------------------------------------} +{ Array indexing +{---------------------------------------------------------------------------} + +function arraycalc(c:adr):adr; { subscript calculation } +var j:full; objsize:size; a:adr; +begin j:= popsw - signwd(memw(c)); + if (j<0) or (j>memw(c+wsize)) then trap(EARRAY); + objsize := argo(memw(c+wsize+wsize)); + a := j*objsize+popa; chkadr(a,objsize); + arraycalc:=a +end; +{ +.ne 20 +.sp 5 +{---------------------------------------------------------------------------} +{ Double and Real Arithmetic } +{---------------------------------------------------------------------------} + +{ All routines for doubles and floats are dummy routines, since the format of + doubles and floats is not defined in EM. 
+} + +function doadi(ds,dt:double):double; +begin { add two doubles } doadi:=0 end; + +function dosbi(ds,dt:double):double; +begin { subtract two doubles } dosbi:=0 end; + +function domli(ds,dt:double):double; +begin { multiply two doubles } domli:=0 end; + +function dodvi(ds,dt:double):double; +begin { divide two doubles } dodvi:=0 end; + +function dormi(ds,dt:double):double; +begin { modulo of two doubles } dormi:=0 end; + +function dongi(ds:double):double; +begin { negative of a double } dongi:=0 end; + +function doadf(x,y:real):real; +begin { add two floats } doadf:=0.0 end; + +function dosbf(x,y:real):real; +begin { subtract two floats } dosbf:=0.0 end; + +function domlf(x,y:real):real; +begin { multiply two floats } domlf:=0.0 end; + +function dodvf(x,y:real):real; +begin { divide two floats } dodvf:=0.0 end; + +function dongf(x:real):real; +begin { negate a float } dongf:=0.0 end; + +procedure dofif(x,y:real;var intpart,fraction:real); +begin { dismember x*y into integer and fractional parts } + intpart:=0.0; { integer part of x*y, same sign as x*y } + fraction:=0.0; + { fractional part of x*y, 0<=abs(fraction)<1 and same sign as x*y } +end; + +procedure dofef(x:real;var mantissa:real;var exponent:sword); +begin { dismember x into mantissa and exponent parts } + mantissa:=0.0; { mantissa of x , >= 1/2 and <1 } + exponent:=0; { base 2 exponent of x } +end; + +{ +.ne 20 +.sp 5 +.bp +{---------------------------------------------------------------------------} +{ Trap and Call } +{---------------------------------------------------------------------------} + +procedure call(p:adr); { Perform the call } +begin + pusha(lb);pusha(pc); + newlb(sp);newsp(sp - memi(pd + pdsize*p + pdlocs)); + newpc(memi(pd + pdsize*p+ pdbase)) +end; + +procedure dotrap(n:byte); +var i:size; +begin + if (uerrorproc=0) or intrap then + begin + if intrap then + writeln('Recursive trap, first trap number was ', trapval:1); + writeln('Error ', n:1); + writeln('With',ord(insr):4,' arg 
',k:1); +#ifndef DOC + writecore(n); +#endif + goto 9999 + end; + { Deposit all interpreter variables that need to be saved on + the stack. This includes all scratch variables that can + be in use at the moment and ( not possible in this interpreter ) + the internal address of the interpreter where the error occurred. + This would make it possible to execute an RTT instruction totally + transparent to the user program. + It can, for example, occur within an ADD instruction that both + operands are undefined and that the result overflows. + Although this will generate 3 error traps it must be possible + to ignore them all. + + } + intrap:=true; trapval:=n; + for i:=retsize div wsize downto 1 do push(retarea[i]); + push(retsize); { saved return area } + pusha(mema(fileadr)); { saved current file name pointer } + push(memw(lineadr)); { saved line number } + push(n); { push error number } + a:=argp(uerrorproc); + uerrorproc:=0; { reset signal } + call(a); { call the routine } + intrap:=false; { Do not catch recursive traps anymore } + goto 8888; { reenter main loop } +end; + +procedure trap; +{ This routine is invoked for overflow, and other run time errors. + For non-fatal errors, trap returns to the calling routine +} +begin + if n>=16 then dotrap(n) else if bit(n,ignmask)=0 then dotrap(n); +end; + +procedure dortt; +{ The restoration of file address and line number is not essential. + The restoration of the return save area is. 
+} +var i:size; + n:word; +begin + newsp(lb); lb:=maxdata+1 ; { to circumvent ESTACK for the popa + pop } + newpc(popa); newlb(popa); { So far a plain RET 0 } + n:=pop; if (n>=16) and (n<64) then + begin +#ifndef DOC + writecore(n); +#endif + goto 9999 + end; + lino(pop); filna(popa); retsize:=pop; + for i:=1 to retsize div wsize do retarea[i]:=pop ; +end; +{ +.sp 5 +{---------------------------------------------------------------------------} +{ monitor calls } +{---------------------------------------------------------------------------} + + +procedure domon(entry:word); +var index: 1..63; + dummy: double; + count,rwptr: adr; + token: byte; + i: integer; +begin + if (entry<=0) or (entry>63) then entry:=63 ; + index:=entry; + case index of + 1: begin { exit } exitstatus:=pop; halted:=true end; + 3: begin { read } dummy:=pop; { All input is from stdin } + rwptr:=popa; count:=popa; + i:=0 ; + while (not eof(input)) and (i0 then + begin i:=20; found:=false; + while (i<>0) and not found do + begin c:=memb(a); a:=a+1; found:=true; i:=i-1; + if (c>=48) and (c<=57) then + begin found:=false; write(chr(ord('0')+c-48)) end; + if (c>=65) and (c<=90) then + begin found:=false; write(chr(ord('A')+c-65)) end; + if (c>=97) and (c<=122) then + begin found:=false; write(chr(ord('a')+c-97)) end; + end; + end; + writeln; +end; + +#ifndef DOC +{---------------------------------------------------------------------------} +{ Post Mortem Dump } +{ } +{This a not a part of the machine definition, but an ad hoc debugging method} +{---------------------------------------------------------------------------} + +procedure writecore; +var ncoreb,i:integer; + +procedure wrbyte(b:byte); +begin write(core,b); ncoreb:=ncoreb+1 end; + +procedure wradr(a:adr); +var i:integer; +begin for i:=1 to asize do begin wrbyte(a mod 256); a:=a div 256 end end; + +begin + rewrite(core); ncoreb:=0; + wrbyte(173); wrbyte(16); { Magic } + wrbyte(3);wrbyte(0); { Version } + wrbyte(wsize);wrbyte(0); { Wordsize } + 
wrbyte(asize);wrbyte(0); { Address size } + wradr(0); { Text size in dump } + wradr(maxdata+1); { Data size in dump } + wradr(ignmask); + wradr(uerrorproc); + wradr(n); { Cause } + wradr(pc); wradr(sp); wradr(lb); wradr(hp); wradr(pd); wradr(0){pb} ; + while ncoreb<>512 do wradr(0); { Fill } + for i:=0 to maxdata do wrbyte(data[i]) +end; + +#endif + +procedure initialize; { start the ball rolling } +{ This is not part of the machine definition } +var cset:set of char; + f:ifset; + iclass:insclass; + insno:byte; + nops:integer; + opcode:byte; + i,j,n:integer; + wtemp:sword; + count:integer; + repc:adr; + nexta,firsta:adr; + elem:byte; + amount,ofst:size; + c:char; + + function readb(n:integer):double; + var b:byte; + begin read(prog,b); if n>1 then readb:=readb(n-1)*256+b else readb:=b end; + + function readbyte:byte; + begin readbyte:=readb(1) end; + + function readword:word; + begin readword:=readb(wsize) end; + + function readadr:adr; + begin readadr:=readb(asize) end; + + function ifind(ordinal:byte):mnem; + var loopvar:mnem; + found:boolean; + begin ifind:=NON; + loopvar:=insr; found:=false; + repeat + if ordinal=ord(loopvar) then + begin found:=true; ifind:=loopvar end; + if loopvar<>ZRL then loopvar:=succ(loopvar) else loopvar:=NON; + until found or (loopvar=insr) ; + end; + + procedure readhdr; + type hdrw=0..32767 ; { 16 bit header words } + var hdr: hdrw; + i: integer; + begin + for i:=0 to 7 do + begin hdr:=readb(2); + case i of + 0: if hdr<>3757 then { 07255 } + begin writeln('Not an em load file'); halt end; + 2: if hdr<>0 then + begin writeln('Unsolved references'); halt end; + 3: if hdr<>3 then + begin writeln('Incorrect load file version'); halt end; + 4: if hdr<>wsize then + begin writeln('Incorrect word size'); halt end; + 5: if hdr<>asize then + begin writeln('Incorrect pointer size'); halt end; + 1,6,7:; + end + end + end; + + procedure noinit; + begin writeln('Illegal initialization'); halt end; + + procedure readint(a:adr;s:size); + var i:size; 
+ begin { construct integer out of byte sequence } + for i:=1 to s do { construct the value and initialize at a } + begin storeb(a,readbyte); a:=a+1 end + end; + + procedure readuns(a:adr;s:size); + begin { construct unsigned out of byte sequence } + readint(a,s) { identical to readint } + end; + + procedure readfloat(a:adr;s:size); + var i:size; b:byte; + begin { construct float out of string} + if (s<>4) and (s<>8) then noinit; i:=0; + repeat { eat the bytes, construct the value and intialize at a } + b:=readbyte; i:=i+1; + until b=0 ; + end; + +begin + halted:=false; + exitstatus:=undef; + uerrorproc:=0; intrap:=false; + + { initialize tables } + for i:=0 to maxcode do code[i]:=0; + for i:=0 to maxdata do data[i]:=0; + for iclass:=prim to tert do + for i:=0 to 255 do + with dispat[iclass][i] do + begin instr:=NON; iflag:=[zbit] end; + + { read instruction table file. see appendix B } + { The table read here is a simple transformation of the table on page xx } + { - instruction names were transformed to numbers } + { - the '-' flag was transformed to an 'i' flag for 'w' type instructions } + { - the 'S' flag was added for instructions having signed operands } + reset(tables); + insr:=NON; + repeat + read(tables,insno) ; cset:=[]; f:=[]; + insr:=ifind(insno); + if insr=NON then begin writeln('Incorrect table'); halt end; + repeat read(tables,c) until c<>' ' ; + repeat + cset:=cset+[c]; + read(tables,c) + until c=' ' ; + if 'm' in cset then f:=f+[mini]; + if 's' in cset then f:=f+[short]; + if '-' in cset then f:=f+[zbit]; + if 'i' in cset then f:=f+[ibit]; + if 'S' in cset then f:=f+[sbit]; + if 'w' in cset then f:=f+[wbit]; + if (mini in f) or (short in f) then read(tables,nops) else nops:=1 ; + readln(tables,opcode); + if ('4' in cset) or ('8' in cset) then + begin iclass:=tert end + else if 'e' in cset then + begin iclass:=second end + else iclass:=prim; + for i:=0 to nops-1 do + begin + with dispat[iclass,opcode+i] do + begin + iflag:=f; instr:=insr; + if '2' 
in cset then ilength:=2 + else if 'u' in cset then ilength:=2 + else if '4' in cset then ilength:=4 + else if '8' in cset then ilength:=8 + else if (mini in f) or (short in f) then + begin + if 'N' in cset then wtemp:=-1-i else wtemp:=i ; + if 'o' in cset then wtemp:=wtemp+1 ; + if short in f then wtemp:=wtemp*256 ; + implicit:=wtemp + end + end + end + until eof(tables); + + { read in program text, data and procedure descriptors } + reset(prog); + readhdr; { verify first header } + for i:=1 to 8 do header[i]:=readadr; { read second header } + hp:=maxdata+1; sp:=maxdata+1; lino(0); + { read program text } + if header[NTEXT]+header[NPROC]*pdsize>maxcode then + begin writeln('Text size too large'); halt end; + if header[SZDATA]>maxdata then + begin writeln('Data size too large'); halt end; + for i:=0 to header[NTEXT]-1 do code[i]:=readbyte; + { read data blocks } + nexta:=0; + for i:=1 to header[NDATA] do + begin + n:=readbyte; + if n<>0 then + begin + elem:=readbyte; firsta:=nexta; + case n of + 1: { uninitialized words } + for j:=1 to elem do + begin store(nexta,undef); nexta:=nexta+wsize end; + 2: { initialized bytes } + for j:=1 to elem do + begin storeb(nexta,readbyte); nexta:=nexta+1 end; + 3: { initialized words } + for j:=1 to elem do + begin store(nexta,readword); nexta:=nexta+wsize end; + 4,5: { instruction and data pointers } + for j:=1 to elem do + begin storea(nexta,readadr); nexta:=nexta+asize end; + 6: { signed integers } + begin readint(nexta,elem); nexta:=nexta+elem end; + 7: { unsigned integers } + begin readuns(nexta,elem); nexta:=nexta+elem end; + 8: { floating point numbers } + begin readfloat(nexta,elem); nexta:=nexta+elem end; + end + end + else + begin + repc:=readadr; + amount:=nexta-firsta; + for count:=1 to repc do + begin + for ofst:=0 to amount-1 do data[nexta+ofst]:=data[firsta+ofst]; + nexta:=nexta+amount; + end + end + end; + if header[SZDATA]<>nexta then writeln('Data initialization error'); + hp:=nexta; + { read descriptor table } + 
pd:=header[NTEXT]; + for i:=1 to header[NPROC]*pdsize do code[pd+i-1]:=readbyte; + { call the entry point routine } + ignmask:=0; { catch all traps, higher numbered traps cannot be ignored} + retsize:=0; + lb:=maxdata; { illegal dynamic link } + pc:=maxcode; { illegal return address } + push(0); a:=sp; { No environment } + push(0); b:=sp; { No args } + pusha(a); { envp } + pusha(b); { argv } + push(0); { argc } + call(argp(header[ENTRY])); +end; +{ +.bp +{---------------------------------------------------------------------------} +{ MAIN LOOP OF THE INTERPRETER } +{---------------------------------------------------------------------------} +{ It should be noted that the interpreter (microprogram) for an EM + machine can be written in two fundamentally different ways: (1) the + instruction operands are fetched in the main loop, or (2) the in- + struction operands are fetched after the 256 way branch, by the exe- + cution routines themselves. In this interpreter, method (1) is used + to simplify the description of execution routines. The dispatch + table dispat is used to determine how the operand is encoded. There + are 4 possibilities: + + 0. There is no operand + 1. The operand and instruction are together in 1 byte (mini) + 2. The operand is one byte long and follows the opcode byte(s) + 3. The operand is two bytes long and follows the opcode byte(s) + 4. The operand is four bytes long and follows the opcode byte(s) + + In this interpreter, the main loop determines the operand type, + fetches it, and leaves it in the global variable k for the execution + routines to use. Consequently, instructions such as LOL, which use + three different formats, need only be described once in the body of + the interpreter. + However, for a production interpreter, or a hardware EM + machine, it is probably better to use method (2), i.e. to let the + execution routines themselves fetch their own operands. 
The reason + for this is that each opcode uniquely determines the operand format, + so no table lookup in the dispatch table is needed. The whole table + is not needed. Method (2) therefore executes much faster. + However, separate execution routines will be needed for LOL with + a one byte offset, and LOL with a two byte offset. It is to avoid + this additional clutter that method (1) is used here. In a produc- + tion interpreter, it is envisioned that the main loop will fetch the + next instruction byte, and use it as an index into a 256 word table + to find the address of the interpreter routine to jump to. The + routine jumped to will begin by fetching its operand, if any, + without any table lookup, since it knows which format to expect. + After doing the work, it returns to the main loop by jumping in- + directly to a register that contains the address of the main loop. + A slight variation on this idea is to have the register contain + the address of the branch table, rather than the address of the main + loop. + Another issue is whether the instructions LOL 0, LOL + 2, LOL 4, etc. should all have distinct execution routines. Doing + so provides for the maximum speed, since the operand is implicit in + the routine itself. The disadvantage is that many nearly identical + execution routines will then be needed. Another way of doing it is + to keep the instruction byte fetched from memory (LOL 0, LOL 2, LOL + 4, etc.) in some register, and have all the LOL mini format instruc- + tions branch to a common routine. This routine can then determine + the operand by subtracting the code for LOL 0 from the register, + leaving the true operand in the register (as a word quantity of + course). This method makes the interpreter smaller, but is a bit + slower. +.bp + To make this important point a little clearer, consider how a + production interpreter for the PDP-11 might appear.
Let us assume the + following opcodes have been assigned: + + 31: LOL -2 (2 bytes, i.e. next word) + 32: LOL -4 + 33: LOL -6 + 34: LOL b (format with a one byte offset) + 35: LOL w (format with a one word, i.e. two byte offset) + + Further assume that each of the 5 opcodes will have its own execution + routine, i.e. we are making a tradeoff in favor of fast execution and + a slightly larger interpreter. + Register r5 is the em program counter. + Register r4 is the em LB register + Register r3 is the em SP register (the stack grows toward low core) + Register r2 contains the interpreter address of the main loop + + The main loop looks like this: + + movb (r5)+,r0 /fetch the opcode into r0 and increment r5 + asl r0 /shift r0 left 1 bit. Now: -256<=r0<=+254 + jmp *table(r0) /jump to execution routine + + Notice that no operand fetching has been done. The execution routines for + the 5 sample instructions given above might be as follows: + + lol2: mov -2(r4),-(sp) /push local -2 onto stack + jmp (r2) /go back to main loop + lol4: mov -4(r4),-(sp) /push local -4 onto stack + jmp (r2) /go back to main loop + lol6: mov -6(r4),-(sp) /push local -6 onto stack + jmp (r2) /go back to main loop + lolb: mov $177400,r0 /prepare to fetch the 1 byte operand + bisb (r5)+,r0 /operand is now in r0 + asl r0 /r0 is now offset from LB in bytes, not words + add r4,r0 /r0 is now address of the needed local + mov (r0),-(sp) /push the local onto the stack + jmp (r2) + lolw: clr r0 /prepare to fetch the 2 byte operand + bisb (r5)+,r0 /fetch high order byte first !!! 
+ swab r0 /insert high order byte in place + bisb (r5)+,r0 /insert low order byte in place + asl r0 /convert offset to bytes, from words + add r4,r0 /r0 is now address of needed local + mov (r0),-(sp) /stack the local + jmp (r2) /done + + The important thing to notice is where and how the operand fetch occurred: + lol2, lol4, and lol6, (the minis) have implicit operands + lolb knew it had to fetch one byte, and did so without any table lookup + lolw knew it had to fetch a word, and did so, high order byte first } +{ +.bp +.sp 4 +{---------------------------------------------------------------------------} +{ Routines for the individual instructions } +{---------------------------------------------------------------------------} +procedure loadops; +var j:integer; +begin + case insr of + { LOAD GROUP } + LDC: pushd(argd(k)); + LOC: pushsw(argc(k)); + LOL: push(memw(locadr(k))); + LOE: push(memw(argg(k))); + LIL: push(memw(mema(locadr(k)))); + LOF: push(memw(popa+argf(k))); + LAL: pusha(locadr(k)); + LAE: pusha(argg(k)); + LXL: begin a:=lb; for j:=1 to argn(k) do a:=mema(a+savsize); pusha(a) end; + LXA: begin a:=lb; + for j:=1 to argn(k) do a:= mema(a+savsize); + pusha(a+savsize) + end; + LOI: pushx(argo(k),popa); + LOS: begin k:=argw(k); if k<>wsize then trap(EILLINS); + k:=pop; pushx(argo(k),popa) + end; + LDL: begin a:=locadr(k); push(memw(a+wsize)); push(memw(a)) end; + LDE: begin k:=argg(k); push(memw(k+wsize)); push(memw(k)) end; + LDF: begin k:=argf(k); + a:=popa; push(memw(a+k+wsize)); push(memw(a+k)) + end; + LPI: push(argp(k)) + end +end; + +procedure storeops; +begin + case insr of + { STORE GROUP } + STL: store(locadr(k),pop); + STE: store(argg(k),pop); + SIL: store(mema(locadr(k)),pop); + STF: begin a:=popa; store(a+argf(k),pop) end; + STI: popx(argo(k),popa); + STS: begin k:=argw(k); if k<>wsize then trap(EILLINS); + k:=popa; popx(argo(k),popa) + end; + SDL: begin a:=locadr(k); store(a,pop); store(a+wsize,pop) end; + SDE: begin k:=argg(k); store(k,pop); 
store(k+wsize,pop) end; + SDF: begin k:=argf(k); a:=popa; store(a+k,pop); store(a+k+wsize,pop) end + end +end; + +procedure intarith; +var i:integer; +begin + case insr of + { SIGNED INTEGER ARITHMETIC } + ADI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:=popsw; push(fitsw(ss+st,EIOVFL)) end; + 2: begin dt:=popd; ds:=popd; pushd(doadi(ds,dt)) end; + end ; + SBI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:= popsw; push(fitsw(ss-st,EIOVFL)) end; + 2: begin dt:=popd; ds:=popd; pushd(dosbi(ds,dt)) end; + end ; + MLI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:= popsw; push(fitsw(ss*st,EIOVFL)) end; + 2: begin dt:=popd; ds:=popd; pushd(domli(ds,dt)) end; + end ; + DVI: case szindex(argw(k)) of + 1: begin st:= popsw; ss:= popsw; + if st=0 then trap(EIDIVZ) else pushsw(ss div st) + end; + 2: begin dt:=popd; ds:=popd; pushd(dodvi(ds,dt)) end; + end; + RMI: case szindex(argw(k)) of + 1: begin st:= popsw; ss:=popsw; + if st=0 then trap(EIDIVZ) else pushsw(ss - (ss div st)*st) + end; + 2: begin dt:=popd; ds:=popd; pushd(dormi(ds,dt)) end + end; + NGI: case szindex(argw(k)) of + 1: begin st:=popsw; pushsw(-st) end; + 2: begin ds:=popd; pushd(dongi(ds)) end + end; + SLI: begin t:=pop; + case szindex(argw(k)) of + 1: begin ss:=popsw; + for i:= 1 to t do sleft(ss); pushsw(ss) + end + end + end; + SRI: begin t:=pop; + case szindex(argw(k)) of + 1: begin ss:=popsw; + for i:= 1 to t do sright(ss); pushsw(ss) + end; + 2: begin ds:=popd; + for i:= 1 to t do sdright(ss); pushd(ss) + end + end + end + end +end; + +procedure unsarith; +var i:integer; +begin + case insr of + { UNSIGNED INTEGER ARITHMETIC } + ADU: case szindex(argw(k)) of + 1: begin t:=pop; s:= pop; push(chopw(s+t)) end; + 2: trap(EILLINS); + end ; + SBU: case szindex(argw(k)) of + 1: begin t:=pop; s:= pop; push(chopw(s-t)) end; + 2: trap(EILLINS); + end ; + MLU: case szindex(argw(k)) of + 1: begin t:=pop; s:= pop; push(chopw(s*t)) end; + 2: trap(EILLINS); + end ; + DVU: case szindex(argw(k)) of + 1: 
begin t:= pop; s:= pop; + if t=0 then trap(EIDIVZ) else push(s div t) + end; + 2: trap(EILLINS); + end; + RMU: case szindex(argw(k)) of + 1: begin t:= pop; s:=pop; + if t=0 then trap(EIDIVZ) else push(s - (s div t)*t) + end; + 2: trap(EILLINS); + end; + SLU: case szindex(argw(k)) of + 1: begin t:=pop; s:=pop; + for i:= 1 to t do suleft(s); push(s) + end; + 2: trap(EILLINS); + end; + SRU: case szindex(argw(k)) of + 1: begin t:=pop; s:=pop; + for i:= 1 to t do suright(s); push(s) + end; + 2: trap(EILLINS); + end + end +end; + +procedure fltarith; +begin + case insr of + { FLOATING POINT ARITHMETIC } + ADF: begin argwf(k); rt:=popr; rs:=popr; pushr(doadf(rs,rt)) end; + SBF: begin argwf(k); rt:=popr; rs:=popr; pushr(dosbf(rs,rt)) end; + MLF: begin argwf(k); rt:=popr; rs:=popr; pushr(domlf(rs,rt)) end; + DVF: begin argwf(k); rt:=popr; rs:=popr; pushr(dodvf(rs,rt)) end; + NGF: begin argwf(k); rt:=popr; pushr(dongf(rt)) end; + FIF: begin argwf(k); rt:=popr; rs:=popr; + dofif(rt,rs,x,y); pushr(y); pushr(x) + end; + FEF: begin argwf(k); rt:=popr; dofef(rt,x,ss); pushr(x); pushsw(ss) end + end +end; + +procedure ptrarith; +begin + case insr of + { POINTER ARITHMETIC } + ADP: pusha(popa+argf(k)); + ADS: case szindex(argw(k)) of + 1: begin st:=popsw; pusha(popa+st) end; + 2: begin dt:=popd; pusha(popa+dt) end; + end; + SBS: begin + a:=popa; b:=popa; + case szindex(argw(k)) of + 1: push(fitsw(b-a,EIOVFL)); + 2: pushd(b-a) + end + end + end +end; + +procedure incops; +var j:integer; +begin + case insr of + { INCREMENT/DECREMENT/ZERO } + INC: push(fitsw(popsw+1,EIOVFL)); + INL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; + INE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))+1,EIOVFL)) end; + DEC: push(fitsw(popsw-1,EIOVFL)); + DEL: begin a:=locadr(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; + DEE: begin a:=argg(k); store(a,fitsw(signwd(memw(a))-1,EIOVFL)) end; + ZRL: store(locadr(k),0); + ZRE: store(argg(k),0); + ZER: for j:=1 to argw(k) div wsize do 
push(0); + ZRF: pushr(0); + end +end; + +procedure convops; +begin + case insr of + { CONVERT GROUP } + CII: begin s:=pop; t:=pop; + if tmaxsint then trap(ECONV); push(s) end; + 2: trap(EILLINS); + end; + 2: case szindex(argw(pop)) of + 1: pushd(pop); + 2: trap(EILLINS); + end; + end; + CUU: case szindex(argw(pop)) of + 1: if szindex(argw(pop))=2 then trap(EILLINS); + 2: trap(EILLINS); + end; + CUF: begin argwf(pop); + if szindex(argw(pop))=1 then pushr(pop) else trap(EILLINS) + end; + CFI: begin sz:=argw(pop); argwf(pop); rt:=popr; + case szindex(sz) of + 1: push(fitsw(trunc(rt),ECONV)); + 2: pushd(fitd(trunc(rt))); + end + end; + CFU: begin sz:=argw(pop); argwf(pop); rt:=popr; + case szindex(sz) of + 1: push( chopw(trunc(abs(rt)-0.5)) ); + 2: trap(EILLINS); + end + end; + CFF: begin argwf(pop); argwf(pop) end + end +end; + +procedure logops; +var i,j:integer; +begin + case insr of + { LOGICAL GROUP } + XAND: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin a:=sp+k; t:=pop; store(a,bf(andf,memw(a),t)) end; + end; + IOR: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin a:=sp+k; t:=pop; store(a,bf(iorf,memw(a),t)) end; + end; + XOR: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin a:=sp+k; t:=pop; store(a,bf(xorf,memw(a),t)) end; + end; + COM: + begin k:=argw(k); + for j:= 1 to k div wsize do + begin + store(sp+k-wsize*j, bf(xorf,memw(sp+k-wsize*j), negoff-1)) + end + end; + ROL: begin k:=argw(k); if k<>wsize then trap(EILLINS); + t:=pop; s:=pop; for i:= 1 to t do rleft(s); push(s) + end; + ROR: begin k:=argw(k); if k<>wsize then trap(EILLINS); + t:=pop; s:=pop; for i:= 1 to t do rright(s); push(s) + end + end +end; + +procedure setops; +var i,j:integer; +begin + case insr of + { SET GROUP } + INN: + begin k:=argw(k); + t:=pop; + i:= t mod 8; t:= t div 8; + if t>=k then + begin trap(ESET); s:=0 end + else + begin s:=memb(sp+t) end; + newsp(sp+k); push(bit(i,s)); + end; + XSET: + begin k:=argw(k); + t:=pop; + i:= t mod 8; t:= t div 8; + for 
j:= 1 to k div wsize do push(0); + if t>=k then + trap(ESET) + else + begin s:=1; for j:= 1 to i do rleft(s); storeb(sp+t,s) end + end + end +end; + +procedure arrops; +begin + case insr of + { ARRAY GROUP } + LAR: + begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + pushx(argo(memw(a+2*k)),arraycalc(a)) + end; + SAR: + begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + popx(argo(memw(a+2*k)),arraycalc(a)) + end; + AAR: + begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + push(arraycalc(a)) + end + end +end; + +procedure cmpops; +begin + case insr of + { COMPARE GROUP } + CMI: case szindex(argw(k)) of + 1: begin st:=popsw; ss:=popsw; + if ss memw(sp+k+j) then t:=1; + j:=j+wsize + end; + newsp(sp+wsize*k); push(t); + end; + + TLT: if popsw < 0 then push(1) else push(0); + TLE: if popsw <= 0 then push(1) else push(0); + TEQ: if pop = 0 then push(1) else push(0); + TNE: if pop <> 0 then push(1) else push(0); + TGE: if popsw >= 0 then push(1) else push(0); + TGT: if popsw > 0 then push(1) else push(0); + end +end; + +procedure branchops; +begin + case insr of + { BRANCH GROUP } + BRA: newpc(pc+k); + + BLT: begin st:=popsw; if popsw < st then newpc(pc+k) end; + BLE: begin st:=popsw; if popsw <= st then newpc(pc+k) end; + BEQ: begin t :=pop ; if pop = t then newpc(pc+k) end; + BNE: begin t :=pop ; if pop <> t then newpc(pc+k) end; + BGE: begin st:=popsw; if popsw >= st then newpc(pc+k) end; + BGT: begin st:=popsw; if popsw > st then newpc(pc+k) end; + + ZLT: if popsw < 0 then newpc(pc+k); + ZLE: if popsw <= 0 then newpc(pc+k); + ZEQ: if pop = 0 then newpc(pc+k); + ZNE: if pop <> 0 then newpc(pc+k); + ZGE: if popsw >= 0 then newpc(pc+k); + ZGT: if popsw > 0 then newpc(pc+k) + end +end; + +procedure callops; +var j:integer; +begin + case insr of + { PROCEDURE CALL GROUP } + CAL: call(argp(k)); + CAI: begin call(argp(popa)) end; + RET: begin k:=argz(k); if k div wsize>maxret then trap(EILLINS); + for j:= 1 to k div wsize do retarea[j]:=pop; 
retsize:=k; + newsp(lb); lb:=maxdata+1; { To circumvent stack overflow error } + newpc(popa); + if pc=maxcode then + begin + halted:=true; + if retsize=wsize then exitstatus:=retarea[1] + else exitstatus:=undef + end + else + newlb(popa); + end; + LFR: begin k:=args(k); if k<>retsize then trap(EILLINS); + for j:=k div wsize downto 1 do push(retarea[j]); + end + end +end; + +procedure miscops; +var i,j:integer; +begin + case insr of + { MISCELLANEOUS GROUP } + ASP,ASS: + begin if insr=ASS then + begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=popsw end; + k:=argf(k); + if k<0 + then for j:= 1 to -k div wsize do push(undef) + else newsp(sp+k); + end; + BLM,BLS: + begin if insr=BLS then + begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; + k:=argz(k); + b:=popa; a:=popa; + for j := 1 to k div wsize do + store(b-wsize+wsize*j,memw(a-wsize+wsize*j)) + end; + CSA: begin k:=argw(k); if k<>wsize then trap(EILLINS); + a:=popa; + st:= popsw - signwd(memw(a+asize)); + if (st>=0) and (st<=memw(a+wsize+asize)) then + b:=mema(a+2*wsize+asize+asize*st) else b:=mema(a); + if b=0 then trap(ECASE) else newpc(b) + end; + CSB: begin k:=argw(k); if k<>wsize then trap(EILLINS); a:=popa; + t:=pop; i:=1; found:=false; + while (i<=memw(a+asize)) and not found do + if t=memw(a+(asize+wsize)*i) then found:=true else i:=i+1; + if found then b:=memw(a+(asize+wsize)*i+wsize) else b:=memw(a); + if b=0 then trap(ECASE) else newpc(b); + end; + DCH: begin pusha(mema(popa+dynd)) end; + DUP,DUS: + begin if insr=DUS then + begin k:=argw(k); if k<>wsize then trap(EILLINS); k:=pop end; + k:=args(k); + for i:=1 to k div wsize do push(memw(sp+k-wsize)); + end; + EXG: begin + k:=argw(k); + for i:=1 to k div wsize do push(memw(sp+k-wsize)); + for i:=0 to k div wsize - 1 do + store(sp+k+i*wsize,memw(sp+k+k+i*wsize)); + for i:=1 to k div wsize do + begin t:=pop ; store(sp+k+k-wsize,t) end; + end; + FIL: filna(argg(k)); + GTO: begin k:=argg(k); + newlb(mema(k+2*asize)); newsp(mema(k+asize)); 
newpc(mema(k)) + end; + LIM: push(ignmask); + LIN: lino(argn(k)); + LNI: lino(memw(0)+1); + LOR: begin i:=argr(k); + case i of 0:pusha(lb); 1:pusha(sp); 2:pusha(hp) end; + end; + LPB: pusha(popa+statd); + MON: domon(pop); + NOP: writeln('NOP at line ',memw(0):5) ; + RCK: begin a:=popa; + case szindex(argw(k)) of + 1: if (signwd(memw(sp))signwd(memw(a+wsize))) then trap(ERANGE); + 2: if (memd(sp)memd(a+2*wsize)) then trap(ERANGE); + end + end; + RTT: dortt; + SIG: begin a:=popa; pusha(uerrorproc); uerrorproc:=a end; + SIM: ignmask:=pop; + STR: begin i:=argr(k); + case i of 0: newlb(popa); 1: newsp(popa); 2: newhp(popa) end; + end; + TRP: trap(pop) + end +end; +{ +.bp +{---------------------------------------------------------------------------} +{ Main Loop } +{---------------------------------------------------------------------------} + +begin initialize; +8888: + repeat + opcode := nextpc; { fetch the first byte of the instruction } + if opcode=escape1 then iclass:=second + else if opcode=escape2 then iclass:=tert + else iclass:=prim; + if iclass<>prim then opcode := nextpc; + with dispat[iclass][opcode] do + begin insr:=instr; + if not (zbit in iflag) then + if ibit in iflag then k:=pop else + begin + if mini in iflag then k:=implicit else + begin + if short in iflag then k:=implicit+nextpc else + begin k:=nextpc; + if (sbit in iflag) and (k>=128) then k:=k-256; + for i:=2 to ilength do k:=256*k + nextpc + end + end; + if wbit in iflag then k:=k*wsize; + end + end; +case insr of + + NON: trap(EILLINS); + + { LOAD GROUP } + LDC,LOC,LOL,LOE,LIL,LOF,LAL,LAE,LXL,LXA,LOI,LOS,LDL,LDE,LDF,LPI: + loadops; + + { STORE GROUP } + STL,STE,SIL,STF,STI,STS,SDL,SDE,SDF: + storeops; + + { SIGNED INTEGER ARITHMETIC } + ADI,SBI,MLI,DVI,RMI,NGI,SLI,SRI: + intarith; + + { UNSIGNED INTEGER ARITHMETIC } + ADU,SBU,MLU,DVU,RMU,SLU,SRU: + unsarith; + + { FLOATING POINT ARITHMETIC } + ADF,SBF,MLF,DVF,NGF,FIF,FEF: + fltarith; + + { POINTER ARITHMETIC } + ADP,ADS,SBS: + ptrarith; + + { 
INCREMENT/DECREMENT/ZERO } + INC,INL,INE,DEC,DEL,DEE,ZRL,ZRE,ZER,ZRF: + incops; + + { CONVERT GROUP } + CII,CIU,CIF,CUI,CUU,CUF,CFI,CFU,CFF: + convops; + + { LOGICAL GROUP } + XAND,IOR,XOR,COM,ROL,ROR: + logops; + + { SET GROUP } + INN,XSET: + setops; + + { ARRAY GROUP } + LAR,SAR,AAR: + arrops; + + { COMPARE GROUP } + CMI,CMU,CMP,CMF,CMS, TLT,TLE,TEQ,TNE,TGE,TGT: + cmpops; + + { BRANCH GROUP } + BRA, BLT,BLE,BEQ,BNE,BGE,BGT, ZLT,ZLE,ZEQ,ZNE,ZGE,ZGT: + branchops; + + { PROCEDURE CALL GROUP } + CAL,CAI,RET,LFR: + callops; + + { MISCELLANEOUS GROUP } + ASP,ASS,BLM,BLS,CSA,CSB,DCH,DUP,DUS,EXG,FIL,GTO,LIM, + LIN,LNI,LOR,LPB,MON,NOP,RCK,RTT,SIG,SIM,STR,TRP: + miscops; + + end; { end of case statement } + if not ( (insr=RET) or (insr=ASP) or (insr=BRA) or (insr=GTO) ) then + retsize:=0 ; + until halted; +9999: + writeln('halt with exit status: ',exitstatus:1); + doident; +end. diff --git a/doc/em/int/emdmp.c b/doc/em/int/emdmp.c new file mode 100644 index 0000000..61a38a2 --- /dev/null +++ b/doc/em/int/emdmp.c @@ -0,0 +1,200 @@ +/* $Id: emdmp.c,v 1.3 1994/06/24 10:03:31 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + * + */ + +/* Author: E.G. 
Keizer */ + +/* Print a readable version of the data in the post mortem dump */ +/* dmpc [-s] [-dn,m] [file] */ + +#include +#include +#include + +int dflag = 0 ; +long l_low,l_high; + +int sflag; + +int wsize,asize; +long tsize,dsize; +long ignmask,uerrorproc,cause; +long pc,sp,lb,hp,pd,pb; + +char *cstr[] = { + "Array bound error", + "Range bound error", + "Set error", + "Integer overflow", + "Float overflow", + "Float underflow", + "Divide by 0", + "Divide by 0.0", + "Integer undefined", + "Float undefined", + "Conversion error", + "User error 11", + "User error 12", + "User error 13", + "User error 14", + "User error 15", + "Stack overflow", + "Heap overflow", + "Illegal instruction", + "Illegal size parameter", + "Case error", + "Memory fault", + "Illegal pointer", + "Illegal pc", + "Bad argument of LAE", + "Bad monitor call", + "Bad line number", + "GTO descriptor error" +}; + +FILE *fcore; +char *core = "core" ; +int nbyte=0; + +char *pname; + +int readbyte(); +int read2(); +long readaddr(); +long readword(); +unsigned getbyte(); +long getword(); +long getaddr(); + +main(argc,argv) char **argv; +{ + register i ; + long line,fileaddr; + char tok ; + + scanargs(argc,argv); fcore=fopen(core,"r") ; + if ( fcore==NULL ) fatal("Can't open %s",core) ; + + if ( read2()!=010255 ) fatal("not a post mortem dump"); + if ( read2()!=VERSION ) fatal("wrong version dump file"); + wsize=read2(); asize=read2(); + if ( wsize>4 ) fatal("cannot handle word size %d",wsize) ; + if ( asize>4 ) fatal("cannot handle pointer size %d",asize) ; + tsize=readaddr(); dsize=readaddr(); + ignmask=readaddr(); uerrorproc=readaddr(); cause=readaddr(); + pc=readaddr(); sp=readaddr(); lb=readaddr(); hp=readaddr(); + pd=readaddr(); pb=readaddr(); + if ( sflag==0 ) { + line=getword(0L); + fileaddr=getaddr(4L); + if ( fileaddr ) { + for ( i=0 ; i<40 ; i++ ) { + tok=getbyte(fileaddr++) ; + if ( !isprint(tok) ) break ; + putc(tok,stdout); + } + printf(" "); + } + if ( line ) { + printf("line 
%ld",line) ; + } + if ( fileaddr || line ) printf(", "); + fseek(fcore,512L,0); + + if ( cause>27 ) { + printn("cause",cause) ; + } else { + prints("cause",cstr[(int)cause]); + } + printn("pc",pc);printn("sp",sp);printn("lb",lb); + printn("hp",hp); + if ( pd ) printn("pd",pd) ; + if ( pb ) printn("pb",pb) ; + printn("errproc",uerrorproc) ; + printn("ignmask",ignmask) ; + if ( tsize ) printn("Text size",tsize) ; + if ( dsize ) printn("Data size",dsize) ; + } + if ( dflag==0 ) exit(0); + fatal("d-flag not implemeted (yet)"); + exit(1) ; +} + +scanargs(argc,argv) char **argv ; { + pname=argv[0]; + while ( argv++, argc-- > 1 ) { + switch( argv[0][0] ) { + case '-': switch( argv[0][1] ) { + case 's': sflag++ ; break ; + case 'l': dflag++ ; break ; + default : fatal(": [-s] [-ln.m] [file]") ; + } ; + break ; + default :core=argv[0] ; + } + } +} + +prints(s1,s2) char *s1,*s2; { + printf("%-15s %s\n",s1,s2); +} + +printn(s1,d) char *s1; long d; { + printf("%-15s %15ld\n",s1,d); +} + +/* VARARGS1 */ +fatal(s1,p1,p2,p3,p4,p5) char *s1 ; { + fprintf(stderr,"%s: ",pname); + fprintf(stderr,s1,p1,p2,p3,p4,p5) ; + fprintf(stderr,"\n") ; + exit(1) ; +} + +int getb() { + int i ; + i=getc(fcore) ; + if ( i==EOF ) fatal("Premature EOF"); + return i&0377 ; +} + +int read2() { + int i ; + i=getb() ; return getb()*256 + i ; +} + +long readaddr() { + long res ; + register int i ; + + res=0 ; + for (i=0 ; i +#include +#include +#include + +/* This program reads the human readable interpreter specification + and produces a efficient machine representation that can be + translated by a C-compiler. 
+*/ + +#define ESCAP 256 + +int nerror = 0 ; +int atend = 0 ; +int line = 1 ; +int maxinsl= 0 ; + +extern char em_mnem[][4] ; +char esca[] = "escape" ; +#define ename(no) ((no)==ESCAP?esca:em_mnem[(no)]) + +extern char em_flag[] ; + +main(argc,argv) char **argv ; { + if ( argc>1 ) { + if ( freopen(argv[1],"r",stdin)==NULL) { + fatal("Cannot open %s",argv[1]) ; + } + } + if ( argc>2 ) { + if ( freopen(argv[2],"w",stdout)==NULL) { + fatal("Cannot create %s",argv[2]) ; + } + } + if ( argc>3 ) { + fatal("%s [ file [ file ] ]",argv[0]) ; + } + atend=0 ; + readin(); + atend=1 ; + exit(nerror) ; +} + +readin() { + char *ident(); + char *firstid ; + int opcode,flags; + int c; + + while ( !feof(stdin) ) { + firstid=ident() ; + if ( *firstid=='\n' || feof(stdin) ) continue ; + opcode = getmnem(firstid) ; + printf("%d ",opcode+1) ; + flags = decflag(ident(),opcode) ; + switch(em_flag[opcode]&EM_PAR) { + case PAR_D: case PAR_F: case PAR_B: case PAR_L: case PAR_C: + putchar('S') ; + } + putchar(' '); + while ( (c=readchar())!='\n' && c!=EOF ) putchar(c) ; + putchar('\n') ; + } +} + +char *ident() { + /* skip spaces and tabs, anything up to space,tab or eof is + a identifier. + Anything from # to end-of-line is an end-of-line. + End-of-line is an identifier all by itself. 
+ */ + + static char array[200] ; + register int c ; + register char *cc ; + + do { + c=readchar() ; + } while ( c==' ' || c=='\t' ) ; + for ( cc=array ; cc<&array[(sizeof array) - 1] ; cc++ ) { + if ( c=='#' ) { + do { + c=readchar(); + } while ( c!='\n' && c!=EOF ) ; + } + *cc = c ; + if ( c=='\n' && cc==array ) break ; + c=readchar() ; + if ( c=='\n' ) { + pushback(c) ; + break ; + } + if ( c==' ' || c=='\t' || c==EOF ) break ; + } + *++cc=0 ; + return array ; +} + +int getmnem(str) char *str ; { + char (*ptr)[4] ; + + for ( ptr = em_mnem ; *ptr<= &em_mnem[sp_lmnem][0] ; ptr++ ) { + if ( strcmp(*ptr,str)==0 ) return (ptr-em_mnem) ; + } + error("Illegal mnemonic") ; + return 0 ; +} + +error(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { + if ( !atend ) fprintf(stderr,"line %d: ",line) ; + fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; + fprintf(stderr,"\n"); + nerror++ ; +} + +mess(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { + if ( !atend ) fprintf(stderr,"line %d: ",line) ; + fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; + fprintf(stderr,"\n"); +} + +fatal(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { + error(str,a1,a2,a3,a4,a5,a6) ; + exit(1) ; +} + +#define ILLGL -1 + +check(val) int val ; { + if ( val!=ILLGL ) error("Illegal flag combination") ; +} + +int decflag(str,opc) char *str ; { + int type ; + int escape ; + int range ; + int wordm ; + int notzero ; + char c; + + type=escape=range=wordm=notzero= ILLGL ; + while ( c= *str++ ) { + switch ( c ) { + case 'm' : + check(type) ; type=OPMINI ; break ; + case 's' : + check(type) ; type=OPSHORT ; break ; + case '-' : + check(type) ; type=OPNO ; + if ( (em_flag[opc]&EM_PAR)==PAR_W ) c='i' ; + break ; + case '1' : + check(type) ; type=OP8 ; break ; + case '2' : + check(type) ; type=OP16 ; break ; + case '4' : + check(type) ; type=OP32 ; break ; + case '8' : + check(type) ; type=OP64 ; break ; + case 'u' : + check(type) ; type=OP16U ; break ; + case 'e' : + check(escape) ; escape=0 ; break ; + case 'N' : + 
check(range) ; range= 2 ; break ; + case 'P' : + check(range) ; range= 1 ; break ; + case 'w' : + check(wordm) ; wordm=0 ; break ; + case 'o' : + check(notzero) ; notzero=0 ; break ; + default : + error("Unknown flag") ; + } + putchar(c); + } + if ( type==ILLGL ) error("Type must be specified") ; + switch ( type ) { + case OP64 : + case OP32 : + if ( escape!=ILLGL ) error("Conflicting escapes") ; + escape=ILLGL ; + case OP16 : + case OP16U : + case OP8 : + case OPSHORT : + case OPNO : + if ( notzero!=ILLGL ) mess("Improbable OPNZ") ; + if ( type==OPNO && range!=ILLGL ) { + mess("No operand in range") ; + } + } + if ( escape!=ILLGL ) type|=OPESC ; + if ( wordm!=ILLGL ) type|=OPWORD ; + switch ( range) { + case ILLGL : type|=OP_BOTH ; break ; + case 1 : type|=OP_POS ; break ; + case 2 : type|=OP_NEG ; break ; + } + if ( notzero!=ILLGL ) type|=OPNZ ; + return type ; +} + +static int pushchar ; +static int pushf ; + +int readchar() { + int c ; + + if ( pushf ) { + pushf=0 ; + c = pushchar ; + } else { + if ( feof(stdin) ) return EOF ; + c=getc(stdin) ; + } + if ( c=='\n' ) line++ ; + return c ; +} + +pushback(c) { + if ( pushf ) { + fatal("Double pushback") ; + } + pushf++ ; + pushchar=c ; + if ( c=='\n' ) line-- ; +} diff --git a/doc/em/int/proto.make b/doc/em/int/proto.make new file mode 100644 index 0000000..bf90fd2 --- /dev/null +++ b/doc/em/int/proto.make @@ -0,0 +1,33 @@ +# $Id: proto.make,v 1.3 2003/01/08 09:39:52 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/doc/em/int + +install \ +all: em emdmp tables + +tables: mktables $(SRC_HOME)/etc/ip_spec.t + mktables $(SRC_HOME)/etc/ip_spec.t tables + +mktables: $(SRC_DIR)/mktables.c $(TARGET_HOME)/h/em_spec.h \ + $(TARGET_HOME)/h/em_flag.h \ + $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) $(TARGET_HOME)/h/ip_spec.h + $(CC) -I$(TARGET_HOME)/h -O -o mktables $(SRC_DIR)/mktables.c $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) + +em: $(SRC_DIR)/em.p + apc -O $(SRC_DIR)/em.p >emerrs ; mv a.out em + +nem.p: $(SRC_DIR)/em.p + sed -e '/maxadr = t16/s//maxadr =t15/' -e '/maxdata = 8191; /s//maxdata = 14335;/' -e '/ adr=.*long/s// adr= 0..maxadr/' <$(SRC_DIR)/em.p >nem.p + +nem: nem.p + apc -O nem.p >emerrs ; mv a.out nem + +emdmp: $(SRC_DIR)/emdmp.c + $(CC) -I$(TARGET_HOME)/h -I$(TARGET_HOME)/config -o emdmp -O $(SRC_DIR)/emdmp.c + +cmp: + +pr: + @pr $(SRC_DIR)/em.p $(SRC_DIR)/mktables.c $(SRC_DIR)/emdmp.c diff --git a/doc/em/intro.nr b/doc/em/intro.nr new file mode 100644 index 0000000..7f7c711 --- /dev/null +++ b/doc/em/intro.nr @@ -0,0 +1,173 @@ +.bp +.P1 "INTRODUCTION" +.PP +EM is a family of intermediate languages designed for producing +portable compilers. +The general strategy is for a program called \fBfront end\fP +to translate the source program to EM. +Another program, \fBback end\fP, +translates EM to target assembly language. +Alternatively, the EM code can be assembled to a binary form +and interpreted. +These considerations led to the following goals: +.IP 1 +The design should allow translation to, +or interpretation on, a wide range of existing machines. +Design decisions should be delayed as far as possible +and the implications of these decisions should +be localized as much as possible. +.br +The current microcomputer technology offers 8, 16 and 32 bit machines +with various sizes of address space. +EM should be flexible enough to be useful on most of these +machines. 
+The differences between the members of the EM family should only +concern the wordsize and address space size. +.IP 2 +The architecture should ease the task of code generation for +high level languages such as Pascal, C, Ada, Algol 68, BCPL. +.IP 3 +The instruction set used by the interpreter should be compact, +to reduce the amount of memory needed +for program storage, and to reduce the time needed to transmit +programs over communication lines. +.IP 4 +It should be designed with microprogrammed implementations in +mind; in particular, the use of many short fields within +instruction opcodes should be avoided, because their extraction by the +microprogram or conversion to other instruction formats is inefficient. +.PP +The basic architecture is based on the concept of a stack. The stack +is used for procedure return addresses, actual parameters, local variables, +and arithmetic operations. +There are several built-in object types, +for example, signed and unsigned integers, +floating point numbers, pointers and sets of bits. +There are instructions to push and pop objects +to and from the stack. +The push and pop instructions are not typed. +They only care about the size of the objects. +For each built-in type there are +reverse Polish type instructions that pop one or more +objects from the top of +the stack, perform an operation, and push the result back onto the +stack. +For all types except pointers, +these instructions have the object size +as argument. +.PP +There are no visible general registers used for arithmetic operands +etc. This is in contrast to most third generation computers, which usually +have 8 or 16 general registers. The decision not to have a group of +general registers was fully intentional, and follows W.L. van der +Poel's dictum that a machine should have 0, 1, or an infinite +number of any feature. General registers have two primary uses: to hold +intermediate results of complicated expressions, e.g.
+.DS +((a*b + c*d)/e + f*g/h) * i +.DE +and to hold local variables. +.PP +Various studies +have shown that the average expression has fewer than two operands, +making the former use of registers of doubtful value. The present trend +toward structured programs consisting of many small +procedures greatly reduces the value of registers to hold local variables +because the large number of procedure calls implies a large overhead in +saving and restoring the registers at every call. +.PP +Although there are no general purpose registers, there are a +few internal registers with specific functions as follows: +.TS +tab(:); +l 1 l l l. +PC:\-:Program Counter:Pointer to next instruction +LB:\-:Local Base:Points to base of the local variables +:::in the current procedure. +SP:\-:Stack Pointer:Points to the highest occupied word on the stack. +HP:\-:Heap Pointer:Points to the top of the heap area. +.TE +.PP +Furthermore, reverse Polish code is much easier to generate than +multi-register machine code, especially if highly efficient code is +desired. +When translating to assembly language the back end can make +good use of the target machine's registers. +An EM machine can +achieve high performance by keeping part of the stack +in high speed storage (a cache or microprogram scratchpad memory) rather +than in primary memory. +.PP +Again according to van der Poel's dictum, +all EM instructions have zero or one argument. +We believe that instructions needing two arguments +can be split into two simpler ones. +The simpler ones can probably be used in other +circumstances as well. +Moreover, these two instructions together often +have a shorter encoding than the single +instruction before. +.PP +This document describes EM at three different levels: +the abstract level, the assembly language level and +the machine language level. +.QQ +The most important level is that of the abstract EM architecture. +This level deals with the basic design issues. 
+Only the functional capabilities of instructions are relevant, not their +format or encoding. +Most chapters of this document refer to the abstract level +and it is explicitly stated whenever +another level is described. +.QQ +The assembly language is intended for the compiler writer. +It presents a more or less orthogonal instruction +set and provides symbolic names for data. +Moreover, it facilitates the linking of +separately compiled 'modules' into a single program +by providing several pseudoinstructions. +.QQ +The machine language is designed for interpretation with a compact +program text and easy decoding. +The binary representation of the machine language instruction set is +far from orthogonal. +Frequent instructions have a short opcode. +The encoding is fully byte oriented. +These bytes do not contain small bit fields, because +bit fields would slow down decoding considerably. +.PP +A common use for EM is for producing portable (cross) compilers. +When used this way, the compilers produce +EM assembly language as their output. +To run the compiled program on the target machine, +the back end translates the EM assembly language to +the target machine's assembly language. +When this approach is used, the format of the EM +machine language instructions is irrelevant. +On the other hand, when writing an interpreter for EM machine language +programs, the interpreter must deal with the machine language +and not with the symbolic assembly language. +.PP +As mentioned above, the +current microcomputer technology offers 8, 16 and 32 bit +machines with address spaces ranging from +.Ex 2 16 +to +.Ex 2 32 +bytes. +Having one size of pointers and integers restricts +the usefulness of the language. +We decided to have a different language for each combination of +word and pointer size. +All languages offer the same instruction set and differ only in +memory alignment restrictions and the implicit size assumed in +several instructions.
+The languages +differ slightly for the +different size combinations. +For example: the +size of any object on the stack and alignment restrictions. +The wordsize is restricted to powers of 2 and +the pointer size must be a multiple of the wordsize. +Almost all programs handling EM will be parametrized with word +and pointer size. diff --git a/doc/em/ip.awk b/doc/em/ip.awk new file mode 100644 index 0000000..6c36586 --- /dev/null +++ b/doc/em/ip.awk @@ -0,0 +1,11 @@ +BEGIN { printf(".TS\n"); + for (i = 0; i < 3; i++) + printf("lw(4) 0 lw(6) 0 rw(2) 0 rw(5) 8 "); + printf(".\n"); + } +NF == 4 { printf "%s\t%s\t%d\t%d",$1,$2,$3,$4 } +NF == 3 { printf "%s\t%s\t\t%d",$1,$2,$3 } + { if ( NR%3 == 0 ) printf("\n") ; else printf("\t"); } +END { if ( NR%3 != 0 ) printf("\n"); + printf(".TE\n"); + } diff --git a/doc/em/ispace.nr b/doc/em/ispace.nr new file mode 100644 index 0000000..db1c71f --- /dev/null +++ b/doc/em/ispace.nr @@ -0,0 +1,57 @@ +.bp +.P1 "INSTRUCTION ADDRESS SPACE" +The instruction space of the EM machine contains +the code for procedures. +Tables necessary for the execution of this code, for example, procedure +descriptor tables, may also be present. +The instruction space does not change during +the execution of a program, so that it may be +protected. +No further restrictions to the instruction address space are +necessary for the abstract and assembly language level. +.PP +Each procedure has a single entry point: the first instruction. +A special type of pointer identifies a procedure. +Pointers into the instruction +address space have the same size as pointers into data space and +can, for example, contain the address of the first instruction +or an index in a procedure descriptor table. +.QQ +There is a single EM program counter, PC, pointing +to the next instruction to be executed. +The procedure pointed to by PC is +called the 'current' procedure. +A procedure may call another procedure using the CAL or CAI +instruction. 
+The calling procedure remains 'active' and is resumed whenever the called +procedure returns. +Note that a procedure has several 'active' invocations when +called recursively. +.PP +Each procedure must return properly. +It is not allowed to fall through to the +code of the next procedure. +There are several ways to exit from a procedure: +.IP - +the RET instruction, which returns to the +calling procedure. +.IP - +the RTT instruction, which exits a trap handling routine and resumes +the trapping instruction (see next chapter). +.IP - +the GTO instruction, which is used for non-local goto's. +It can remove several frames from the stack and transfer +control to an active procedure. +(see also MES~11 in paragraph 11.1.4.4) +.PP +All branch instructions can transfer control +to any label within the same procedure. +Branch instructions can never jump out of a procedure. +.PP +Several language implementations use a so called procedure +instance identifier, a combination of a procedure identifier and +the LB of a stack frame, also called static link. +.PP +The program text for each procedure, as well as any tables, +are fragments and can be allocated anywhere +in the instruction address space. diff --git a/doc/em/mach.nr b/doc/em/mach.nr new file mode 100644 index 0000000..2f82202 --- /dev/null +++ b/doc/em/mach.nr @@ -0,0 +1,360 @@ +.bp +.P1 "EM MACHINE LANGUAGE" +.PP +The EM machine language is designed to make program text compact +and to make decoding easy. +Compact program text has many advantages: programs execute faster, +programs occupy less primary and secondary storage and loading +programs into satellite processors is faster. +The decoding of EM machine language is so simple, +that it is feasible to use interpreters as long as EM hardware +machines are not available. +This chapter is irrelevant when back ends are used to +produce executable target machine code. 
+.P2 "Instruction encoding" +.PP +A design goal of EM is to make the +program text as compact as possible. +Decoding must be easy, however. +The encoding is fully byte oriented, without any small bit fields. +There are 256 primary opcodes, two of which are an escape to +two groups of 256 secondary opcodes each. +.QQ +EM instructions without arguments have a single opcode assigned, +possibly escaped: +.ta 12n 24n +.Dr 6 + |--------------| + | opcode | + |--------------| +.De + or +.Dr 6 + |--------------|--------------| + | escape | opcode | + |--------------|--------------| +.De +The encoding for instructions with an argument is more complex. +Several instructions have an address from the global data area +as argument. +Other instructions have different opcodes for positive +and negative arguments. +.LP +There is always an opcode that takes the next two bytes as argument, +high byte first: +.Dr 6 + |--------------|--------------|--------------| + | opcode | hibyte | lobyte | + |--------------|--------------|--------------| +.De + or +.Dr 6 + |--------------|--------------|--------------|--------------| + | escape | opcode | hibyte | lobyte | + |--------------|--------------|--------------|--------------| +.De +An extra escape is provided for instructions with four or eight byte arguments. +.Dr 6 + |--------------|--------------|--------------| |--------------| + | ESCAPE | opcode | hibyte |...| lobyte | + |--------------|--------------|--------------| |--------------| +.De +For most instructions some argument values predominate. +The most frequent combinations of instruction and argument +will be encoded in a single byte, called a mini: +.Dr 6 + |---------------| + |opcode+argument| (mini) + |---------------| +.De +The number of minis is restricted, because only +254 primary opcodes are available. +Many instructions have the bulk of their arguments +fall in the range 0 to 255. 
+Instructions that address global data have their arguments +distributed over a wider range, +but small values of the high byte are common. +For all these cases there is another encoding +that combines the instruction and the high byte of the argument +into a single opcode. +These opcodes are called shorties. +Shorties may be escaped. +.Dr 6 + |--------------|--------------| + | opcode+high | lobyte | (shortie) + |--------------|--------------| +.De + or +.Dr 6 + |--------------|--------------|--------------| + | escape | opcode+high | lobyte | + |--------------|--------------|--------------| +.De +Escaped shorties are useless if the normal encoding has a primary opcode. +Note that for some instruction-argument combinations +several different encodings are available. +It is the task of the assembler to select the shortest of these. +The savings by these mini and shortie +opcodes are considerable, about 55%. +.PP +Further improvements are possible: +the arguments of +many instructions are a multiple of the wordsize. +Some do also not allow zero as an argument. +If these arguments are divided by the wordsize and, +when zero is not allowed, then decremented by 1, more of them can +be encoded as shortie or mini. +The arguments of some other instructions +rarely or never assume the value 0, but start at 1. +The value 1 is then encoded as 0, +2 as 1 and so on. +.PP +Assigning opcodes to instructions by the assembler is completely +table driven. +For details see appendix B. +.P2 "Procedure descriptors" +.PP +The procedure identifiers used in the interpreter are indices +into a table of procedure descriptors. +Each descriptor contains: +.IP 1. +the number of bytes to be reserved for locals at each +invocation. +.br +This is a pointer-sized integer. +.IP 2. +the start address of the procedure +.P2 "Load format" +.PP +The EM machine language load format defines the interface between +the EM assembler/loader and the EM machine itself. 
+A load file consists of a header, the program text to be executed, +a description of the global data area and the procedure descriptor table, +in this order. +All integers in the load file are presented with the +least significant byte first. +.PP +The header has two parts: the first half (eight 16-bit integers) +aids in selecting +the correct EM machine or interpreter. +Some EM machines, for instance, may have hardware floating point +instructions. +.N +The header entries are as follows (bit 0 is rightmost): +.IP 1: +magic number (07255) +.IP 2: +flag bits with the following meaning: +.RS +.IP "bit 0" +TEST; test for integer overflow etc. +.IP "bit 1" +PROFILE; for each source line: count the number of memory +cycles executed. +.IP "bit 2" +FLOW; for each source line: set a bit in a bit map table if +instructions on that line are executed. +.IP "bit 3" +COUNT; for each source line: increment a counter if that line +is entered. +.IP "bit 4" +REALS; set if a program uses floating point instructions. +.IP "bit 5" +EXTRA; more tests during compiler debugging. +.RE +.IP 3: +number of unresolved references. +.IP 4: +version number; used to detect obsolete EM load files. +.IP 5: +wordsize ; the number of bytes in each machine word. +.IP 6: +pointer size ; the number of bytes available for addressing. +.IP 7: +unused +.IP 8: +unused +.LP +The second part of the header (eight entries, of pointer size bytes each) +describes the load file itself: +.IP 1: +NTEXT; the program text size in bytes. +.IP 2: +NDATA; the number of load-file descriptors (see below). +.IP 3: +NPROC; the number of entries in the procedure descriptor table. +.IP 4: +ENTRY; procedure number of the procedure to start with. +.IP 5: +NLINE; the maximum source line number. +.IP 6: +SZDATA; the address of the lowest uninitialized data byte. +.IP 7: +unused +.IP 8: +unused +.PP +The program text consists of NTEXT bytes. +NTEXT is always a multiple of the wordsize. 
+The first byte of the program text is the +first byte of the instruction address +space, i.e. it has address 0. +Pointers into the program text are found in the procedure descriptor +table where relocation is simple and in the global data area. +The initialization of the global data area allows easy +relocation of pointers into both address spaces. +.PP +The global data area is described by the NDATA descriptors. +Each descriptor describes a number of consecutive words (of~wordsize) +and consists of a sequence of bytes. +While reading the descriptors from the load file, one can +initialize the global data area from low to high addresses. +The size of the initialized data area is given by SZDATA, +this number can be used to check the initialization. +.br +The header of each descriptor consists of a byte, describing the type, +and a count. +The number of bytes used for this (unsigned) count depends on the +type of the descriptor and +is either a pointer-sized integer +or one byte. +The meaning of the count depends on the descriptor type. +At load time an interpreter can +perform any conversion deemed necessary, such as +reordering bytes in integers +and pointers and adding base addresses to pointers. +.QQ +In the following pictures we show a graphical notation of the +initializers. +The leftmost rectangle represents the leading byte. +.LP +Fields marked with +.TS +tab(:); +l l. 
+n:contain a pointer-sized integer used as a count +m:contain a one-byte integer used as a count +b:contain a one-byte integer +w:contain a wordsized integer +p:contain a data or instruction pointer +s:contain a null terminated ASCII string +.TE +.Dr 6 + ------------------- + | 0 | n | repeat last initialization n times + ------------------- +.De +.Dr 4 + --------- + | 1 | m | m uninitialized words + --------- +.De +.Dr 6 + ____________ + / bytes \e + ----------------- ----- + | 2 | m | b | b |...| b | m initialized bytes + ----------------- ----- +.De +.Dr 6 + _________ + / word \e + ----------------------- + | 3 | m | w |... m initialized wordsized integers + ----------------------- +.De +.Dr 6 + _________ + / pointer \e + ----------------------- + | 4 | m | p |... m initialized data pointers + ----------------------- +.De +.Dr 6 + _________ + / pointer \e + ----------------------- + | 5 | m | p |... m initialized instruction pointers + ----------------------- +.De +.Dr 6 + ____________ + / bytes \e + ------------------------- + | 6 | m | b | b |...| b | initialized integer of size m + ------------------------- +.De +.Dr 6 + ____________ + / bytes \e + ------------------------- + | 7 | m | b | b |...| b | initialized unsigned of size m + ------------------------- +.De +.Dr 6 + ____________ + / string \e + ------------------------- + | 8 | m | s | initialized float of size m + ------------------------- +.De +.IP type~0: 10 +If the last initialization initialized k bytes starting +at address \fIa\fP, do the same initialization again n times, +starting at \fIa\fP+k, \fIa\fP+2*k, .... \fIa\fP+n*k. +This is the only descriptor whose starting byte +is followed by an integer with the +size of a +pointer, +in all other descriptors the first byte is followed by a one-byte count. +This descriptor must be preceded by a descriptor of +another type. +.IP type~1: 10 +Reserve m words, not explicitly initialized (BSS and HOL). 
+.IP type~2: 10 +The m bytes following the descriptor header are +initializers for the next m bytes of the +global data area. +m is divisible by the wordsize. +.IP type~3: 10 +The m words following the header are initializers for the next m words of the +global data area. +.IP type~4: 10 +The m data address space pointers following the header are +initializers for the next +m data pointers in the global data area. +Interpreters that represent EM pointers by +target machine addresses must relocate all data pointers. +.IP type~5: 10 +The m instruction address space pointers following the header are +initializers for the next +m instruction pointers in the global data area. +Interpreters that represent EM instruction pointers by +target machine addresses must relocate these pointers. +.IP type~6: 10 +The m bytes following the header form +a signed integer number with a size of m bytes, +which is an initializer for the next m bytes +of the global data area. +m is governed by the same restrictions as for +transfer of objects to/from memory. +.IP type~7: 10 +The m bytes following the header form +an unsigned integer number with a size of m bytes, +which is an initializer for the next m bytes +of the global data area. +m is governed by the same restrictions as for +transfer of objects to/from memory. +.IP type~8: 10 +The header is followed by an ASCII string, null terminated, to +initialize, in global data, +a floating point number with a size of m bytes. +m is governed by the same restrictions as for +transfer of objects to/from memory. +The ASCII string contains the notation of a real as used in the +Pascal language. +.PP +The NPROC procedure descriptors on the load file consist of +an instruction space address (of~pointer~size) and +an integer (of~pointer~size) specifying the number of bytes for +locals. 
diff --git a/doc/em/macr.nr b/doc/em/macr.nr new file mode 100644 index 0000000..f13e8e1 --- /dev/null +++ b/doc/em/macr.nr @@ -0,0 +1,113 @@ +.LP +.if n \{\ +.nr LL 78 +.ll 78 \} +.tr ~ +.\" below are three simple macros to get the drawings right +.\" added by Dick Grune +.de Dr \" Drawing $1 (size) +.sp 1 +.ne \\$1 +.na +.ft CW \" constant spacing +.lg 0 \" no ligatures +.. +.de Df \" Drawing Footer +.br +.sp 1 +.ft R +.ce 1000 +.lg 1 +.. +.de De \" Drawing End $1 (lines) +.br +.ft R +.lg 1 +.ce 0 +.ad +.sp \\$1 +.. +.\" macro for exponents, added by Ceriel Jacobs +.de Ex \" Exponent $1 $2 [$3] +\\$1\v'-0.5m'\s-2\\$2\s+2\v'0.5m'\\$3 +.. +.\" QQ is like PP, but without space +. \" use .PP, with PD 0. +.de QQ +.nr xx \\n(PD +.nr PD 0 +.PP +.nr PD \\n(xx +.. +.nr N1 0 +.nr N2 0 +.nr N3 0 +.nr N4 0 +.nr N5 0 +.nr A5 0 +.af A5 A +.de P1 +.nr N2 0 +.nr N1 \\n(N1+1 +.ds Tl "\\n(N1. \\$1 +.Ca 0 +.sp +.LP +\\fB\\n(N1. \\$1\\fP +.sp +.. +.de P2 +.nr N3 0 +.nr N2 \\n(N2+1 +.ds Tl "\\n(N1.\\n(N2 \\$1 +.ne 5 +.Ca 2 +.sp +.LP +\\fB\\n(N1.\\n(N2 \\$1\fP +.. +.de P3 +.nr N4 0 +.nr N3 \\n(N3+1 +.ds Tl "\\n(N1.\\n(N2.\\n(N3 \\$1 +.Ca 4 +.LP +\\fI\\n(N1.\\n(N2.\\n(N3 \\$1\fP +.. +.de P4 +.nr N4 \\n(N4+1 +.ds Tl "\\n(N1.\\n(N2.\\n(N3.\\n(N4 \\$1 +.ne 5 +.Ca 6 +.LP +\\fI\\n(N1.\\n(N2.\\n(N3.\\n(N4 \\$1\fP +.. +.de AP +.nr N5 \\n(N5+1 +.nr A5 \\n(N5 +.ds Tl "\\n(A5. \\$1 +.ne 5 +.Ca 0 +.LP +\\fB\\n(A5. \\$1\\fP +.sp +.. +.de Ca +.da Cc +.if \\$1=0 \!.sp \\\\n(PDu +\!\l\&\\$1n\ \&\\*(Tl \l\&|\\\\n(LLu-\w\&\ \\n(PN\&u.\&\ \\n(PN +\!.br +.da +.. +.de Ct +.Cc +.rm Cc +.. +.de PT +.lt \\n(LLu +.pc % +.nr PN \\n%-1 +.if \\n(PN%2=1 .tl '''\\n(PN' +.if (\\n(PN%2=0)&(\\n(PN) .tl '\\n(PN''' +.lt \\n(.lu +.. 
diff --git a/doc/em/mapping.nr b/doc/em/mapping.nr new file mode 100644 index 0000000..22a75c8 --- /dev/null +++ b/doc/em/mapping.nr @@ -0,0 +1,232 @@ +.bp +.P1 "MAPPING OF EM DATA MEMORY ONTO TARGET MACHINE MEMORY" +.PP +The EM architecture is designed to be implemented +on many existing and future machines. +EM memory is highly fragmented to make +adaptation to various memory architectures possible. +Format and encoding of pointers is explicitly undefined. +.PP +This chapter gives solutions to some of the +anticipated problems. +First, we describe a possible memory layout for machines +with 64K bytes of address space. +Here we use a member of the EM family with 2-byte word and pointer +size. +The most straightforward layout is shown in figure 2. +.Dr 40 + 65534 \-> |-------------------------------| + |///////////////////////////////| + |//// unimplemented memory /////| + |///////////////////////////////| + ML \-> |-------------------------------| + | | + | | <\- LB + | stack and local area | + | | + |-------------------------------| <\- SP + |///////////////////////////////| + |//////// inaccessible /////////| + |///////////////////////////////| + |-------------------------------| <\- HP + | | + | heap area | + | | + | | + HB \-> |-------------------------------| + | | + | global data area | + | | + EB \-> |-------------------------------| + | | + | program text | <\- PC + | | + | ( and tables ) | + | | + | | + PB \-> |-------------------------------| + |///////////////////////////////| + |////////// undefined //////////| + |///////////////////////////////| + 0 \-> |-------------------------------| +.Df +Figure 2. Memory layout showing typical register +positions during execution of an EM program. +.De +.sp 1 +The base registers for the various memory pieces can be stored +in target machine registers or memory. +.TS +tab(;); +l 1 l l l. +PB;:;program base;points to the base of the instruction address space. 
+EB;:;external base;points to the base of the data address space. +HB;:;heap base;points to the base of the heap area. +ML;:;memory limit;marks the high end of the addressable data space. +.TE +.LP +The stack grows from high +EM addresses to low EM addresses, and the heap the +other way. +The memory between SP and HP is not accessible, +but may be allocated later to the stack or the heap if needed. +The local data area is allocated starting at the high end of +memory. +.PP +Because EM address 0 is not mapped onto target +address 0, a problem arises when pointers are used. +If a program pushed a constant, say 6, onto the stack, +and then tried to indirect through it, +the wrong word would be fetched, +because EM address 6 is mapped onto target address EB+6 +and not target address 6 itself. +This particular problem is solved by explicitly declaring +the format of a pointer to be undefined, +so that using a constant as a pointer is completely illegal. +However, the general problem of mapping pointers still exists. +.PP +There are two possible solutions. +In the first solution, EM pointers are represented +in the target machine as true EM addresses, +for example, a pointer to EM address 6 really is +stored as a 6 in the target machine. +This solution implies that every time a pointer is fetched +EB must be added before referencing +the target machine's memory. +If the target machine has powerful indexing +facilities, EB can be kept in a target machine register, +and the relocation can indeed be done on +every reference to the data address space +at a modest cost in speed. +.PP +The other solution consists of having EM pointers +refer to the true target machine address. +Thus the instruction LAE 6 (Load Address of External 6) +would push the value of EB+6 onto the stack. +When this approach is chosen, back ends must know +how to offset from EB, to translate all +instructions that manipulate EM addresses. 
+However, the problem is not completely solved, +because a front end may have to initialize a pointer +in CON or ROM data to point to a global address. +This pointer must also be relocated by the back end or the interpreter. +.PP +Although the EM stack grows from high to low EM addresses, +some machines have hardware PUSH and POP +instructions that require the stack to grow upwards. +If reasons of efficiency demand the use of these +instructions, then EM +can be implemented with the memory layout +upside down, as shown in figure 3. +This is possible because the pointer format is explicitly undefined. +The first element of a word array will have a +lower physical address than the second element. +.Dr 18 + | | | | + | EB=60 | | ^ | + | | | | | + |-----------------| |-----------------| + 105 | 45 | 44 | 104 214 | 41 | 40 | 215 + |-----------------| |-----------------| + 103 | 43 | 42 | 102 212 | 43 | 42 | 213 + |-----------------| |-----------------| + 101 | 41 | 40 | 100 210 | 45 | 44 | 211 + |-----------------| |-----------------| + | | | | | + | v | | EB=255 | + | | | | + + Type A Type B +.Df +Figure 3. Two possible memory implementations. +Numbers within the boxes are EM addresses. +The other numbers are physical addresses. +.De +.LP +So, we have two different EM memory implementations: +.IP "A~\-" +stack downwards +.IP "B~\-" +stack upwards +.PP +For each of these two possibilities we give the translation of +the EM instructions to push the third byte of a global data +block starting at EM address 40 onto the stack and to load the +word at address 40. +All translations assume a word and pointer size of two bytes. +The target machine used is a PDP-11 augmented with push and pop instructions. +Registers 'r0' and 'r1' are used and suffer from sign extension for byte +transfers. +Push $40 means push the constant 40, not word 40. +.PP +The translation of the EM instructions depends on the pointer representation +used. 
+For each of the two solutions explained above the translation is given. +.PP +First, the translation for the two implementations using EM addresses as +pointer representation: +.KS +.TS +tab(:), center; +l s l s l s +l 2 l 6 l 2 l 6 l 2 l. +EM:type A:type B +_ +LAE:40:push:$40:push:$40 + +ADP:3:pop:r0:pop:r0 +::add:$3,r0:add:$3,r0 +::push:r0:push:r0 + +LOI:1:pop:r0:pop:r0 +::\-::neg:r0 +::clr:r1:clr:r1 +::bisb:eb(r0),r1:bisb:eb(r0),r1 +::push:r1:push:r1 + +LOE:40:push:eb+40:push:eb-41 +.TE +.KE +.PP +The translation for the two implementations, if the target machine address is +used as pointer representation, is: +.KS +.TS +tab(:), center; +l s l s l s +l 2 l 6 l 2 l 6 l 2 l. +EM:type A:type B +_ +LAE:40:push:$eb+40:push:$eb-40 + +ADP:3:pop:r0:pop:r0 +::add:$3,r0:sub:$3,r0 +::push:r0:push:r0 + +LOI:1:pop:r0:pop:r0 +::clr:r1:clr:r1 +::bisb:(r0),r1:bisb:(r0),r1 +::push:r1:push:r1 + +LOE:40:push:eb+40:push:eb-41 +.TE +.KE +.PP +The translation presented above is not intended to be optimal. +Most machines can handle these simple cases in one or two instructions. +It demonstrates, however, the flexibility of the EM design. +.PP +There are several possibilities to implement EM on machines with +address spaces larger than 64k bytes. +For EM with two byte pointers one could allocate instruction and +data space each in a separate 64k piece of memory. +EM pointers still have to fit in two bytes, +but the base registers PB and EB may be loaded in hardware registers +wider than 16 bits, if available. +EM implementations can also make efficient use of a machine +with separate instruction and data space. +.PP +EM with 32 bit pointers allows one to make use of machines +with large address spaces. +In a virtual, segmented memory system one could use a separate +segment for each fragment. 
diff --git a/doc/em/mem.nr b/doc/em/mem.nr new file mode 100644 index 0000000..ee1364f --- /dev/null +++ b/doc/em/mem.nr @@ -0,0 +1,80 @@ +.bp +.P1 MEMORY +.PP +The EM machine has two distinct address spaces, +one for instructions and one for data. +The data space is divided up into 8-bit bytes. +The smallest addressable unit is a byte. +Bytes are numbered consecutively from 0 to some maximum. +All sizes in EM are expressed in bytes. +.PP +Some EM instructions can transfer objects containing several bytes +to and/or from memory. +The size of all objects larger than a word must be a multiple of +the wordsize. +The size of all objects smaller than a word must be a divisor +of the wordsize. +For example: if the wordsize is 2 bytes, objects of the sizes 1, +2, 4, 6,... are allowed. +The address of such an object is the lowest address of all bytes it contains. +For objects smaller than the wordsize, the +address must be a multiple of the object size. +For all other objects the address must be a multiple of the +wordsize. +For example, if an instruction transfers a 4-byte object to memory at +location \fIm\fP and the wordsize is 2, +\fIm\fP must be a multiple of 2 and the bytes at +locations \fIm\fP, \fIm\fP\|+\|1,\fIm\fP\|+\|2 and +\fIm\fP\|+\|3 are overwritten. +.PP +The size of almost all objects in EM +is an integral number of words. +Only two operations are allowed on +objects whose size is a divisor of the wordsize: +push it onto the stack and pop it from the stack. +The addressing of these objects in memory is always indirect. +If such a small object is pushed onto the stack +it is assumed to be a small integer and stored +in the least significant part of a word. +The rest of the word is cleared to zero, +although +EM provides a way to sign-extend a small integer. +Popping a small object from the stack removes a word +from the stack, stores the least significant byte(s) +of this word in memory and discards the rest of the word. 
+.PP +The format of pointers into both address spaces is explicitly undefined. +The size of a pointer, however, is fixed for a member of EM, so that +the compiler writer knows how much storage to allocate for a pointer. +.PP +A minor problem is raised by the undefined pointer format. +Some languages, notably Pascal, require a special, +otherwise illegal, pointer value to represent the nil pointer. +The current Pascal-VU compiler uses the +integer value 0 as nil pointer. +This value is also used by many C programs as a normally impossible address. +A better solution would be to have a special +instruction loading an illegal pointer value, +but it is hard to imagine an implementation +for which the current solution is inadequate, +especially because the first word in the EM data space +is special and probably not the target of any pointer. +.PP +The next two chapters describe the EM memory +in more detail. +One describes the instruction address space, +the other the data address space. +.PP +A design goal of EM has been to allow +its implementation on a wide range of existing machines, +as well as allowing a new one to be built in hardware. +To this extent we have tried to minimize the demands +of EM on the memory structure of the target machine. +Therefore, apart from the logical partitioning, +EM memory is divided into 'fragments'. +A fragment consists of consecutive machine +words and has a base address and a size. +Pointer arithmetic is only defined within a fragment. +The only exception to this rule is comparison with the null +pointer. +All fragments must be word aligned. diff --git a/doc/em/mkdispatch.c b/doc/em/mkdispatch.c new file mode 100644 index 0000000..10a62f2 --- /dev/null +++ b/doc/em/mkdispatch.c @@ -0,0 +1,492 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ * + */ + +#include "ip_spec.h" +#include +#include "em_spec.h" +#include "em_flag.h" + +#ifndef NORCSID +static char rcs_id[] = "$Id: mkdispatch.c,v 1.6 1994/06/24 10:03:13 ceriel Exp $" ; +#endif + +/* This program reads the human readable interpreter specification + and produces a efficient machine representation that can be + translated by a C-compiler. +*/ + +#define NOTAB 600 /* The max no of interpreter specs */ +#define ESCAP1 256 +#define ESCAP2 257 + +struct opform intable[NOTAB] ; +struct opform *lastform = intable-1 ; + +int nerror = 0 ; +int atend = 0 ; +int line = 1 ; + +extern char em_mnem[][4] ; +char esca1[] = "escape1" ; +char esca2[] = "escape2" ; +#define ename(no) ((no)==ESCAP1?esca1:(no)==ESCAP2?esca2:em_mnem[(no)]) + +extern char em_flag[] ; + +main(argc,argv) char **argv ; { + if ( argc>1 ) { + if ( freopen(argv[1],"r",stdin)==NULL) { + fatal("Cannot open %s",argv[1]) ; + } + } + if ( argc>2 ) { + if ( freopen(argv[2],"w",stdout)==NULL) { + fatal("Cannot create %s",argv[2]) ; + } + } + if ( argc>3 ) { + fatal("%s [ file [ file ] ]",argv[0]) ; + } + atend=0 ; + readin(); + atend=1 ; + checkall(); + if ( nerror==0 ) { + writeout(); + } + exit(nerror) ; +} + +readin() { + register struct opform *nextform ; + char *ident(); + char *firstid ; + + for ( nextform=intable ; + !feof(stdin) && nextform<&intable[NOTAB] ; ) { + firstid=ident() ; + if ( *firstid=='\n' || feof(stdin) ) continue ; + lastform=nextform ; + nextform->i_opcode = getmnem(firstid) ; + nextform->i_flag = decflag(ident()) ; + switch ( nextform->i_flag&OPTYPE ) { + case OPMINI: + case OPSHORT: + nextform->i_num = atoi(ident()) ; + break ; + } + nextform->i_low = atoi(ident()) ; + if ( *ident()!='\n' ) { + int c ; + error("End of line expected"); + while ( (c=readchar())!='\n' && c!=EOF ) ; + } + nextform++ ; + } + if ( !feof(stdin) ) fatal("Internal table too small") ; +} + +char *ident() { + /* skip spaces and tabs, anything up to space,tab or eof is + a identifier. 
+ Anything from # to end-of-line is an end-of-line. + End-of-line is an identifier all by itself. + */ + + static char array[200] ; + register int c ; + register char *cc ; + + do { + c=readchar() ; + } while ( c==' ' || c=='\t' ) ; + for ( cc=array ; cc<&array[(sizeof array) - 1] ; cc++ ) { + if ( c=='#' ) { + do { + c=readchar(); + } while ( c!='\n' && c!=EOF ) ; + } + *cc = c ; + if ( c=='\n' && cc==array ) break ; + c=readchar() ; + if ( c=='\n' ) { + pushback(c) ; + break ; + } + if ( c==' ' || c=='\t' || c==EOF ) break ; + } + *++cc=0 ; + return array ; +} + +int getmnem(str) char *str ; { + char (*ptr)[4] ; + + for ( ptr = em_mnem ; *ptr<= &em_mnem[sp_lmnem-sp_fmnem][0] ; ptr++ ) { + if ( strcmp(*ptr,str)==0 ) return (ptr-em_mnem) ; + } + error("Illegal mnemonic") ; + return 0 ; +} + +error(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { + if ( !atend ) fprintf(stderr,"line %d: ",line) ; + fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; + fprintf(stderr,"\n"); + nerror++ ; +} + +mess(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { + if ( !atend ) fprintf(stderr,"line %d: ",line) ; + fprintf(stderr,str,a1,a2,a3,a4,a5,a6) ; + fprintf(stderr,"\n"); +} + +fatal(str,a1,a2,a3,a4,a5,a6) /* VARARGS1 */ char *str ; { + error(str,a1,a2,a3,a4,a5,a6) ; + exit(1) ; +} + +#define ILLGL -1 + +check(val) int val ; { + if ( val!=ILLGL ) error("Illegal flag combination") ; +} + +int decflag(str) char *str ; { + int type ; + int escape ; + int range ; + int wordm ; + int notzero ; + + type=escape=range=wordm=notzero= ILLGL ; + while ( *str ) switch ( *str++ ) { + case 'm' : + check(type) ; type=OPMINI ; break ; + case 's' : + check(type) ; type=OPSHORT ; break ; + case '-' : + check(type) ; type=OPNO ; break ; + case '1' : + check(type) ; type=OP8 ; break ; + case '2' : + check(type) ; type=OP16 ; break ; + case '4' : + check(type) ; type=OP32 ; break ; + case '8' : + check(type) ; type=OP64 ; break ; + case 'u' : + check(type) ; type=OP16U ; break ; + case 'e' : + 
check(escape) ; escape=0 ; break ; + case 'N' : + check(range) ; range= 2 ; break ; + case 'P' : + check(range) ; range= 1 ; break ; + case 'w' : + check(wordm) ; wordm=0 ; break ; + case 'o' : + check(notzero) ; notzero=0 ; break ; + default : + error("Unknown flag") ; + } + if ( type==ILLGL ) error("Type must be specified") ; + switch ( type ) { + case OP64 : + case OP32 : + if ( escape!=ILLGL ) error("Conflicting escapes") ; + escape=ILLGL ; + case OP16 : + case OP16U : + case OP8 : + case OPSHORT : + case OPNO : + if ( notzero!=ILLGL ) mess("Improbable OPNZ") ; + if ( type==OPNO && range!=ILLGL ) { + mess("No operand in range") ; + } + } + if ( escape!=ILLGL ) type|=OPESC ; + if ( wordm!=ILLGL ) type|=OPWORD ; + switch ( range) { + case ILLGL : type|=OP_BOTH ; + if ( type==OPMINI || type==OPSHORT ) + error("Minies and shorties must have P or N") ; + break ; + case 1 : type|=OP_POS ; break ; + case 2 : type|=OP_NEG ; break ; + } + if ( notzero!=ILLGL ) type|=OPNZ ; + return type ; +} + +/* ----------- checking --------------*/ + +int ecodes[256],codes[256],lcodes[256] ; +char eflags[256], flags[256], lflags[256] ; +int elows[256], lows[256], llows[256]; + +#define NMNEM (sp_lmnem-sp_fmnem+1) +#define MUST 1 +#define MAY 2 +#define FORB 3 + +char negc[NMNEM], zc[NMNEM], posc[NMNEM], lnegc[NMNEM], lposc[NMNEM] ; + +checkall() { + register i,flag ; + register struct opform *next ; + int opc,low ; + + for ( i=0 ; ii_flag&0377 ; + opc = next->i_opcode&0377 ; + low = next->i_low&0377 ; + chkc(flag,low,opc,low) ; + switch(flag&OPTYPE) { + case OPNO : zc[opc]++ ; break ; + case OPMINI : + case OPSHORT : + for ( i=1 ; i<((next->i_num)&0377) ; i++ ) { + chkc(flag,low+i,opc,low) ; + } + if ( !(em_flag[opc]&PAR_G) && + (flag&OPRANGE)==OP_BOTH) { + mess("Mini's and shorties should have P or N"); + } + break ; + case OP8 : + error("OP8 is removed") ; + break ; + case OP16 : + if ( flag&OP_NEG ) + negc[opc]++ ; + else if ( flag&OP_POS ) + posc[opc]++ ; + break ; + case OP32 : 
+ if ( flag&OP_NEG ) + lnegc[opc]++ ; + else if ( flag&OP_POS ) + lposc[opc]++ ; + break ; + case OP16U : + break ; + default : + error("Illegal type") ; + break ; + } + } + atend=1 ; + for ( i=0 ; i<256 ; i++ ) if ( codes[i]== -1 ) { + mess("interpreter opcode %d not used",i) ; + } + for ( opc=0 ; opc1 ) mess("More then one OPNO for %s",ename(emc)) ; + if ( posc[emc]>1 ) mess("More then one OP16(pos) for %s",ename(emc)) ; + if ( negc[emc]>1 ) mess("More then one OP16(neg) for %s",ename(emc)) ; + if ( lposc[emc]>1 ) mess("More then one OP32(pos) for %s",ename(emc)) ; + if ( lnegc[emc]>1 ) mess("More then one OP32(neg) for %s",ename(emc)) ; + switch(zf) { + case MUST: + if ( zc[emc]==0 ) mess("No OPNO for %s",ename(emc)) ; + break ; + case FORB: + if ( zc[emc]==1 ) mess("Forbidden OPNO for %s",ename(emc)) ; + break ; + } + switch(pf) { + case MUST: + if ( posc[emc]==0 ) mess("No OP16(pos) for %s",ename(emc)) ; + break ; + case FORB: + if ( posc[emc]==1 ) + mess("Forbidden OP16(pos) for %s",ename(emc)) ; + break ; + } + switch(nf) { + case MUST: + if ( negc[emc]==0 ) mess("No OP16(neg) for %s",ename(emc)) ; + break ; + case FORB: + if ( negc[emc]==1 ) + mess("Forbidden OP16(neg) for %s",ename(emc)) ; + break ; + } +} + +static int pushchar ; +static int pushf ; + +int readchar() { + int c ; + + if ( pushf ) { + pushf=0 ; + c = pushchar ; + } else { + if ( feof(stdin) ) return EOF ; + c=getc(stdin) ; + } + if ( c=='\n' ) line++ ; + return c ; +} + +pushback(c) { + if ( pushf ) { + fatal("Double pushback") ; + } + pushf++ ; + pushchar=c ; + if ( c=='\n' ) line-- ; +} + +writeout() { + register int i; + + printf("DISPATCH1"); + for (i = 0; i < 256;) { + if (!(i % 8)) printf("\n%d", i); + printf("\t%s", ename(codes[i])); + if (i < 254) { + prx(flags[i],lows[i],i); + } + i++; + } + + printf("\nDISPATCH2"); + for (i = 0; i < 256;) { + if (ecodes[i] != -1) { + if (!(i % 8)) printf("\n%d", i); + printf("\t%s", ename(ecodes[i])); + prx(eflags[i],elows[i],i); + } + else break; 
+ i++; + } + + printf("\nDISPATCH3"); + i = 0; + while (lcodes[i] != -1) { + if (!(i % 8)) printf("\n%d", i); + printf("\t%s", ename(lcodes[i])); + prx(lflags[i],llows[i],i); + i++; + } + while (i++ % 8) putchar('\t'); + putchar('\n'); +} + +prx(flg,low,opc) + register int flg; +{ + int arg = opc - low; + + putchar('.'); + switch(flg&OPTYPE) { + case OPNO: + putchar('z'); + break; + case OP16U: + putchar('u'); + break; + case OP16: + if (flg&OP_POS) putchar('p'); + else if (flg&OP_NEG) putchar('n'); + else putchar('l'); + if (flg&OPWORD) putchar('w'); + break; + case OP32: + if (flg&OP_POS) putchar('P'); + else if (flg&OP_NEG) putchar('N'); + else putchar('L'); + if (flg&OPWORD) putchar('w'); + break; + case OPSHORT: + if (flg & OPWORD) putchar('w'); + else putchar('s'); + /* fall through */ + case OPMINI: + if (flg & OPNZ) arg++; + if (flg & OP_NEG) arg = -arg - 1; + printf("%d",arg); + if((flg&OPTYPE) == OPMINI && (flg & OPWORD)) putchar('W'); + } +} diff --git a/doc/em/proto.make b/doc/em/proto.make new file mode 100644 index 0000000..c836380 --- /dev/null +++ b/doc/em/proto.make @@ -0,0 +1,49 @@ +# $Id: proto.make,v 1.4 1994/06/24 10:03:16 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +TBL=tbl + +TARGET_DIR = $(TARGET_HOME)/doc +SRC_DIR = $(SRC_HOME)/doc/em + +head: $(TARGET_DIR)/em.doc + +FILES = $(SRC_DIR)/macr.nr \ + $(SRC_DIR)/title.nr \ + $(SRC_DIR)/intro.nr \ + $(SRC_DIR)/mem.nr \ + $(SRC_DIR)/ispace.nr \ + $(SRC_DIR)/dspace.nr \ + $(SRC_DIR)/mapping.nr \ + $(SRC_DIR)/types.nr \ + $(SRC_DIR)/descr.nr \ + $(SRC_DIR)/env.nr \ + $(SRC_DIR)/traps.nr \ + $(SRC_DIR)/mach.nr \ + $(SRC_DIR)/assem.nr \ + $(SRC_DIR)/em.i \ + $(SRC_DIR)/app.codes.nr \ + $(SRC_DIR)/app.exam.nr \ + $(SRC_DIR)/cont.nr + +IOP=$(SRC_HOME)/etc/ip_spec.t# # to construct itables from + +$(TARGET_DIR)/em.doc: $(FILES) itables dispatdummy + $(TBL) $(FILES) | soelim > $(TARGET_DIR)/em.doc + +itables: $(IOP) $(SRC_DIR)/ip.awk + awk -f $(SRC_DIR)/ip.awk $(IOP) | sed 's/-/\\-/g' | $(TBL) >itables + +dispatdummy: $(IOP) mkdispatch + mkdispatch < $(IOP) > dispatdummy + sed -f $(SRC_DIR)/dispat1.sed < dispatdummy | $(TBL) > dispat1 + sed -f $(SRC_DIR)/dispat2.sed < dispatdummy | $(TBL) > dispat2 + sed -f $(SRC_DIR)/dispat3.sed < dispatdummy | $(TBL) > dispat3 + +mkdispatch: $(SRC_DIR)/mkdispatch.c + $(UCC) -c -I$(UTIL_HOME)/h $(SRC_DIR)/mkdispatch.c + $(UCC) $(ULDOPTIONS) -o mkdispatch mkdispatch.$(USUF) $(UTIL_HOME)/lib.bin/em_data.$(ULIBSUF) + +clean: + rm -f itables dispatdummy dispat? *.o mkdispatch Out diff --git a/doc/em/title.nr b/doc/em/title.nr new file mode 100644 index 0000000..a310510 --- /dev/null +++ b/doc/em/title.nr @@ -0,0 +1,34 @@ +.LP +\& +.sp 10 +.ce 4 +DESCRIPTION OF A MACHINE +ARCHITECTURE FOR USE WITH +BLOCK STRUCTURED LANGUAGES +.sp 6 +.ce 4 +Andrew S. Tanenbaum +Hans van Staveren +Ed G. Keizer +Johan W. Stevenson\v'-0.5m'*\v'0.5m' +.sp 2 +.ce +August 1983 +.sp 2 +.ce +Informatica Rapport IR-81 +.sp 13 +Abstract +.sp 2 +.ti +5 +EM is a family of intermediate languages +designed for producing portable compilers. +A program called \fBfront end\fP +translates source programs to EM. 
+Another program, \fBback end\fP, +translates EM to the assembly language of the target machine. +Alternatively, the EM program can be assembled to a highly +efficient binary format for interpretation. +This document describes the EM languages in detail. +.sp 4 +\v'-0.5m'*\v'0.5m' Present affiliation: NV Philips, Eindhoven diff --git a/doc/em/traps.nr b/doc/em/traps.nr new file mode 100644 index 0000000..13c8db6 --- /dev/null +++ b/doc/em/traps.nr @@ -0,0 +1,169 @@ +.bp +.P1 "TRAPS AND INTERRUPTS" +.PP +EM provides a means for the user program to catch all traps +generated by the program itself, the hardware, or external conditions. +This mechanism uses five instructions: LIM, SIM, SIG, TRP and RTT. +This section of the manual may be omitted on the first reading since it +presupposes knowledge of the EM instruction set. +.PP +The action taken when a trap occurs is determined by the value +of an internal EM trap register. +This register contains a pointer to a procedure. +Initially the pointer used is zero and all traps halt the +program with, hopefully, a useful message to the outside world. +The SIG instruction can be used to alter the trap register, +it pops a procedure pointer from the +stack into the trap register. +When a trap occurs after storing a nonzero value in the trap +register, the procedure pointed to by the trap register +is called with the trap number +as the only parameter (see below). +SIG returns the previous value of the trap register on the +stack. +Two consecutive SIGs are a no-op. +When a trap occurs, the trap register is reset to its initial +condition, to prevent recursive traps from hanging the machine up, +e.g. stack overflow in the stack overflow handling procedure. +.PP +The runtime systems for some languages need to ignore some EM +traps. +EM offers a feature called the ignore mask. +It contains one bit for each of the lowest 16 trap numbers. +The bits are numbered 0 to 15, with the least significant bit +having number 0. 
+If a certain bit is 1 the corresponding trap never +occurs and processing simply continues. +The actions performed by the offending instruction are +described by the Pascal program in appendix A. +.br +If the bit is 0, traps are not ignored. +The instructions LIM and SIM allow copying and replacement of +the ignore mask.~ +.PP +The TRP instruction generates a trap, the trap number being found on the +stack. +This is, among other things, +useful for library procedures and runtime systems. +It can also be used by a low level trap procedure to pass the trap to a +higher level one (see example below). +.PP +The RTT instruction returns from the trap procedure and continues after the +trap. +In the list below all traps marked with an asterisk ('*') are +considered to be fatal and it is explicitly undefined what happens when +restarting after the trap. +.PP +The way a trap procedure is called is completely compatible +with normal calling conventions. The only way a trap procedure +differs from normal procedures is the return. It has to use RTT instead +of RET. This is necessary because the complete runtime status is saved on the +stack before calling the procedure and all this status has to be reloaded. +Error numbers are in the range 0 to 252. +The trap numbers are divided into three categories: +.IP "\0\00\-\063" 12 +EM machine errors, e.g. illegal instruction. +.RS +.IP "\00\-15" 8 +maskable +.IP "16\-63" 8 +not maskable +.RE +.IP "\064\-127" 12 +Reserved for use by compilers, run time systems, etc. +.IP "128\-252" 12 +Available for user programs. +.LP +EM machine errors are numbered as follows: +.TS +tab(@); +n l l. 
+0@EARRAY@Array bound error +1@ERANGE@Range bound error +2@ESET@Set bound error +3@EIOVFL@Integer overflow +4@EFOVFL@Floating overflow +5@EFUNFL@Floating underflow +6@EIDIVZ@Divide by 0 +7@EFDIVZ@Divide by 0.0 +8@EIUND@Undefined integer +9@EFUND@Undefined float +10@ECONV@Conversion error +16*@ESTACK@Stack overflow +17@EHEAP@Heap overflow +18*@EILLINS@Illegal instruction +19*@EODDZ@Illegal size argument +20*@ECASE@Case error +21*@EMEMFLT@Addressing non existent memory +22*@EBADPTR@Bad pointer used +23*@EBADPC@Program counter out of range +24@EBADLAE@Bad argument of LAE +25@EBADMON@Bad monitor call +26@EBADLIN@Argument of LIN too high +27@EBADGTO@GTO descriptor error +.TE +.PP +As an example, +suppose a subprocedure has to be written to do a numeric +calculation. +When an overflow occurs the computation has to be stopped and +the higher level procedure must be resumed. +This can be programmed as follows using the mechanism described above: +.LP +.KS +.nf +.ta 1n 24n + mes 2,2,2 ; set sizes +ersave + bss 2,0,0 ; Room to save previous value of trap procedure +msave + bss 2,0,0 ; Room to save previous value of trap mask + + pro $calcule,0 ; entry point + lxl 0 ; fill in non-local goto descriptor with LB + ste jmpbuf+4 + lor 1 ; and SP + ste jmpbuf+2 + lim ; get current ignore mask + ste msave ; save it + lim + loc 16 ; bit for EFOVFL + ior 2 ; set in mask + sim ; ignore EFOVFL from now on + lpi $catch ; load procedure identifier + sig ; catch will get all traps now + ste ersave ; save previous trap procedure identifier + ; perform calculation now, possibly generating overflow +1 ; label jumped to by catch procedure + loe ersave ; get old trap procedure + sig ; refer all following traps to old procedure + asp 2 ; remove result of sig + loe msave ; restore previous mask + sim ; done now + ; load result of calculation + ret 2 ; return result +jmpbuf + con *1,0,0 + end +.KE +.KS +.LP +Example of catch procedure +.LP +.nf +.ta 1n 24n + pro $catch,0 ; Local procedure that must
catch the overflow trap + lol 2 ; Load trap number + loc 4 ; check for overflow + bne *1 ; if other trap, call higher trap procedure + gto jmpbuf ; return to procedure calcule +1 ; other trap has occurred + loe ersave ; previous trap procedure + sig ; other procedure will get the traps now + asp 2 ; remove the result of sig + lol 2 ; stack trap number + trp ; call other trap procedure + rtt ; if other procedure returns, do the same + end +.KE +.fi diff --git a/doc/em/types.nr b/doc/em/types.nr new file mode 100644 index 0000000..12f6a5b --- /dev/null +++ b/doc/em/types.nr @@ -0,0 +1,142 @@ +.bp +.P1 "TYPE REPRESENTATIONS" +.PP +The representations used for typed objects are not precisely +specified by EM. +Sometimes we only specify that a typed object occupies a +certain amount of space and state no further restrictions. +If one wants to have a different representation of the value of +an object on the stack one has to use a convert instruction +in most cases. +We do specify some relations between the representations of +types. +This allows some intermixed use of operators for different types +on the same object(s). +For example, the instruction ZER pushes signed and +unsigned integers with the value zero and empty sets. +ZER has as only argument the size of the object. +.QQ +The representation of floating point numbers is a good example, +it allows widely varying implementations. +The only ways to create floating point numbers are via +initialization and via conversions from integer numbers. +Only by using conversions to integers and comparing +two floating point numbers with each other, can these numbers +be converted to human readable output. +Implementations may use base 10, base 2 or any other +base for exponents, and have freedom in choosing the range of +exponent and mantissa. +.QQ +Other types are more precisely described. +In the following paragraphs a description will be given of the +restrictions imposed on the representation of the types used. 
+A number \fBn\fP used in these paragraphs indicates the size of +the object in \fIbits\fP. +.P2 "Unsigned integers" +.PP +The range of unsigned integers is 0.. +.Ex 2 "\fBn\fP" -1. +A binary representation is assumed. +The order of the bits within an object is knowingly left +unspecified. +Discussing bit order within each 8-bit byte is academic, +so the only real freedom of this specification lies in the byte +order. +We really do not care whether an implementation of a 4-byte +integer has its bytes in a particular order of significance. +This of course means that some sequences of instructions have +unpredictable effects. +For example: +.DS +LOC 258 ; STL 0 ; LAL 0 ; LOI 1 ( wordsize >=2 ) +.DE +The value on the stack after executing this sequence +can be anything, +but will most likely be 1 or 2. +.QQ +Conversions between unsigned integers of different sizes have to +be done with explicit convert instructions. +One cannot simply pad an unsigned integer with zeros at either end +and expect a correct result. +.QQ +We assume existence of at least single word unsigned arithmetic +in any implementation. +.P2 "Signed Integers" +.PP +The range of signed integers is +.Ex \-2 "\fBn\fP\-1" ~.. +.Ex 2 "\fBn\fP\-1" \-1, +in other words the range of signed integers of \fBn\fP bits +using two's complement arithmetic. +The representation is the same as for unsigned integers except the range +.Ex 2 "\fBn\fP\-1" ~.. +.Ex 2 "\fBn\fP" \-1 +is mapped on the +range +.Ex \-2 "\fBn\fP\-1" ~..~\-1. +In other words, the most significant bit is used as sign bit. +The convert instructions between signed and unsigned integers +of the same size can be used to catch errors. +.QQ +The value +.Ex \-2 "\fBn\fP\-1" +is used for undefined +signed integers. +EM implementations should trap when this value is used in an +operation on signed integers. +The ignore mask, accessed with SIM and LIM \-~see chapter 9~\-, +can be used to disable such traps.
+.QQ +We assume existence of at least single word signed arithmetic +in any implementation. +.P2 "Floating point values" +.PP +Floating point values must have a signed mantissa and a signed +exponent. +Although no base is specified, base 2 is the normal choice, +because the FEF instruction pushes the exponent in base 2. +.QQ +The implementation of floating point arithmetic is optional. +The compilers currently in use have runtime parameters for the +size of the floating point values they should use. +Common choices are 4 and/or 8 bytes. +.P2 Pointers +.PP +EM has two kinds of pointers: for instruction and for data +space. +Each kind can only be used for its own space; conversion between +these two subtypes is impossible. +We assume that pointers have a range from 0 upwards. +Any implementation may have holes in the pointer range between +fragments. +One can of course not expect to be able to address two megabytes +of memory using a 2-byte pointer. +Normally, a 2-byte pointer allows up to 65536 bytes of +addressable memory. +.QQ +Pointer representation has one restriction. +The pointer with the same representation as the integer zero of +the same size should be invalid. +Some languages and/or runtime systems represent the nil +pointer as zero. +.P2 "Bit sets" +.PP +All bit sets of size \fBn\fP are subsets of the set +{~i~|~i>=0,~i<\fBn\fP~}. +A bit set contains a bit for each element showing its +presence or absence. +Bit sets are subdivided into words. +The word with the lowest EM address governs the subset +{~i~|~i>=0,~i<\fBm\fP~}, where \fBm\fP is the number of bits in +a word. +The next higher words each govern the next higher \fBm\fP set elements. +The relation between a set with size of +a word and an unsigned integer word is that +the value of the unsigned integer is the summation of the +2\v'-0.5m'i\v'0.5m' where i is in the set.
+.QQ +Example: a 2-word bit set (wordsize 2) containing the +elements 1, 6, 8, 15, 18, 21, 27 and 28 is composed of two +integers, e.g. at addresses 40 and 42. +The word at 40 contains the value 33090 (or~\-32446), +the word at 42 contains the value 6180. diff --git a/doc/i80.doc b/doc/i80.doc new file mode 100644 index 0000000..b5562d1 --- /dev/null +++ b/doc/i80.doc @@ -0,0 +1,816 @@ +. \" $Id: i80.doc,v 2.6 1994/06/24 10:01:54 ceriel Exp $ +.RP +.ND April 1985 +.TL +Back end table for the Intel 8080 micro-processor +.AU +Gerard Buskermolen +.AB +A back end is a part of the Amsterdam Compiler Kit (ACK). +It translates EM, a family of intermediate languages, into the +assembly language of some target machine, here the Intel 8080 and Intel 8085 microprocessors. +.AE +.NH1 +INTRODUCTION +.PP +To simplify the task of producing portable (cross) compilers and +interpreters, the Vrije Universiteit designed an integrated collection +of programs, the Amsterdam Compiler Kit (ACK). +It is based on the old UNCOL-idea ([4]) which attempts to solve the problem +of making a compiler for each of +.B N +languages on +.B M +different machines without having to write +.B N\ *\ M +programs. +.sp 1 +The UNCOL approach is to write +.B N +"front ends", each of which translates one source language into +a common intermediate language, UNCOL (UNiversal Computer Oriented +Language), and +.B M +"back ends", each of which translates programs in UNCOL into a +specific machine language. +Under these conditions, only +.B N\ +\ M +programs should be written to provide all +.B N +languages on all +.B M +machines, instead of +.B N\ *\ M +programs. +.sp 1 +The intermediate language for the Amsterdam Compiler Kit is the machine +language for a simple stack machine called EM (Encoding Machine). +So a back end for the Intel 8080 micro translates EM code into +8080 assembly language. +.sp 1 +The back end is a single program that is driven by a machine dependent +driving table. 
+This driving table, or back end table, +defines the mapping from EM code to the machine's assembly language. +.NH 1 +THE 8080 MICRO PROCESSOR +.PP +This back end table can be used without modification for the Intel 8085 +processor. +Except for two additional instructions, the 8085 instruction set +is identical and fully compatible with the 8080 instruction set. +So everywhere in this document '8080' can be read as '8080 and 8085'. +.NH 2 +Registers +.PP +The 8080 processor has an 8 bit accumulator, +six general purpose 8-bit registers, +a 16 bit programcounter and a 16 bit stackpointer. +Assembler programs can refer to the accumulator as A and +the general purpose registers as B, C, D, E, H and L. (*) +.FS +* In this document 8080 registers and mnemonics are referenced by capitals, for the sake of clarity. +Nevertheless the assembler expects small letters. +.FE +Several instructions address registers in groups of two, thus creating +16 bit registers: +.DS +Registers referenced: Symbolic reference: + B and C B + D and E D + H and L H +.DE +The first named register contains the high order byte +(H and L stand for High and Low). +.br +The instruction determines how the processor interprets the reference. +For example, ADD B is an 8 bit operation, adding the contents of +register B to accumulator A. By contrast PUSH B is a 16 bit operation +pushing B and C onto the stack. +.sp 1 +There are no index registers. +.sp 1 +.NH 2 +Flip-flops +.PP +The 8080 microprocessor provides five flip-flops used as condition flags +(S, Z, P, C, AC) and one interrupt enable flip-flop IE. +.br +The sign bit S is set (cleared) by certain instructions when the most significant +bit of the result of an operation equals one (zero). +The zero bit Z is set (cleared) by certain operations when the +8-bit result of an operation equals (does not equal) zero. +The parity bit P is set (cleared) if the 8-bit result of an +operation includes an even (odd) number of ones. +C is the normal carry bit.
+AC is an auxiliary carry that indicates whether there has been a carry +out of bit 3 of the accumulator. +This auxiliary carry is used only by the DAA instruction, which +adjusts the 8-bit value in the accumulator to form two 4-bit +binary coded decimal digits. +Needless to say this instruction is not used in the back-end. +.sp 1 +The interrupt enable flip-flop IE is set and cleared under +program control using the instructions EI (Enable Interrupt) and +DI (Disable Interrupt). +It is automatically cleared when the CPU is reset and when +an interrupt occurs, disabling further interrupts until IE = 1 again. +.NH 2 +Addressing modes +.NH 3 +Implied addressing +.PP +The addressing mode of some instructions is implied by the instruction itself. +For example, the RAL (rotate accumulator left) instruction deals only with +the accumulator, and PCHL loads the programcounter with the contents +of register-pair HL. +.NH 3 +Register addressing +.PP +With each instruction using register addressing, +only one register is specified (except for the MOV instruction), +although in many of them the accumulator is implied as +second operand. +Examples are CMP E, which compares register E with the accumulator, +and DCR B, which decrements register B. +A few instructions deal with 16 bit register-pairs: +examples are DCX B, which decrements register-pair BC and the +PUSH and POP instructions. +.NH 3 +Register indirect addressing +.PP +Each instruction that may refer to an 8 bit register, may +refer also to a memory location. In this case the letter M +(for Memory) has to be used instead of a register. +It indicates the memory location pointed to by H and L, +so ADD M adds the contents of the memory location specified +by H and L to the contents of the accumulator. +.br +The register-pairs BC and DE can also be used for indirect addressing, +but only to load or store the accumulator. 
+For example, STAX B stores the contents of the accumulator +into the memory location addressed by register-pair BC. +.NH 3 +Immediate addressing +.PP +The immediate value can be an 8 bit value, as in ADI 10 which +adds 10 to the accumulator, or a 16 bit value, as in +LXI H,1000, which loads 1000 in the register-pair HL. +.NH 3 +Direct addressing +.PP +Jump instructions include a 16 bit address as part of the instruction. +.br +The instruction SHLD 1234 stores the contents of register +pair HL on memory locations 1234 and 1235. +The high order byte is stored at the highest address. +.NH 1 +THE 8080 BACK END TABLE +.PP +The back end table is designed as described in [5]. +For an overall design of a back end table I refer to this document. +.br +This section deals with problems encountered in writing the +8080 back-end table. +Some remarks are made about particular parts +of the table that might not seem clear at first sight. +.NH 2 +Constant definitions +.PP +Word size (EM_WSIZE) and pointer size (EM_PSIZE) are both +defined as two bytes. +The hole between AB and LB (EM_BSIZE) is four bytes: only the +return address and the local base are saved. +.NH 2 +Registers and their properties +.PP +All properties have the default size of two bytes, because one-byte +registers also cover two bytes when put on the real stack. +.sp 1 +The next considerations led to the choice of register-pair BC +as local base. +Though saving the local base in memory would leave one more register-pair +available as scratch register, it would slow down instructions +as 'lol' and 'stl' too much. +So a register-pair should be sacrificed as local base. +Because a back-end without a free register-pair HL is completely +broken-winged, the only reasonable choices are BC and DE. +Though the choice between them might seem arbitrary at first sight, +there is a difference between register-pairs BC and DE: +the instruction XCHG exchanges the contents of register-pairs DE and +HL. 
+When DE and HL are both heavily used on the fake-stack, this instruction +is very useful. +Since it won't be useful too often to exchange HL with the local base +and since an instruction exchanging BC and HL does not exist, BC is +chosen as local base. +.sp 1 +Many of the register properties are never mentioned in the +PATTERNS part of the table. +They are only needed to define the INSTRUCTIONS correctly. +.sp 1 +The properties really used in the PATTERNS part are: +.IP areg: 24 +the accumulator only +.IP reg: +any of the registers A, D, E, H or L. Of course the registers B and C which are +used as local base don't possess this property. +When there is a single register on the fake-stack, its value +is always considered non-negative. +.IP dereg: +register-pair DE only +.IP hlreg: +register-pair HL only +.IP hl_or_de: +register-pairs HL and DE both have this property +.IP local base: +used only once (i.e. in the EM-instruction 'str 0') +.PP +.sp 1 +The stackpointer SP and the processor status word PSW have to be +defined explicitly because they are needed in some instructions +(i.e. SP in LXI, DCX and INX and PSW in PUSH and POP). +.br +It doesn't matter that the processor status word is not just register A +but includes the condition flags. +.NH 2 +Tokens +.PP +The tokens 'm' and 'const1' are used in the INSTRUCTIONS- and MOVES parts only. +They will never be on the fake-stack. +.sp 1 +The token 'label' reflects addresses known at assembly time. +It is used to take full profit of the instructions LHLD +(Load HL Direct) and SHLD (Store HL Direct). +.sp 1 +Compared with many other back-end tables, there are only a small number of +different tokens (four). +Reasons are the limited addressing modes of the 8080 microprocessor, +no index registers etc. 
+For example to translate the EM-instruction +.DS +lol 10 +.DE +the following 8080 instructions are generated: +.DS L +LXI H,10 /* load register pair HL with value 10 */ +DAD B /* add local base (BC) to HL */ +MOV E,M /* load E with byte pointed to by HL */ +INX H /* increment HL */ +MOV D,M /* load D with next byte */ +.DE +Of course, instead of emitting code immediately, it could be postponed +by placing something like a {LOCAL,10} on the fake-stack, but some day the above +mentioned code will have to be generated, so a LOCAL-token is +hardly useful. +See also the comment on the load instructions. +.NH 2 +Sets +.PP +Only 'src1or2' is used in the PATTERNS. +.NH 2 +Instructions +.PP +Each instruction indicates whether or not the condition flags +are affected, but this information will never have any influence +because there are no tests in the PATTERNS part of the table. +.sp 1 +For each instruction a cost vector indicates the number of bytes +the instruction occupies and the number of time periods it takes +to execute the instruction. +The length of a time period depends on the clock frequency +and may range from 480 nanoseconds to 2 microseconds on an +8080 system and from 320 nanoseconds to 2 microseconds +on an 8085 system. +.sp 1 +In the TOKENS-part the cost of token 'm' is defined as (0,3). +In fact it usually takes 3 extra time periods when this register indirect mode +is used instead of register mode, but since the costs are not completely +orthogonal this results in small deficiencies for the DCR, INR and MOV +instructions. +Although it is not particularly useful these deficiencies are +corrected in the INSTRUCTIONS part, by treating the register indirect +mode separately. +.sp 1 +The costs of the conditional call and return instructions really +depend on whether or not the call or return is actually made. +However, this is not important to the behaviour of the back end. +.sp 1 +Instructions not used in this table have been commented out.
+Of course many of them are used in the library routines. +.NH 2 +Moves +.PP +This section is supposed to be straight-forward. +.NH 2 +Tests +.PP +The TESTS section is only included to keep +.B cgg +from complaining. +.NH 2 +Stacking rules +.PP +When, for example, the token {const2,10} has to be stacked while +no free register-pair is available, the following code is generated: +.DS +PUSH H +LXI H,10 +XTHL +.DE +The last instruction exchanges the contents of HL with the value +on top of the stack, giving HL its original value again. +.NH 2 +Coercions +.PP +The coercion to unstack register A is somewhat tricky, +but unfortunately just popping PSW leaves the high-order byte in +the accumulator. +.sp 1 +The cheapest way to coerce HL to DE (or DE to HL) is by using +the XCHG instruction, but it is not possible to explain to +.B cgg +that this instruction in fact exchanges the contents of these +register-pairs. +Before the coercion is carried out other appearances of DE and HL +on the fake-stack will be moved to the real stack, because +the INSTRUCTIONS-part states that XCHG destroys the contents +of both DE and HL. +The coercion transposing one register-pair to another one by +emitting two MOV-instructions, will be used only if +one of the register-pairs is the local base. +.NH 2 +Patterns +.PP +As a general habit I have allocated (uses ...) all registers +that should be free to generate the code, although it is not +always necessary. +For example in the code rule +.DS +pat loe +uses hlreg +gen lhld {label,$1} yields hl +.DE +the 'uses'-clause could have been omitted because +.B cgg +knows that LHLD destroys register-pair HL. +.sp 1 +Since there is only one register with property 'hlreg', +there is no difference between 'uses hlreg' (allocate a +register with property 'hlreg') and 'kills hlreg' (remove +all registers with property 'hlreg' from the fake-stack). +The same applies for the property 'dereg'. +.br +Consequently 'kills' is rarely used in this back-end table.
+.NH 3 +Group 1: Load instructions +.PP +When a local variable must be squared, there will probably be EM-code like: +.DS +lol 10 +lol 10 +mli 2 +.DE +When the code for the first 'lol 10' has been executed, DE contains the +wanted value. +To prevent +.B cgg +from emitting the code for 'lol 10' again, an extra +pattern is included in the table for cases like this. +The same applies for two consecutive 'loe'-s or 'lil'-s. +.sp 1 +A bit tricky is 'lof'. +It expects either DE or HL on the fake-stack, moves {const2,$1} +into the other one, and finally adds them. +The 'kills' part is necessary here because if DE was on the fake-stack, +.B cgg +doesn't see that the contents of DE are destroyed by the code +(in fact 'kills dereg' would have been sufficient: because of the +DAD instruction +.B cgg +knows that HL is destroyed). +.sp 1 +By lookahead, +.B cgg +can make a clever choice between the first and +second code rule of 'loi 4'. +The same applies for several other instructions. +.NH 3 +Group 2: Store instructions +.PP +A similar idea as with the two consecutive identical load instructions +in Group 1, applies for a store instruction followed by a corresponding load instruction. +.NH 3 +Groups 3 and 4: Signed and unsigned integer arithmetic +.PP +Since the 8080 instruction set doesn't provide multiply and +divide instructions, special routines are made to accomplish these tasks. +.sp 1 +Instead of providing four slightly differing routines for 16 bit signed or +unsigned division, yielding the quotient or the remainder, +the routines are merged. +This saves space and assembly time +when several variants are used in a particular program, +at the cost of a little speed. +When the routine is called, bit 7 of register A indicates whether +the operands should be considered as signed or as unsigned integers, +and bit 0 of register A indicates whether the quotient or the +remainder has to be delivered. +.br +The same applies for 32 bit division.
+.sp 1 +The routine doing the 16 bit unsigned multiplication could +have been used for 16 bit signed multiplication too. +Nevertheless a special 16 bit signed multiplication routine is +provided, because this one will usually be much faster. +.NH 3 +Group 5: Floating point arithmetic +.PP +Floating point is not implemented. +Whenever an EM-instruction involving floating points is offered +to the code-generator, it calls the corresponding +library routine with the proper parameters. +Each floating point library routine calls 'eunimpl', +trapping with trap number 63. +Some of the Pascal and C library routines output floating point +EM-instructions, so code has to be generated for them. +Of course this does not imply the code will ever be executed. +.NH 3 +Group 12: Compare instructions +.PP +The code for 'cmu 2', with its 4 labels, is terrible. +But it is the best I could find. +.NH 3 +Group 9: Logical instructions +.PP +I have tried to merge both variants of the instructions 'and 2', 'ior 2' and 'xor 2', +as in +.DS +pat and $1==2 +with hl_or_de hl_or_de +uses reusing %1, reusing %2, hl_or_de, areg +gen mov a,%1.2 + ana %2.2 + mov %a.2,a + mov a,%1.1 + ana %2.1 + mov %a.1,a yields %a +.DE +but the current version of +.B cgg +doesn't approve this. +In any case +.B cgg +chooses either DE or HL to store the result, using lookahead. +.NH 3 +Group 14: Procedure call instructions +.PP +There is an 8 bytes function return area, called '.fra'. +If only 2 bytes have to be returned, register-pair DE is used. +.NH 1 +LIBRARY ROUTINES +.PP +Most of the library routines start with saving the return address +and the local base, so that the parameters are on the top of the stack +and the registers B and C are available as scratch registers. +Since register-pair HL is needed to accomplish these tasks, +and also to restore everything just before the routine returns, +it is not possible to transfer data between the routines and the +surrounding world through register H or L. 
+Only registers A, D and E can be used for this. +.sp +When a routine returns 2 bytes, they are usually returned in +register-pair DE. +When it returns more than 2 bytes they are pushed onto the stack. +.br +It would have been possible to let the 32 bit arithmetic routines +return 2 bytes in DE and the remaining 2 bytes on the stack +(this often would have saved some space and execution time), +but I don't consider that as well-structured programming. +.NH 1 +TRAPS +.PP +Whenever a trap, for example trying to divide by zero, +occurs in a program that originally was written in C or Pascal, +a special trap handler is called. +This trap handler wants to write an appropriate error message on the +monitor. +It tries to read the message from a file (e.g. etc/pc_rt_errors in the +EM home directory for Pascal programs), but since the 8080 back-end +doesn't know about files, we are in trouble. +This problem is solved, as far as possible, by including the 'open'-monitor call in the mon-routine. +It returns with file descriptor -1. +The trap handler reacts by generating another trap, with the original +trap number. +But this time, instead of calling the C- or Pascal trap handler again, +the following message is printed on the monitor: +.DS L + trap number <TN> + line <LN> of file <FN> + +where <TN> is the trap number (decimal) + <LN> is the line number (decimal) + <FN> is the filename of the original program +.DE +.sp 1 +Trap numbers are subdivided as follows: +.IP 1-27: 20 +EM-machine error, as described in [3] +.IP 63: +an unimplemented EM-instruction is used +.IP 64-127: +generated by compilers, runtime systems, etc. +.IP 128-252: +generated by user programs +.NH 1 +IMPLEMENTATION +.PP +It will not be possible to run the entire Amsterdam Compiler Kit on an +8080-based computer system. +One has to write a program on another +system, a system on which the compiler kit runs. +This program may be a mixture of high-level languages, such as +C or Pascal, EM and 8080 assembly code.
+The program should be compiled using the compiler kit, producing 8080 machine code. +This code should come available to the 8080 machine +for example by downloading or +by storing it in ROM (Read Only Memory). +.sp 1 +Depending on the characteristics of the particular 8080 based system, some +adaptations have to be made: +.IP 1) 10 +In 'head_em': the base address, which is the address where the first +8080 instruction will be stored, and the initial value of the +stackpointer are set to 0x1000 and 0x8000 respectively. +.br +Other systems require other values. +.IP 2) +In 'head_em': before calling "__m_a_i_n", the environment +pointer, argument vector and argument count will have to be pushed +onto the stack. +Since this back-end is tested on a system without any knowledge +of these things, dummies are pushed now. +.IP 3) +In 'tail_em': proper routines "putchar" and "getchar" should +be provided. +They should write resp. read a character on/from the monitor. +Maybe some conversions will have to be made. +.IP 4) +In 'head_em': an application program returns control to the monitor by +jumping to address 0xFB52. +This may have to be changed for different systems. +.IP 5) +In 'tail_em': the current version of the 8080 back-end has very limited I/O +capabilities, because it was tested on a system that +had no knowledge of files. +So the implementation of the EM-instruction 'mon' is very simple; +it can only do the following things: +.RS +.IP Monitor\ call\ 1: 40 +exit +.IP Monitor\ call\ 3: +read, always reads from the monitor. +.br +echos the read character. +.br +ignores file descriptor. +.IP Monitor\ call\ 4: +write, always writes on the monitor. +.br +ignores file descriptor. +.IP Monitor\ call\ 5: +open file, returns file descriptor -1. +.br +(compare chapter about TRAPS) +.IP Monitor\ call\ 6: +close file, returns error code = 0. +.IP Monitor\ call\ 54: +io-control, returns error code = 0. 
+.RE +.sp +If the system should do file-handling the routine ".mon" +should be extended thoroughly. +.NH 1 +INTEL 8080 VERSUS ZILOG Z80 AND INTEL 8086 +.NH 2 +Introduction +.PP +At about the same time I developed the back end +for the Intel 8080 and Intel 8085, +Frans van Haarlem did the same job for the Zilog z80 microprocessor. +Since the z80 processor is an extension of the 8080, +any machine code offered to an 8080 processor can be offered +to a z80 too. +The assembly languages are quite different however. +.br +During the development of the back ends we have used +two micro-computers, both equipped with a z80 microprocessor. +Of course the output of the 8080 back end is assembled by an +8080 assembler. This should ensure that I have never used any of +the features that are potentially available in the z80 processor, +but are not part of a true 8080 processor. +.sp 1 +As a final job, I have +investigated the differences between the 8080 and z80 processors +and their influence on the back ends. +I have tried to measure this influence by examining the length of +the generated code. +I have also involved the 8086 micro-processor in these measurements. +.NH 2 +Differences between the 8080 and z80 processors +.PP +Except for some features that are less important concerning back ends, +there are two points where the z80 improves upon the 8080: +.IP First, 18 +the z80 has two additional index registers, IX and IY. +They are used as in +.DS + LD B,(IX+10) +.DE +The offset, here 10, should fit in one byte. +.IP Second, +the z80 has several additional instructions. +The most important ones are: +.RS +.IP 1) 8 +The 8080 can only load or store register-pair HL direct +(using LHLD or SHLD). +The z80 can handle BC, DE and SP too. +.IP 2) +Instructions are included to ease block movements. +.IP 3) +There is a 16 bit subtract instruction. +.IP 4) +While the 8080 can only rotate the accumulator, the z80 +can rotate and shift each 8 bit register.
+.IP 5) +Special instructions are included to jump to near locations, saving 1 byte. +.RE +.NH 2 +Consequences for the 8080 and z80 back ends +.PP +The most striking difference between the 8080 and z80 back ends +is the choice of the local base. +The writer of the z80 back end chose index register IY as local base, +because this results in the cheapest coding of EM-instructions +like 'lol' and 'stl'. +The z80 instructions that load local 10, for example +.DS +LD E,(IY+10) +LD D,(IY+11) +.DE +occupy 6 bytes and take 38 time periods to execute. +The five corresponding 8080 instructions loading a local +occupy 7 bytes and take 41 time periods. +Although the profit of the z80 might not be world-shocking, +it should be noted that as a side effect it may save some +pushing and popping since register pair HL is not used. +.sp 1 +The choice of IY as local base has its drawbacks too. +The root of the problem is that it is not possible to add +IY to HL. +For the EM-instruction +.DS +lal 20 +.DE +the z80 back end generates code like +.DS +LD BC,20 +PUSH IY +POP HL +ADD HL,BC +.DE +leaving the wanted address in HL. +.br +These annoying push and pop instructions are also needed in some +other instructions, for instance in 'lol' when the offset +doesn't fit in one byte. +.sp 1 +Besides the choice of the local base, I think there is no +fundamental difference between the 8080 and z80 back ends, +except of course that the z80 back end has register pair BC +and, less important, index register IX available as scratch registers. +.sp 1 +Most of the PATTERNS in the 8080 and z80 tables are more or less +a direct translation of each other. +.NH 2 +What did I do? +.PP +To get an idea of the quality of the code generated by +the 8080, z80 and 8086 back ends I have gathered +some C programs and some Pascal programs. +Then I produced 8080, z80 and 8086 code for them. +Investigating the assembler listing I found the +lengths of the different parts of the generated code.
+I have checked two areas: +.IP 1) 8 +the entire text part +.IP 2) +the text part without any library routine, so only the plain user program +.LP +I have to admit that neither one of them is really honest. +When the entire text part is checked, the result is disturbed +because not always the same library routines are loaded. +And when only the user program itself is considered, the result is +disturbed too. +For example the 8086 has a multiply instruction, +so the EM-instruction 'mli 2' is translated in the main program, +but the 8080 and z80 call a library routine that is not counted. +Also the 8080 uses library routines at some places where the +z80 does not. +.sp 1 +But nevertheless I think the measurements will give an idea +about the code produced by the three back ends. +.NH 2 +The results +.PP +The table below should be read as follows. +For all programs I have computed the ratio of the code-lengths +of the 8080, z80 and 8086. +The averages of all Pascal/C programs are listed in the table, +standardized to '100' for the 8080. +So the listed '107' indicates that the lengths +of the text parts of the z80 programs that originally were Pascal programs, +averaged 7 percent larger than in the corresponding 8080 programs. +.DS C + -------------------------------------------------- +| | 8080 | z80 | 8086 | + -------------------------------------------------- +| C, text part | 100 | 103 | 65 | +| Pascal, text part | 100 | 107 | 55 | +| C, user program | 100 | 110 | 71 | +| Pascal, user program | 100 | 118 | 67 | + -------------------------------------------------- +.DE +.TE +The most striking thing in this table is that the z80 back end appears +to produce larger code than the 8080 back end. +The reason is that the current z80 back end table is +not very sophisticated yet. +For instance it doesn't look for any EM-pattern longer than one. 
+So the table shows that the preparations in the 8080 back end table +to produce faster code (like recognizing special EM-patterns +and permitting one byte registers on the fake-stack) +were not just for fun, but really improved the generated code +significantly. +.sp 1 +The table shows that the 8080 table is relatively better +when only the plain user program is considered instead of the entire text part. +This is not very surprising since the 8080 back end sometimes +uses library routines where the z80 and especially the 8086 don't. +.sp 1 +The difference between the 8080 and z80 on the one hand and the 8086 +on the other is very big. +But of course it was not an equal game: +the 8086 is a 16 bit processor that is much more advanced than the +8080 or z80 and the 8086 back end is known to produce +very good code. +.bp +.B REFERENCES +.sp 2 +.IP [1] 10 +8080/8085 Assembly Language Programming Manual, +.br +Intel Corporation (1977,1978) +.IP [2] +Andrew S. Tanenbaum, Hans van Staveren, E.G. Keizer and Johan W. Stevenson, +.br +A practical tool kit for making portable compilers, +.br +Informatica report 74, Vrije Universiteit, Amsterdam, 1983. +.sp +An overview of the Amsterdam Compiler Kit. +.IP [3] +Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren, H. +.br +Description of an experimental machine architecture for use with block +structured languages, +.br +Informatica report 81, Vrije Universiteit, Amsterdam, 1983. +.sp +The defining document for EM. +.IP [4] +Steel, T.B., Jr. +.br +UNCOL: The myth and the Fact. in Ann. Rev. Auto. Prog. +.br +Goodman, R. (ed.), vol. 2, (1960), p325-344. +.sp +An introduction to the UNCOL idea by its originator. +.IP [5] +van Staveren, Hans +.br +The table driven code generator from the Amsterdam Compiler Kit +(Second Revised Edition), +.br +Vrije Universiteit, Amsterdam. +.sp +The defining document for writing a back end table.
+.IP [6] +Voors, Jan +.br +A back end for the Zilog z8000 micro, +.br +Vrije Universiteit, Amsterdam. +.sp +A document like this one, but for the z8000. diff --git a/doc/install.doc b/doc/install.doc new file mode 100644 index 0000000..70bfe8b --- /dev/null +++ b/doc/install.doc @@ -0,0 +1,1237 @@ +.\" $Id: install.doc,v 1.38 1994/06/24 10:01:58 ceriel Exp $ +.if n .nr PD 1v +.if n .nr LL 78m +.if n .ll 78m +.TL +Amsterdam Compiler Kit Installation Guide +.AU +Ed Keizer +(revised for 3rd, 4th and 5th distribution by Ceriel Jacobs) +.AI +Vakgroep Informatica +Vrije Universiteit +Amsterdam +.NH +Introduction +.PP +This document +describes the process of installing the Amsterdam Compiler Kit (ACK). +It depends on the combination of hard- and software how +hard it will be to install the Kit. +This description is intended for a Sun-3 or SPARC workstation. +Installation on VAXen running Berkeley +.UX +or Ultrix, +Sun-2 systems and most System V +.UX +systems should be easy. +As of this distribution, installation on PDP-11's or other +systems with a small address space is no longer supported. +See section 8 for installation on other systems. +.NH +The ACK installation process +.PP +In the ACK installation process, three directory trees are used: +.IP "-" +the ACK source tree. This is the tree on the ACK distribution medium. +For the rest of this document, we will refer to this directory +as $SRC_HOME; +.IP "-" +a configuration tree. This tree is built by the installation process and +is used to do compilations in. Its structure reflects that of the source tree, +but this tree will mostly contain Makefiles and relocatable objects. +For the rest of this document, we will refer to this directory +as $CONFIG; +.IP "-" +an ACK users tree. This tree is also built by the installation process. 
+For the rest of this document, we will refer to this directory +as $TARGET_HOME; +.LP +After installation, +the directories in $TARGET_HOME contain the following information: +.if n .sp 1 +.if n .nr PD 0 +.IP "bin" 14 +the few utilities that knot things together. +See the section about "Commands". +.IP "lib" +root of a tree containing almost all libraries used by +commands. +Files specific to a certain machine are collected in one subtree +per machine. E.g. "lib/pdp", "lib/z8000". +The names used here are the same names as used for subtrees +of "$SRC_HOME/mach". +.IP "lib/descr" +command descriptor files used by the program ack. +.IP "lib/LLgen" +files used by the LL(1) parser generator. +.IP "lib/flex" +files used by the lexical analyzer generator Flex. +.IP "lib/m2" +definition modules for Modula-2. +.IP "lib.bin" +root of a tree containing almost all binaries used by +commands. +All programs specific to a certain machine are collected in one subtree +per machine. E.g. "lib.bin/pdp", "lib.bin/z8000". +The names used here are the same names as used for subtrees +of "$SRC_HOME/mach". +.IP "lib.bin/ego" +files used by the global optimizer. +.IP "lib.bin/lint" +binaries for the lint passes and lint libraries. +.IP "lib.bin/ceg" +files used by the code-expander-generator. +.IP "etc" +contains the file "ip_spec.t" needed for EM interpreters and EM documentation. +.IP "config" +contains two include files: +.TS +l l. +em_path.h path names used by \fIack\fP, intended for all utilities +local.h various definitions for local versions +.TE +These include files are specific for the current machine, so they +are in a separate directory. +.IP "include/_tail_cc" +.br +include files needed by modules +in the C library from lang/cem/libcc. +.IP "include/tail_ac" +.br +include files for ANSI C. +.IP "include/occam" +include files for occam. +.IP "include/_tail_mon" +.br +more or less system independent include files needed by modules +in the library lang/cem/libcc/mon. 
+.IP "h" +the #include files for: +.TS +l l. +arch.h definition of the ACK archive format +as_spec.h used by EM assembler and interpreters +bc_io.h used by the Basic run-time system +bc_string.h used by the Basic run-time system +cg_pattern.h used by the backend program "cg" and its bootstrap +cgg_cg.h used by the backend program "ncg" and its bootstrap +em_abs.h contains trap numbers and address for lin and fil +em_ego.h definition of names for some global optimizer + messages +em_flag.h definition of bits in array em_flag in + $TARGET_HOME/lib.bin/em_data.a. Describes parameters + effect on flow of instructions +em_mes.h definition of names for mes pseudo numbers +em_mnem.h instruction => compact mapping +em_pseu.h pseudo instruction => compact mapping +em_ptyp.h useful for compact code reading/writing, + defines classes of parameters +em_reg.h definition of mnemonics indicating register type +em_spec.h definition of constants used in compact code +ip_spec.h used by programs that read e.out files +m2_traps.h used by the Modula-2 run-time system +ocm_chan.h used by the occam run-time system +ocm_parco.h used by the occam run-time system +ocm_proc.h used by the occam run-time system +out.h defines the ACK a.out format +pc_err.h definitions of error numbers in Pascal +pc_file.h macro's used in file handling in Pascal +pc_math.h used by the Pascal runtime system +ranlib.h defines symbol table format for archives +stb.h defines debugger symbol table types +.TE +.IP "modules" +root of a tree containing modules for compiler writers. +.IP "modules/man" +manual pages for all modules. +.IP "modules/lib" +contains module objects. +.IP "modules/h" +include files for some of the modules. +.IP "modules/pkg" +include files for some of the modules. +.IP "doc" +this directory contains the unformatted documents for the Kit. +A list of the available documents can be found in the last section. +These documents must be processed by [nt]roff. 
+.IP "man" +man files for various utilities. +.if n .nr PD 1v +.LP +When installing ACK on several types of machines with a shared file system, +it may be useful to know that the "doc", "etc", "h", +"include", "lib" and "man" sub-directories do not depend on this +particular installation. They do not contain binaries or path-dependent +information. These directories can therefore be shared between the +ACK installations. This can be accomplished by creating the tree and +suitable symbolic links before starting the installation process. +.LP +For instance, let us say there is a file-system that is accessible from +the different machines as "/usr/share/local", and the ACK binary tree +must be installed in "/usr/local/ack". In this case, proceed as follows: +.IP \- +create a directory "/usr/share/local/ack", with subdirectories +"doc", "etc", "h", "include", "lib" and "man". +.IP \- +create a directory "/usr/local/ack" and +then create symbolic links "doc" to "/usr/share/local/ack/doc", etc. +.LP +If this is done on all machines on which ACK will be installed, the +machine-independent part only has to be installed once, preferably +on the fastest processor (it takes a long time to install all libraries). +.LP +The directories in the source tree contain the following information: +.if n .sp 1 +.if n .nr PD 0 +.IP "bin" 14 +source of some shell-scripts. +.IP "lib" +mostly description files for the "ack" program. +.IP "etc" +the main description of EM sits here. +Files (e.g. em_table) describing +the opcodes and pseudos in use, +the operands allowed, effect in stack etc. etc. +.IP "mach" +just there to group the directories with all sources for each machine. +The section about "Machines" of this manual indicates which subdirectories +are used for which systems. +.br +These directories have subdirectories named: +.in +3n +.TS +l l. 
+cg the backend (*.m => *.s) +ncg the new backend (*.m => *.s) +as the assembler (*.s => *.o) or + assembler/linker (*.s + libraries => a.out) +cv conversion programs for a.out files +dl down-load programs +top the target optimizer +int source for an interpreter + +libbc to create Basic run-time system and libraries +libcc to create C run-time system and libraries +libcc.ansi to create ANSI C run-time system and libraries +libpc to create Pascal run-time system and libraries +libf77 to create Fortran run-time system and libraries +libm2 to create Modula-2 run-time system and libraries +liboc to create occam run-time system and libraries +libem EM runtime system, only depending on CPU type +libend library defining end, edata, etext +libfp to create floating point library +libdb to create debugger support library +libsys system-dependent EM library +libce fast cc-compatible C compiler library support + +ce code expander (fast back-end) + +test various tests +.TE +.in -3n +Actually, some of these directories will only appear in the configuration tree. +.br +The directory proto contains files used by most machines, +like machine-independent sources and Makefiles. +.in +3n +.TS +l l. +mach/proto/cg current backend sources +mach/proto/ncg new backend sources +mach/proto/as assembler sources +mach/proto/top target optimizer sources +mach/proto/fp floating point package sources +mach/proto/libg makefiles for compiling libraries +mach/proto/grind machine-independent debugger support +.TE +.IP "emtest" +contains prototype of em test set. +.IP "lang" +just there to group the directories for all front-ends. +.IP "lang/pc" +the Pascal front-end. +.IP "lang/pc/libpc" +.br +source of Pascal run-time system (in EM or C). +.IP "lang/pc/test" +some test programs written in Pascal. +.IP "lang/pc/comp" +the Pascal compiler proper. +.IP "lang/cem" +the C front-end. +.IP "lang/cem/libcc" +.br +directories with sources of C runtime system, libraries (in EM or C). 
+.IP "lang/cem/libcc/gen" +.br +sources for routines in chapter III of +.UX +programmers manual, +excluding stdio. +.IP "lang/cem/libcc/stdio" +.br +stdio sources. +.IP "lang/cem/libcc/math" +.br +sources for mathematical routines, normally available with the +\fB-lm\fP option to \fIcc\fP. +.IP "lang/cem/libcc/mon" +.br +sources for routines in chapter II, mostly written in EM. +.IP "lang/cem/cemcom" +.br +the compiler proper. +.IP "lang/cem/cemcom.ansi" +.br +the ANSI C compiler proper. +.IP "lang/cem/cpp.ansi" +.br +the ANSI C preprocessor. +.IP "lang/cem/libcc.ansi" +.br +the ANSI C library sources. +.IP "lang/cem/ctest" +.br +the C test set. +.IP "lang/cem/ctest/cterr" +.br +programs developed for pinpointing previous errors. +.IP "lang/cem/ctest/ct*" +.br +the test programs. +.IP "lang/cem/lint" +a C program checker. +.IP "lang/cem/lint/lpass1" +.br +the first pass of lint. +.IP "lang/cem/lint/lpass1.ansi" +.br +the first pass of lint, this time for ANSI C. +.IP "lang/cem/lint/lpass2" +.br +the second pass of lint, shared between ANSI C and "old-fashioned" C. +.IP "lang/cem/lint/llib" +.br +programs for producing lint libraries. +.IP "lang/basic" +the Basic front-end. +.IP "lang/basic/src" +.br +the compiler proper. +.IP "lang/basic/lib" +.br +the Basic run-time library source. +.IP "lang/basic/test" +.br +various Basic programs. +.IP "lang/occam" +the occam front-end. +.IP "lang/occam/comp" +.br +the compiler proper. +.IP "lang/occam/lib" +.br +source of occam run-time system (in EM or C). +.IP "lang/occam/test" +.br +some occam programs. +.IP "lang/m2" +the Modula-2 front-end. +.IP "lang/m2/comp" +the compiler proper. +.IP "lang/m2/libm2" +source of Modula-2 run-time system (in EM, C and Modula-2). +.IP "lang/m2/m2mm" +the Modula-2 makefile generator. +.IP "lang/m2/test" +some Modula-2 example programs. +.IP "lang/fortran" +the Fortran front-end (translates Fortran into C). 
This compiler is not +a part of ACK, but is included because it adds another language. +The Fortran system carries the following copyright notice: +.IP "" +.nf +/************************************************************** +Copyright 1990, 1991 by AT&T Bell Laboratories and Bellcore. + +Permission to use, copy, modify, and distribute this software +and its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the names of AT&T Bell Laboratories or +Bellcore or any of their entities not be used in advertising or +publicity pertaining to distribution of the software without +specific, written prior permission. + +AT&T and Bellcore disclaim all warranties with regard to this +software, including all implied warranties of merchantability +and fitness. In no event shall AT&T or Bellcore be liable for +any special, indirect or consequential damages or any damages +whatsoever resulting from loss of use, data or profits, whether +in an action of contract, negligence or other tortious action, +arising out of or in connection with the use or performance of +this software. +**************************************************************/ +.fi +.IP "lang/fortran/comp" +.br +the compiler proper. +.IP "lang/fortran/lib" +.br +source of Fortran runtime system and libraries. +.IP "fast" +contains sub-directories for installing the fast ACK compatible compilers. +.IP "fast/driver" +.br +contains the sources of the fast ACK compatible compiler drivers. +.IP "fcc" +contains the fast cc-compatible C compiler for SUN-3 and VAX. +.IP "util" +contains directories with sources for various utilities. +.IP "util/ack" +the program used for translation with the Kit. +.IP "util/opt" +the EM peephole optimizer (*.k => *.m). +.IP "util/ego" +the global optimizer. 
+.IP "util/topgen" +the target optimizer generator. +.IP "util/misc" +decode (*.[km] => *.e) + encode (*.e => *.k). +.IP "util/data" +the C-code for $TARGET_HOME/lib.bin/em_data.a. +These sources are created by the Makefile in `etc`. +.IP "util/ass" +the EM assembler (*.[km] + libraries => e.out). +.IP "util/arch" +the archivers to be used for all EM utilities. +.IP "util/cgg" +a program needed for compiling backends. +.IP "util/ncgg" +a program needed for compiling the newest backends. +.IP "util/cpp" +the C preprocessor. +.IP "util/shf" +various shell files. +.IP "util/LLgen" +the extended LL(1) parser generator. +.IP "util/amisc" +contains some programs handling ACK a.out format, such as anm, asize. +.IP "util/cmisc" +contains some programs to help in resolving name conflicts, and +a dependency generator for makefiles. +.IP "util/led" +the ACK link-editor, reading ACK relocatable a.out format, and writing +ACK a.out format. +.IP "util/int" +an EM interpreter, written in C. Very useful for checking out software, +but slow. +.IP "util/ceg" +code expander generator. +.IP "util/grind" +a symbolic debugger. +.IP "util/byacc" +this is Berkeley yacc, in the public domain. +.IP "util/flex" +this is a replacement for lex. It carries the following copyright notice: +.IP "" +.nf +Copyright (c) 1990 The Regents of the University of California. +All rights reserved. + +This code is derived from software contributed to Berkeley by +Vern Paxson. + +The United States Government has rights in this work pursuant +to contract no. DE-AC03-76SF00098 between the United States +Department of Energy and the University of California. 
+ +Redistribution and use in source and binary forms are permitted +provided that: (1) source distributions retain this entire +copyright notice and comment, and (2) distributions including +binaries display the following acknowledgement: ``This product +includes software developed by the University of California, +Berkeley and its contributors'' in the documentation or other +materials provided with the distribution and in all advertising +materials mentioning features or use of this software. Neither the +name of the University nor the names of its contributors may be +used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. +.fi +.ne 4 +.if n .nr PD 1v +.LP +All path names mentioned in the text of this document are relative to +$SRC_HOME, unless they start with '/' or one of $SRC_HOME, +$TARGET_HOME or $CONFIG. +.NH +Restoring the ACK tree +.PP +The process of installing the Amsterdam Compiler Kit is quite simple. +The first step is to restore the Amsterdam Compiler Kit +distribution tree structure. +Proceed as follows +.IP " \-" 10 +Create a directory, for example /usr/share/local/src/ack, on a device +with at least 15 Megabytes left. This directory will be $SRC_HOME. +.IP " \-" +Change to that directory (cd ...). +.IP " \-" +Extract all files from the distribution medium, for instance +magtape: +\fBtar x\fP. +.IP " \-" +Keep a copy of the original distribution to be able to repeat the process +of installation in case of disasters. +This copy is also useful as a reference point for diff-listings. +.NH +Adapting ACK to the local system +.PP +Before compiling the sources in the Kit some installation dependent +actions have to be taken. 
+Most of these are performed by an interactive shell script in the file +.I $SRC_HOME/first/first. +Calling this script should be done +from another directory, for instance an empty directory which will later +become $CONFIG. +.LP +The actions of the +.I first +script are: +.if n .sp 1 +.if n .nr PD 0 +.IP \- +Asking for the path names of the ACK source directory ($SRC_HOME), the +configuration directory ($CONFIG), and the ACK users directory ($TARGET_HOME). +About 5M are needed for the configuration tree. The disk space needed +for the ACK users tree depends on which front-ends and back-ends are to be +installed. +For instance, on our SPARC systems +we have installed all languages and 6 back-ends, including the +system-independent part. This amounts to about 16M. +On our SUN-3 systems, we have installed all front-ends and 5 back-ends, +but only the machine-dependent part. The machine-independent directories are +symbolic links to the SPARC ACK users tree. +We also have the fast ACK compilers +installed on the SUN-3's. +The total amount of disk-space used is less than 8M. +.IP \- +Asking for what type of system the binary tree must be produced for +and creating the shell script "ack_sys" in the Kit's bin directory. +Several utilities make use of "ack_sys" to determine the type of +system. +The current choice is between: +.TS +c c c +l l l. +answer system type default machine +vax_bsd4_1a VAX11 + BSD4.1a vax4 +vax_bsd4_2 VAX11 + BSD4.2 vax4 +vax_sysV_2 VAX11 + System V.2 vax4 +i386 Intel 80386 system + Xenix System V i386 +sun3 Sun-3 Motorola 68020 workstation sun3 +sun2 Sun-2 Motorola 68010 workstation sun2 +m68_sysV_0 68000 + Uniplus System V.0 mantra +m68020 Motorola 68020 VME131 + System V/68 R2V2.1 m68020 +sparc Sun-4 or SPARC workstation running SunOs 4 sparc +sparc_solaris Sun-4 or SPARC workstation running Solaris 2 sparc_solaris +ANY Neither of the above ??? +.TE +For some of these, the installation procedure has not been tested, as +we don't have them. 
+For others, the installation procedure has only been tested with earlier +distributions, as we don't have those systems anymore. +However, the sun3 and sparc systems are known to behave reasonably. +The sparc_solaris system has only been tested with the GNU C compiler, +because we don't have the SUN C compiler (it is unbundled in Solaris 2). +The Sun systems should run SunOs Release 3.0 or newer. +The i386 choice may also be used for Intel 80386 or 80486 systems +running +.UX +System V Release 4. These systems are also able to run Xenix System V +binaries. +If the target system is not on this list, choose one that comes close. +If none of them come close, use the "ANY" choice. +For ANY, any name can be used, +but the Kit will not be able to compile programs for the target system. +See the section about "compilation +on a different machine". +.IP \- +Setting the default machine for which code is +produced to the local type of system according to the table above. +This is done in the file "$TARGET_HOME/config/local.h". +See also section 9.1. +.IP \- +Asking for things that don't have to be installed. +.IP \- +Producing a shell script called "INSTALL" that will take care of the +ACK installation process. +.NH +Compiling the Kit +.PP +The next step in the installation process is to run the "INSTALL" +shell-script. When using a Bourne-shell, type: +.DS +sh INSTALL > INSTALL.out 2>&1 & +.DE +When using a C-shell, type: +.DS +sh INSTALL >& INSTALL.out & +.DE +This shell-script performs the following steps: +.if n .sp 1 +.if n .nr PD 0 +.IP \- +Produce a configuration tree ($CONFIG), reflecting the structure of the +source tree. +.IP \- +Produce Makefiles in $CONFIG. +As mentioned before, compilations +will be done in the configuration tree, not in the source tree. +Most configuration directories will have Makefiles +used to compile and install the programs in that +directory.
+All programs needed for compilation and/or cross compilation +with the Kit are installed in $TARGET_HOME by these Makefiles. +These Makefiles are produced from corresponding files called +"proto.make" in the source tree. In fact, the "proto.make" files +are almost complete Makefiles, except for some macro definitions that +are collected by the \fIfirst\fP script. +The Makefiles adhere to a standard which is described in +section 9. +.IP \- +Copy "Action" files to the configuration tree and edit them to +reflect the choices concerning the parts of ACK that have to be +installed. "Action" files are described below. +.IP \- +Copy part of the source tree to the ACK users tree (include files, +manual pages, documentation, et cetera). +.IP \- +Call the "TakeAction" script. +All these Makefiles do not have to be called separately. +We wrote a shell script calling the make's needed to install +the whole Kit. +This script consists of the file $SRC_HOME/TakeAction +and a few files called Action in some configuration directories. +The Action files describe in a very simple form which actions +have to be performed in which directories. +The default action is to start "make install && make clean". +The output of each make is diverted to a file called "Out" +in the same directory as the make was started in. +If the make was successful (return code 0) the Out file is removed +and the script TakeAction produces a small message indicating +that it succeeded in fulfilling its goal. +If the make was not successful (any other return code) the Out file +is left alone for further examination and the script TakeAction +produces a small message indicating that it failed. +.br +For some programs the scripts already know they can't be +installed on the local type of system. +In that case they produce a message "Sorry, ....." and +happily proceed with further installation commands.
+.if n .sp 1 +.if n .nr PD 1v +.LP +Installation of the Kit might take anything from a few +hours to more than a day, depending on the speed of the local machine and +what must be installed. +.LP +If the installation succeeded, the Kit is ready to be used. +Read section 6 and the manuals provided +with the Kit (in the $TARGET_HOME/man directory) on how to use it. +.NH 2 +Problems +.NH 3 +on Unisoft m68000 systems. +.PP +The Unisoft C compiler has a bug which impedes the correct +translation of the peephole optimizer. +For a more detailed description of this phenomenon see +the file "$SRC_HOME/mach/m68k2/Unisoft_bug". +(This observation was made in 1985 or so, so it is probably +no longer true). +.NH 3 +with backends +.PP +The backends for the PDP11, VAX, Motorola 68000 and 68020, +SPARC, Intel 8086, and Intel 80386 +have been heavily used by ourselves and are well tested. +The backends for the other machines are known to run our own +test programs, +but might reveal errors when more heavily used. +.NH 2 +An example output of TakeAction. +.LP +.sp 1 +.nf + System definition -- done + EM definition library -- done + C utilities -- done + Flex lexical analyzer generator -- done + Yacc parser generator -- done + system-call interface module -- done + . + . + . + EM Global optimizer -- done + ACK archiver -- done + Program 'ack' -- done + Bootstrap for backend tables -- done + Bootstrap for newest form of backend tables -- done + . + . + . + C frontend -- done + ANSI-C frontend -- done + ANSI-C preprocessor -- done + ANSI-C header files -- done + Failed for LINT C program checker, see lang/cem/lint/Out + Pascal frontend -- done + Basic frontend -- done + . + . + . 
+ Vax 4-4 assembler -- done + Vax 4-4 backend -- done + Vax target optimizer -- done + ACK a.out to VAX a.out conversion program -- done + Sorry, Vax code expander library can only be made on vax* systems + Vax 4-4 EM library -- done + Vax 4-4 debugger support library -- done + Vax 4-4 etext,edata,end library -- done + Vax 4-4 systemcall interface -- done + . + . + . +.sp 1 +.fi +.LP +The lines starting with "Sorry, " indicate that certain programs cannot +be translated on the local machine. +The lines starting with "Failed for" indicate +that certain programs/libraries were expected to, +but did not compile. +In this example, the installation of LINT failed. +To repeat a certain part of the installation, look in +the Action file, which resides in the root of the configuration tree, +for the directory in which that part is to be found. +If that directory contains an Action file issue the command +"sh $CONFIG/bin/TakeAction", otherwise type "make install". +.NH +Commands +.PP +The following commands are available in the $TARGET_HOME/bin directory after compilation +of the Kit: +.IP "\fIack\fP, \fIacc\fP, \fIabc\fP, \fIapc\fP, \fIocm\fP, \fIm2\fP, \fIf2c\fP and their links" 14 +.br +the names mentioned here can be used to compile Pascal, C, etc... programs. +Most of the links can be used to generate code for a particular +machine. +See also the section about "Machines". +.IP \fIarch\fP +the archiver used for the EM- and universal assembler/loader. +.IP \fIaal\fP +the archiver used for ACK objects. +.IP \fIem\fP +this program selects an interpreter to execute an e.out file. +Interpreters exist for PDP-11 and Motorola 68000 systems. +.IP \fIeminform\fP +the program to unravel the post-mortem information of +the EM interpreter for the PDP-11. +.IP \fILLgen\fP +the LL(1) parser generator. +.IP \fIack_sys\fP +a shell script producing an identification of the target system. +Used by some utilities to determine what is, and what is +not feasible on the target system.
+.IP \fImarch\fP +a shell script used while compiling libraries. +.IP "\fIasize\fP, \fIanm\fP, \fIastrip\fP" +.br +do the same as \fIsize\fP, \fInm\fP and \fIstrip\fP, but for ACK object format. +.IP \fImkdep\fP +a dependency generator for makefiles. +.IP "\fIcid\fP, \fIprid\fP, \fIcclash\fP" +.br +some utilities for handling name clashes in C programs. Some +systems have C-compilers with only 7 or 8 characters significant in +identifiers. +.IP \fItabgen\fP +a utility for generating character tables for C-programs. +.IP \fIint\fP +an EM interpreter. This one is written in C, and is very useful for checking +out programs. +.IP \fIgrind\fP +a source level debugger for C, ANSI-C, Modula-2 and Pascal. +.IP "\fIafcc\fP, \fIafm2\fP, \fIafpc\fP" +.br +these are ACK-compatible fast C, Modula-2 and Pascal compilers, +available for M68020, VAX and Intel 80386 systems. They compile very fast, +but produce slow code. +.IP \fIfcc\fP +this is a cc-compatible fast C compiler, available on SUN-3 and VAX +systems. It compiles very fast, but produces slow code. +.LP +We currently make the Kit available to our users by telling +them that they should include the $TARGET_HOME/bin directory in +their PATH shell variable. +The programs will still work when moved to a different +directory or linked to. +Copying should preferably be done with tar, since links are +heavily used. +Renaming of the programs linked to \fIack\fP will not always +produce the desired result. +This program uses its call name as an argument. +Any call name not being \fIcc\fP, \fIacc\fP, \fIabc\fP, \fIpc\fP, \fIf2c\fP, +\fIocm\fP, \fIm2\fP, or \fIapc\fP will be +interpreted as the name of a 'machine description' and the +program will try to find a description file with that name. +The installation process will only touch the utilities in the $TARGET_HOME/bin +directory, not copies of these utilities. 
+.NH +Machines +.PP +Below is a table with entries for all commands in +the bin directory used to (cross)compile for a particular machine. +The name in the first column gives the name in the bin directory. +The column headed dir indicates which subdirectories of +$TARGET_HOME/lib and/or $TARGET_HOME/lib.bin are needed for compilation. +The column head i/p contains the integer and pointer size used in units of +bytes. +The subdirectories with the same name in mach contain the sources. +A * in the column headed 'fp' indicates that floating point can be used +for that particular machine. A + in that column indicates that floating +point is available under the '-fp' option. In this case, software +floating point emulation is used. +.TS +l l l l l l l. +command system i/p languages fp dir remarks + +pdp PDP/UNIX V7 2/2 C * pdp + Pascal + Basic + occam + Modula-2 + +vax4 VAX/BSD 4.? 4/4 C * vax4 + System V.2 Pascal + Basic + occam + Modula-2 + Fortran + +sparc Sun-4 4/4 C * sparc + Pascal + Basic + occam + Modula-2 + Fortran + +sparc_solaris Sun-4 4/4 C * sparc_solaris + Pascal + Basic + occam + Modula-2 + Fortran + +m68k2 M68000/ 2/4 C + m68k2 + Unisoft Pascal + Basic + occam + Modula-2 + +m68k4 M68000/ 4/4 C + m68k4 + Unisoft Pascal m68k2 + Basic + occam + Modula-2 + Fortran + +pmds M68000/ 2/4 C + pmds Philips Micro + PMDS Pascal m68k2 Devel. System + Basic + occam + Modula-2 + +pmds4 M68000/ 4/4 C + pmds4 Philips Micro + PMDS Pascal m68k2 Devel. 
System + Basic m68k4 + occam + Modula-2 + Fortran + +mantra M68000/ 4/4 C + mantra + Sys V.0 Pascal m68k2 + Basic m68k4 + occam + Modula-2 + Fortran + +m68020 M68020/ 4/4 C + m68020 + Sys V/68 R2V2.1 Pascal + Basic + occam + Modula-2 + Fortran + +sun3 Sun-3 R4.1 4/4 C + sun3 + Pascal m68020 + Basic + occam + Modula-2 + Fortran + +sun2 Sun-2 R3.0 4/4 C + sun2 + Pascal m68k4 + Basic m68k2 + occam + Modula-2 + Fortran + +i86 IBM PC/IX 2/2 C + i86 IBM PC with PC/IX + Pascal Causes kernel crashes + Basic + occam + Modula-2 + +xenix3 Microsoft 2/2 C + xenix3 IBM AT with Xenix + Xenix V3 Pascal i86 + Basic + occam + Modula-2 + +i386 SCO Xenix 4/4 C + i386 Intel 80386 + System V Pascal Xenix System V + Basic + occam + Modula-2 + Fortran + +minix Minix PC 2/2 C + minix IBM PC running Minix + Pascal i86 + Basic + occam + Modula-2 + +minixST ST Minix 2/4 C + minixST Atari ST running Minix + Pascal m68k2 + Basic + occam + Modula-2 + +z8000 Zilog 8000 2/2 C z8000 Central Data + Pascal CPU board + Basic Assembler/loader + occam + Modula-2 + +em22 EM machine 2/2 C * em22 Needs interpreter + Pascal + Basic + occam + Modula-2 + +em24 EM machine 2/4 C * em24 Needs interpreter + Pascal + Basic + occam + Modula-2 + +em44 EM machine 4/4 C * em44 Needs interpreter + Pascal + Basic + occam + Modula-2 + Fortran + +6500 6502/BBC 2/2 C 6500 Assembler/loader + Pascal + Basic + occam + Modula-2 + +6800 Bare 6800 6800 Assembler only + +6805 Bare 6805 6805 Assembler only + +6809 Bare 6809 6809 Assembler only + +ns Bare NS16032 4/4 C ns + Pascal + Basic + occam + Modula-2 + Fortran + +i80 Hermac/z80 2/2 C i80 + Pascal + Basic + occam + Modula-2 + +z80 Hermac/z80 2/2 C z80 \fIi80\fP is faster + Pascal + Basic + occam + Modula-2 + +s2650 Signetics s2650 Assembler only + +arm Acorn 4/4 C * arm Assembler/loader + Archimedes Pascal + Basic + occam + Modula-2 + Fortran +.TE +.LP +The commands \fBem22\fP, \fBem24\fP and \fBem44\fP +produce e.out files with EM machine code which must be interpreted. 
+The Kit contains three interpreters: one running under PDP 11/V7 UNIX, +one for the M68000, running under the PMDS system, Sun systems, +the Mantra system, etc, and a portable one, written in C. +The first one can only interpret 2/2 e.out files, +the second takes 2/4 and 4/4 files, +and the last one takes 2/2, 2/4 and 4/4. +The PDP 11 interpreter executes floating point instructions. +.LP +The program \fB$TARGET_HOME/bin/em\fP calls the appropriate +interpreter. +The interpreters are looked for in the em22, em24 and em44 +subdirectories of $TARGET_HOME/lib.bin. +The third interpreter is available as the program \fB$TARGET_HOME/bin/int\fP +in the bin directory. +.NH +Compilation on a different machine. +.PP +The installation mechanism of the Kit is supposed to be portable across +.UX +machines, so +the Kit can be installed and used as a cross-compiler +for the languages it supports on any +.UX +machine. +The presence of most +.UX +utilities is essential for compilation. +A few of the programs certainly needed are: sh, C-compiler, sed, ed, +make, and awk. +.NH 2 +Backend +.PP +The existence of a backend with a system call library +for the target system is essential +for producing executable files for that system. +Rewriting the system call library if the one supplied does +not work on the target system is fairly straightforward. +If no backend exists for the target CPU type, a new backend has to be written +which is a major undertaking. +.NH 2 +Universal assembler/loader, link editor +.PP +For most machines, the description files in $TARGET_HOME/lib/*/descr use our +universal assembler and our link editor. +The load file produced is not directly +usable in any system known to us, +but has to be converted before it can be put to use. +The \fIcv\fP programs convert our a.out format into +executable files. +The \fIdl\fP programs present for some machines unravel +our a.out files and transmit commands to load memory +to a microprocessor over a serial line. 
+The file $TARGET_HOME/man/man5/ack.out.5 contains a description of the format of +the universal assembler load file. +It might be useful to those who wish or need to write their +own conversion programs. +Also, a module is included to read and write our a.out format. +See $TARGET_HOME/man/man3/object.3. +.NH +Options +.NH 2 +Default machine +.PP +There is one important option in $TARGET_HOME/config/local.h. +The utility \fIack\fP uses a default machine name when called +as \fIacc\fP, \fIcc\fP, \fIabc\fP, \fIapc\fP, \fIpc\fP, \fIocm\fP, +\fIm2\fP, \fIf2c\fP, or \fIack\fP. +The machine name used by default is determined by the +definition of ACKM in $TARGET_HOME/config/local.h. +The Kit is distributed with "sun3" as the default machine, +but the shell script "first" in the directory "first" alters this +to suit the target system. +There is nothing against using the Kit as a cross-compiler +and by default produce code that can't run on the local system. +.NH 2 +Pathnames +.PP +Absolute path names are concentrated in "$TARGET_HOME/config/em_path.h". +Only the utilities \fIack\fP, \fIflex\fP, and \fILLgen\fP use +absolute path names to access files in the Kit. +The tree is distributed with /usr/em as the working +directory. +The definition of EM_DIR in em_path.h should be altered to +specify the root +directory for the Compiler Kit binaries on the local system ($TARGET_HOME). +This is done automatically by the shell script "first" in the +directory "first". +Em_path.h also specifies which directory should be used for +temporary files. +Most programs from the Kit do indeed use that directory +although some remain stubborn and use /tmp or /usr/tmp. +.LP +The shape of the tree should not be altered lightly because +most Makefiles and the +utility \fIack\fP know the shape of the ACK tree. +The knowledge of the utility \fIack\fP about the shape of the tree is +concentrated in the files in the directory $TARGET_HOME/lib/*/descr and $TARGET_HOME/lib/descr/*. 
+.NH +Makefiles +.PP +Most directories contain a "proto.make", from which a Makefile is derived. +Apart from commands applying to that specific directory these +files all recognize a few special commands. +When called with one of these they will apply the command to +their own directory. +The special commands are: +.sp 1 +.IP "install" 20 +recompile and install all binaries and libraries. +.br +Some Makefiles allow errors to occur in the programs they call. +They ignore such errors and notify the user with the message +"~....... error code n: ignored". +Whenever such a message appears in the output it can be ignored. +.IP "cmp" +recompile all binaries and libraries and compare them to the +ones already installed. +.IP pr +print the sources and documentation on the standard output. +.IP opr +make pr | opr +.br +Opr should be an off-line printer daemon. +On some systems it exists under another name e.g. lpr. +The easiest way to call such a spooler is using a shell script +with the name opr that calls lpr. +This script should be placed in /usr/bin or $TARGET_HOME/bin or +one of the directories in the PATH environment variable. +.IP clean +remove all files not needed for day-to-day use, +that is binaries not in $TARGET_HOME/bin or $TARGET_HOME/lib.bin, object files etc. +.LP +Example: +.DS +make install +.DE +given as command in a configuration directory will cause +compilation of all programs in the directory and copying of the results +to the $TARGET_HOME/bin and $TARGET_HOME/lib.bin directories. +.NH +Testing +.PP +Test sets are available in Pascal, C, Basic and EM assembly: +.IP EM 8 +the directory $SRC_HOME/emtest contains a few EM test programs. +The EM assembly files in these tests must be transformed into +load files. +These tests use the LIN and NOP instructions to mark the passing of each +test. +The NOP instruction prints the current line number during the +test phase. 
+Each test notifies its correctness by calling LIN with a unique +number followed by a NOP which prints this line number. +The test finishes normally with 0 as the last number printed. +In all other cases a bug showed its +existence. +.IP Pascal +the directory $SRC_HOME/lang/pc/test contains a few Pascal test programs. +All these programs print the number of errors found and an +identification of these errors. +.sp 1 +.ti +4 +We also tested Pascal with the Validation Suite. +The Validation Suite is a collection of more than 200 Pascal programs, +designed by Brian Wichmann and Arthur Sale to test Pascal compilers. +We are not allowed to distribute it, but a copy may +be requested from +.DS +Richard J. Cichelli +A.N.P.A. +1350 Sullivan Trail +P.O. Box 598 +Easton, Pennsylvania 18042 +USA +.DE +.IP C +the sub-directories in $SRC_HOME/lang/cem/ctest contain C test programs. +The idea behind these tests is: +if there is a program called xx.c, compile it into xx.cem. +Run it with standard output to xx.cem.r, compare this file to +xx.cem.g, a file containing the 'ideal' output. +Any differences will point to implementation differences or +bugs. +Giving the command "run gen" or plain "run" starts this +process. +The differences will be presented on standard output. +The contents of the result files depend on the word size, +the xx.cem.g files on the distribution are intended for a +32-bit machine. +.IP Basic +the directory $SRC_HOME/lang/basic/test contains some forty Basic programs. +Not all of these programs are correct, some have syntactic errors, +some simply don't work. +The Makefile in that directory attempts to compile and run +these tests. +If it compiles its output is compared to a file with suffix .g +which contains the output to be expected. +The make should be started with its standard input diverted +to /dev/null. +An example of the output of a make is present in the file Out.std.
+.NH +Documentation +.PP +After installation, the manual pages for Amsterdam Compiler Kit can be found +in the $TARGET_HOME/man directory. Also, the following documents are provided +in the $TARGET_HOME/doc directory: +.TS +l l. +toolkit.doc general overview (CACM article) +em.doc description of the EM machine architecture +ack.doc format of machine description files (lib/*/descr) +ansi_C.doc ANSI C implementation description +basic.doc Basic reference manual +pcref.doc Pascal-frontend reference manual +val.doc results of running the Pascal Validation Suite +crefman.doc C-frontend description +LLgen description of the LL(1) parser generator +peep.doc internal documentation for the peephole optimizer +cg.doc documentation for backend writers and maintainers +regadd.doc addendum to previous document describing register variables +ncg.doc documentation for the newest backends +v7bugs.doc bugs in the V7 system and how to fix them +6500.doc MSC 6500 backend description +i80.doc Intel 8080 backend description +z80.doc Zilog Z80 backend description +m68020.doc Motorola M68000/M68020 backend description +sparc.doc SPARC code expander description +occam.doc occam-frontend description +ego.doc Global Optimizer description +top.doc Target Optimizer description +int.doc description of the EM interpreter written in C +ceg.doc documentation for code-expander writers and maintainers +lint.doc documentation of LINT +m2ref.doc Modula-2 frontend description +install.doc this document +install.pr this document (formatted for a simple line printer) +.TE +.LP +Use the Makefile to get readable copies. +.LP +Good luck. 
diff --git a/doc/install.pr b/doc/install.pr new file mode 100644 index 0000000..476039e --- /dev/null +++ b/doc/install.pr @@ -0,0 +1,1320 @@ + + + + + + + + + + AAAAmmmmsssstttteeeerrrrddddaaaammmm CCCCoooommmmppppiiiilllleeeerrrr KKKKiiiitttt IIIInnnnssssttttaaaallllllllaaaattttiiiioooonnnn GGGGuuuuiiiiddddeeee + + + Ed Keizer + (revised for 3rd, 4th and 5th distribution by Ceriel Jacobs) + + Vakgroep Informatica + Vrije Universiteit + Amsterdam + + + + + +_1. _I_n_t_r_o_d_u_c_t_i_o_n + + This document describes the process of installing the Amsterdam Compiler +Kit (ACK). It depends on the combination of hard- and software how hard it +will be to install the Kit. This description is intended for a Sun-3 or SPARC +workstation. Installation on VAXen running Berkeley UNIX|- or Ultrix, Sun-2 +systems and most System V UNIX systems should be easy. As of this distribu- +tion, installation on PDP-11's or other systems with a small address space is +no longer supported. See section 8 for installation on other systems. + +_2. _T_h_e _A_C_K _i_n_s_t_a_l_l_a_t_i_o_n _p_r_o_c_e_s_s + + In the ACK installation process, three directory trees are used: + +- the ACK source tree. This is the tree on the ACK distribution medium. + For the rest of this document, we will refer to this directory as + $SRC_HOME; + +- a configuration tree. This tree is built by the installation process and + is used to do compilations in. Its structure reflects that of the source + tree, but this tree will mostly contain Makefiles and relocatable + objects. For the rest of this document, we will refer to this directory + as $CONFIG; + +- an ACK users tree. This tree is also built by the installation process. + For the rest of this document, we will refer to this directory as + $TARGET_HOME; + +After installation, the directories in $TARGET_HOME contain the following +information: + +bin the few utilities that knot things together. See the section + about "Commands". 
+lib root of a tree containing almost all libraries used by commands. +_________________________ +|- UNIX is a trademark of Bell Laboratories. + + + + + September 10, 2002 + + + + + + - 2 - + + + Files specific to a certain machine are collected in one subtree + per machine. E.g. "lib/pdp", "lib/z8000". The names used here + are the same names as used for subtrees of "$SRC_HOME/mach". +lib/descr command descriptor files used by the program ack. +lib/LLgen files used by the LL(1) parser generator. +lib/flex files used by the lexical analyzer generator Flex. +lib/m2 definition modules for Modula-2. +lib.bin root of a tree containing almost all binaries used by commands. + All programs specific to a certain machine are collected in one + subtree per machine. E.g. "lib.bin/pdp", "lib.bin/z8000". The + names used here are the same names as used for subtrees of + "$SRC_HOME/mach". +lib.bin/ego files used by the global optimizer. +lib.bin/lint binaries for the lint passes and lint libraries. +lib.bin/ceg files used by the code-expander-generator. +etc contains the file "ip_spec.t" needed for EM interpreters and EM + documentation. +config contains two include files: + + em_path.h path names used by _a_c_k, intended for all utilities + local.h various definitions for local versions + + These include files are specific for the current machine, so + they are in a separate directory. +include/_tail_cc + include files needed by modules in the C library from + lang/cem/libcc. +include/tail_ac + include files for ANSI C. +include/occam include files for occam. +include/_tail_mon + more or less system independent include files needed by modules + in the library lang/cem/libcc/mon. 
+h the #include files for: + + arch.h definition of the ACK archive format + as_spec.h used by EM assembler and interpreters + bc_io.h used by the Basic run-time system + bc_string.h used by the Basic run-time system + cg_pattern.h used by the backend program "cg" and its bootstrap + cgg_cg.h used by the backend program "ncg" and its bootstrap + em_abs.h contains trap numbers and address for lin and fil + em_ego.h definition of names for some global optimizer + messages + em_flag.h definition of bits in array em_flag in + $TARGET_HOME/lib.bin/em_data.a. Describes parameters + effect on flow of instructions + em_mes.h definition of names for mes pseudo numbers + em_mnem.h instruction => compact mapping + em_pseu.h pseudo instruction => compact mapping + em_ptyp.h useful for compact code reading/writing, + defines classes of parameters + em_reg.h definition of mnemonics indicating register type + em_spec.h definition of constants used in compact code + + + + September 10, 2002 + + + + + + - 3 - + + + ip_spec.h used by programs that read e.out files + m2_traps.h used by the Modula-2 run-time system + ocm_chan.h used by the occam run-time system + ocm_parco.h used by the occam run-time system + ocm_proc.h used by the occam run-time system + out.h defines the ACK a.out format + pc_err.h definitions of error numbers in Pascal + pc_file.h macro's used in file handling in Pascal + pc_math.h used by the Pascal runtime system + ranlib.h defines symbol table format for archives + stb.h defines debugger symbol table types + +modules root of a tree containing modules for compiler writers. +modules/man manual pages for all modules. +modules/lib contains module objects. +modules/h include files for some of the modules. +modules/pkg include files for some of the modules. +doc this directory contains the unformatted documents for the Kit. + A list of the available documents can be found in the last sec- + tion. These documents must be processed by [nt]roff. 
+man man files for various utilities. + +When installing ACK on several types of machines with a shared file system, it +may be useful to know that the "doc", "etc", "h", "include", "lib" and "man" +sub-directories do not depend on this particular installation. They do not +contain binaries or path-dependent information. These directories can there- +fore be shared between the ACK installations. This can be accomplished by +creating the tree and suitable symbolic links before starting the installation +process. + +For instance, let us say there is a file-system that is accessible from the +different machines as "/usr/share/local", and the ACK binary tree must be +installed in "/usr/local/ack". In this case, proceed as follows: + +- create a directory "/usr/share/local/ack", with subdirectories "doc", + "etc", "h", "include", "lib" and "man". + +- create a directory "/usr/local/ack" and then create symbolic links "doc" + to "/usr/share/local/ack/doc", etc. + +If this is done on all machines on which ACK will be installed, the machine- +independent part only has to be installed once, preferably on the fastest pro- +cessor (it takes a long time to install all libraries). + +The directories in the source tree contain the following information: + +bin source of some shell-scripts. +lib mostly description files for the "ack" program. +etc the main description of EM sits here. Files (e.g. em_table) + describing the opcodes and pseudos in use, the operands allowed, + effect in stack etc. etc. +mach just there to group the directories with all sources for each + machine. The section about "Machines" of this manual indicates + which subdirectories are used for which systems. 
+ + + + September 10, 2002 + + + + + + - 4 - + + + These directories have subdirectories named: + + cg the backend (*.m => *.s) + ncg the new backend (*.m => *.s) + as the assembler (*.s => *.o) or + assembler/linker (*.s + libraries => a.out) + cv conversion programs for a.out files + dl down-load programs + top the target optimizer + int source for an interpreter + + libbc to create Basic run-time system and libraries + libcc to create C run-time system and libraries + libcc.ansi to create ANSI C run-time system and libraries + libpc to create Pascal run-time system and libraries + libf77 to create Fortran run-time system and libraries + libm2 to create Modula-2 run-time system and libraries + liboc to create occam run-time system and libraries + libem EM runtime system, only depending on CPU type + libend library defining end, edata, etext + libfp to create floating point library + libdb to create debugger support library + libsys system-dependent EM library + libce fast cc-compatible C compiler library support + + ce code expander (fast back-end) + + test various tests + + Actually, some of these directories will only appear in the con- + figuration tree. + The directory proto contains files used by most machines, like + machine-independent sources and Makefiles. + + mach/proto/cg current backend sources + mach/proto/ncg new backend sources + mach/proto/as assembler sources + mach/proto/top target optimizer sources + mach/proto/fp floating point package sources + mach/proto/libg makefiles for compiling libraries + mach/proto/grind machine-independent debugger support + + emtest contains prototype of em test set. + lang just there to group the directories for all front-ends. + lang/pc the Pascal front-end. + lang/pc/libpc + source of Pascal run-time system (in EM or C). + lang/pc/test some test programs written in Pascal. + lang/pc/comp the Pascal compiler proper. + lang/cem the C front-end. 
+ lang/cem/libcc + directories with sources of C runtime system, libraries (in + EM or C). + lang/cem/libcc/gen + + + + September 10, 2002 + + + + + + - 5 - + + + sources for routines in chapter III of UNIX programmers + manual, excluding stdio. + lang/cem/libcc/stdio + stdio sources. + lang/cem/libcc/math + sources for mathematical routines, normally available with + the ----llllmmmm option to _c_c. + lang/cem/libcc/mon + sources for routines in chapter II, mostly written in EM. + lang/cem/cemcom + the compiler proper. + lang/cem/cemcom.ansi + the ANSI C compiler proper. + lang/cem/cpp.ansi + the ANSI C preprocessor. + lang/cem/libcc.ansi + the ANSI C library sources. + lang/cem/ctest + the C test set. + lang/cem/ctest/cterr + programs developed for pinpointing previous errors. + lang/cem/ctest/ct* + the test programs. + lang/cem/lint a C program checker. + lang/cem/lint/lpass1 + the first pass of lint. + lang/cem/lint/lpass1.ansi + the first pass of lint, this time for ANSI C. + lang/cem/lint/lpass2 + the second pass of lint, shared between ANSI C and "old- + fashioned" C. + lang/cem/lint/llib + programs for producing lint libraries. + lang/basic the Basic front-end. + lang/basic/src + the compiler proper. + lang/basic/lib + the Basic run-time library source. + lang/basic/test + various Basic programs. + lang/occam the occam front-end. + lang/occam/comp + the compiler proper. + lang/occam/lib + source of occam run-time system (in EM or C). + lang/occam/test + some occam programs. + lang/m2 the Modula-2 front-end. + lang/m2/comp the compiler proper. + lang/m2/libm2 source of Modula-2 run-time system (in EM, C and Modula-2). + lang/m2/m2mm the Modula-2 makefile generator. + lang/m2/test some Modula-2 example programs. + lang/fortran the Fortran front-end (translates Fortran into C). This com- + piler is not a part of ACK, but is included because it adds + + + + September 10, 2002 + + + + + + - 6 - + + + another language. 
The Fortran system carries the following + copyright notice: + + /************************************************************** + Copyright 1990, 1991 by AT&T Bell Laboratories and Bellcore. + + Permission to use, copy, modify, and distribute this software + and its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that the copyright notice and this + permission notice and warranty disclaimer appear in supporting + documentation, and that the names of AT&T Bell Laboratories or + Bellcore or any of their entities not be used in advertising or + publicity pertaining to distribution of the software without + specific, written prior permission. + + AT&T and Bellcore disclaim all warranties with regard to this + software, including all implied warranties of merchantability + and fitness. In no event shall AT&T or Bellcore be liable for + any special, indirect or consequential damages or any damages + whatsoever resulting from loss of use, data or profits, whether + in an action of contract, negligence or other tortious action, + arising out of or in connection with the use or performance of + this software. + **************************************************************/ + lang/fortran/comp + the compiler proper. + lang/fortran/lib + source of Fortran runtime system and libraries. + fast contains sub-directories for installing the fast ACK compati- + ble compilers. + fast/driver + contains the sources of the fast ACK compatible compiler + drivers. + fcc contains the fast cc-compatible C compiler for SUN-3 and VAX. + util contains directories with sources for various utilities. + util/ack the program used for translation with the Kit. + util/opt the EM peephole optimizer (*.k => *.m). + util/ego the global optimizer. + util/topgen the target optimizer generator. + util/misc decode (*.[km] => *.e) + encode (*.e => *.k). 
+ util/data the C-code for $TARGET_HOME/lib.bin/em_data.a. These sources + are created by the Makefile in `etc`. + util/ass the EM assembler (*.[km] + libraries => e.out). + util/arch the archivers to be used for all EM utilities. + util/cgg a program needed for compiling backends. + util/ncgg a program needed for compiling the newest backends. + util/cpp the C preprocessor. + util/shf various shell files. + util/LLgen the extended LL(1) parser generator. + util/amisc contains some programs handling ACK a.out format, such as + anm, asize. + util/cmisc contains some programs to help in resolving name conflicts, + and a dependency generator for makefiles. + + + + September 10, 2002 + + + + + + - 7 - + + + util/led the ACK link-editor, reading ACK relocatable a.out format, + and writing ACK a.out format. + util/int an EM interpreter, written in C. Very useful for checking out + software, but slow. + util/ceg code expander generator. + util/grind a symbolic debugger. + util/byacc this is Berkeley yacc, in the public domain. + util/flex this is a replacement for lex. It carries the following copy- + right notice: + + Copyright (c) 1990 The Regents of the University of California. + All rights reserved. + + This code is derived from software contributed to Berkeley by + Vern Paxson. + + The United States Government has rights in this work pursuant + to contract no. DE-AC03-76SF00098 between the United States + Department of Energy and the University of California. + + Redistribution and use in source and binary forms are permitted + provided that: (1) source distributions retain this entire + copyright notice and comment, and (2) distributions including + binaries display the following acknowledgement: ``This product + includes software developed by the University of California, + Berkeley and its contributors'' in the documentation or other + materials provided with the distribution and in all advertising + materials mentioning features or use of this software. 
Neither the + name of the University nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. + + All path names mentioned in the text of this document are relative to + $SRC_HOME, unless they start with '/' or one of $SRC_HOME, $TARGET_HOME or + $CONFIG. + + _3. _R_e_s_t_o_r_i_n_g _t_h_e _A_C_K _t_r_e_e + + The process of installing the Amsterdam Compiler Kit is quite simple. + The first step is to restore the Amsterdam Compiler Kit distribution tree + structure. Proceed as follows + + - Create a directory, for example /usr/share/local/src/ack, on a + device with at least 15 Megabytes left. This directory will be + $SRC_HOME. + + - Change to that directory (cd ...). + + - Extract all files from the distribution medium, for instance + + + + September 10, 2002 + + + + + + - 8 - + + + magtape: ttttaaaarrrr xxxx. + + - Keep a copy of the original distribution to be able to repeat the + process of installation in case of disasters. This copy is also + useful as a reference point for diff-listings. + + _4. _A_d_a_p_t_i_n_g _A_C_K _t_o _t_h_e _l_o_c_a_l _s_y_s_t_e_m + + Before compiling the sources in the Kit some installation dependent + actions have to be taken. Most of these are performed by an interactive + shell script in the file $_S_R_C__H_O_M_E/_f_i_r_s_t/_f_i_r_s_t. Calling this script should + be done from another directory, for instance an empty directory which will + later become $CONFIG. + + The actions of the _f_i_r_s_t script are: + + - Asking for the path names of the ACK source directory ($SRC_HOME), the + configuration directory ($CONFIG), and the ACK users directory + ($TARGET_HOME). About 5M are needed for the configuration tree. 
The + disk space needed for the ACK users tree depends on which front-ends + and back-ends are to be installed. For instance, on our SPARC systems + we have installed all languages and 6 back-ends, including the + system-independent part. This amounts to about 16M. On our SUN-3 sys- + tems, we have installed all front-ends and 5 back-ends, but only the + machine-dependent part. The machine-independent directories are sym- + bolic links to the SPARC ACK users tree. We also have the fast ACK + compilers installed on the SUN-3's. The total amount of disk-space + used is less than 8M. + - Asking for what type of system the binary tree must be produced for + and creating the shell script "ack_sys" in the Kit's bin directory. + Several utilities make use of "ack_sys" to determine the type of sys- + tem. The current choice is between: + + answer system type default machine + vax_bsd4_1a VAX11 + BSD4.1a vax4 + vax_bsd4_2 VAX11 + BSD4.2 vax4 + vax_sysV_2 VAX11 + System V.2 vax4 + i386 Intel 80386 system + Xenix System V i386 + sun3 Sun-3 Motorola 68020 workstation sun3 + sun2 Sun-2 Motorola 68010 workstation sun2 + m68_sysV_0 68000 + Uniplus System V.0 mantra + m68020 Motorola 68020 VME131 + System V/68 R2V2.1 m68020 + sparc Sun-4 or SPARC workstation running SunOs 4 sparc + sparc_solaris Sun-4 or SPARC workstation running Solaris 2 sparc_solaris + ANY Neither of the above ??? + + For some of these, the installation procedure has not been tested, as + we don't have them. For others, the installation procedure has only + been tested with earlier distributions, as we don't have those systems + anymore. However, the sun3 and sparc systems are known to behave rea- + sonably. The sparc_solaris system has only been tested with the GNU C + compiler, because we don't have the SUN C compiler (it is unbundled in + Solaris 2). The Sun systems should run SunOs Release 3.0 or newer. 
+ The i386 choice may also be used for Intel 80386 or 80486 systems + + + + September 10, 2002 + + + + + + - 9 - + + + running UNIX System V Release 4. These systems are also able to run + Xenix System V binaries. If the target system is not on this list, + choose one that comes close. If none of them come close, use the + "ANY" choice. For ANY, any name can be used, but the Kit will not be + able to compile programs for the target system. See the section about + "compilation on a different machine". + - Setting the default machine for which code is produced to the local + type of system according to the table above. This is done in the file + "$TARGET_HOME/config/local.h". See also section 9.1. + - Asking for things that don't have to be installed. + - Producing a shell script called "INSTALL" that will take care of the + ACK installation process. + + _5. _C_o_m_p_i_l_i_n_g _t_h_e _K_i_t + The next step in the installation process is to run the "INSTALL" + shell-script. When using a Bourne-shell, type: + + sh INSTALL > INSTALL.out 2>&1 & + + When using a C-shell, type: + + sh INSTALL >& INSTALL.out & + + This shell-script performs the following steps: + + - Produce a configuration tree ($CONFIG), reflecting the structure of + the source tree. + - Produce Makefiles in $CONFIG. As mentioned before, compilations will + be done in the configuration tree, not in the source tree. Most con- + figuration directories will have Makefiles used to compile and install + the programs in that directory. All programs needed for compilation + and/or cross compilation with the Kit are installed in $TARGET_HOME by + these Makefiles. These Makefiles are produced from corresponding + files called "proto.make" in the source tree. In fact, the + "proto.make" files are almost complete Makefiles, except for some + macro definitions that are collected by the _f_i_r_s_t script. The + Makefiles adhere to a standard which is described in section 10.
+ - Copy "Action" files to the configuration tree and editing them to + reflect the choices concerning the parts of ACK that have to be + installed. "Action" files are described below. + - Copy part of the source tree to the ACK users tree (include files, + manual pages, documentation, et cetera). + - Calling the "TakeAction" script. All these Makefiles do not have to + be called separately. We wrote a shell script calling the make's + needed to install the whole Kit. This script consists of the file + $SRC_HOME/TakeAction and a few files called Action in some configura- + tion directories. The Action files describe in a very simple form + which actions have to be performed in which directories. The default + action is to start "make install && make clean". The output of each + make is diverted to a file called "Out" in the same directory as the + make was started in. If the make was successful (return code 0) the + Out file is removed and the script TakeAction produces a small message + indicating that it succeeded in fulfilling its goal. If the make was + not successful (any other return code) the Out file is left alone for + + + + September 10, 2002 + + + + + + - 10 - + + + further examination and the script TakeAction produces a small message + indicating that it failed. + For some programs the scripts already know they can't be installed on + the local type of system. In that case they produce a message "Sorry, + ....." and happily proceed with further installation commands. + + + Installation of the Kit might take anything from a few hours to more than a + day, depending on the speed of the local machine and what must be + installed. + + If the installation succeeded, the Kit is ready to be used. Read section 6 + and the manuals provided with the Kit (in the $TARGET_HOME/man directory) + on how to use it. + + _5._1. _P_r_o_b_l_e_m_s + + _5._1._1. _o_n _U_n_i_s_o_f_t _m_6_8_0_0_0 _s_y_s_t_e_m_s. 
+ + The Unisoft C compiler has a bug which impedes the correct translation + of the peephole optimizer. For a more detailed description of this + phenomenon see the file "$SRC_HOME/mach/m68k2/Unisoft_bug". (This observa- + tion was made in 1985 or so, so it is probably no longer true). + + _5._1._2. _w_i_t_h _b_a_c_k_e_n_d_s + + The backends for the PDP11, VAX, Motorola 68000 and 68020, SPARC, + Intel 8086, and Intel 80386 have been heavily used by ourselves and are + well tested. The backends for the other machines are known to run our own + test programs, but might reveal errors when more heavily used. + + _5._2. _A_n _e_x_a_m_p_l_e _o_u_t_p_u_t _o_f _T_a_k_e_A_c_t_i_o_n. + + + System definition -- done + EM definition library -- done + C utilities -- done + Flex lexical analyzer generator -- done + Yacc parser generator -- done + system-call interface module -- done + . + . + . + EM Global optimizer -- done + ACK archiver -- done + Program 'ack' -- done + Bootstrap for backend tables -- done + Bootstrap for newest form of backend tables -- done + . + . + . + C frontend -- done + ANSI-C frontend -- done + ANSI-C preprocessor -- done + + + + September 10, 2002 + + + + + + - 11 - + + + ANSI-C header files -- done + Failed for LINT C program checker, see lang/cem/lint/Out + Pascal frontend -- done + Basic frontend -- done + . + . + . + Vax 4-4 assembler -- done + Vax 4-4 backend -- done + Vax target optimizer -- done + ACK a.out to VAX a.out conversion program -- done + Sorry, Vax code expander library can only be made on vax* systems + Vax 4-4 EM library -- done + Vax 4-4 debugger support library -- done + Vax 4-4 etext,edata,end library -- done + Vax 4-4 systemcall interface -- done + . + . + . + + + The lines starting with "Sorry, " indicate that certain programs cannot be + translated on the local machine. The lines starting with "Failed for" + indicate that certain programs/libraries were expected to, but did not com- + pile. 
In this example, the installation of LINT failed. To repeat a cer- + tain part of the installation, look in the Action file, which resides in + the root of the configuration tree, for the directory in which that part is + to be found. If that directory contains an Action file issue the command + "sh $CONFIG/bin/TakeAction", otherwise type "make install". + + _6. _C_o_m_m_a_n_d_s + + The following commands are available in the $TARGET_HOME/bin directory + after compilation of the Kit: + + _a_c_k, _a_c_c, _a_b_c, _a_p_c, _o_c_m, _m_2, _f_2_c and their links + the names mentioned here can be used to compile Pascal, C, + etc... programs. Most of the links can be used to generate + code for a particular machine. See also the section about + "Machines". + + _a_r_c_h the archiver used for the EM- and universal assembler/loader. + + _a_a_l the archiver used for ACK objects. + + _e_m this program selects an interpreter to execute an e.out file. + Interpreters exist for PDP-11 and Motorola 68000 systems. + + _e_m_i_n_f_o_r_m the program to unravel the post-mortem information of the EM + interpreter for the PDP-11. + + _L_L_g_e_n the LL(1) parser generator. + + _a_c_k__s_y_s a shell script producing an identification of the target + + + + September 10, 2002 + + + + + + - 12 - + + + system. Used by some utilities to determine what is, and + what is not feasible on the target system. + + _m_a_r_c_h a shell script used while compiling libraries. + + _a_s_i_z_e, _a_n_m, _a_s_t_r_i_p + do the same as _s_i_z_e, _n_m and _s_t_r_i_p, but for ACK object format. + + _m_k_d_e_p a dependency generator for makefiles. + + _c_i_d, _p_r_i_d, _c_c_l_a_s_h + some utilities for handling name clashes in C programs. Some + systems have C-compilers with only 7 or 8 characters signifi- + cant in identifiers. + + _t_a_b_g_e_n a utility for generating character tables for C-programs. + + _i_n_t an EM interpreter. This one is written in C, and is very use- + ful for checking out programs.
+ + _g_r_i_n_d a source level debugger for C, ANSI-C, Modula-2 and Pascal. + + _a_f_c_c, _a_f_m_2, _a_f_p_c + these are ACK-compatible fast C, Modula-2 and Pascal com- + pilers, available for M68020, VAX and Intel 80386 systems. + They compile very fast, but produce slow code. + + _f_c_c this is a cc-compatible fast C compiler, available on SUN-3 + and VAX systems. It compiles very fast, but produces slow + code. + + We currently make the Kit available to our users by telling them that they + should include the $TARGET_HOME/bin directory in their PATH shell variable. + The programs will still work when moved to a different directory or linked + to. Copying should preferably be done with tar, since links are heavily + used. Renaming of the programs linked to _a_c_k will not always produce the + desired result. This program uses its call name as an argument. Any call + name not being _c_c, _a_c_c, _a_b_c, _p_c, _f_2_c, _o_c_m, _m_2, or _a_p_c will be interpreted + as the name of a 'machine description' and the program will try to find a + description file with that name. The installation process will only touch + the utilities in the $TARGET_HOME/bin directory, not copies of these utili- + ties. + + _7. _M_a_c_h_i_n_e_s + + Below is a table with entries for all commands in the bin directory + used to (cross)compile for a particular machine. The name in the first + column gives the name in the bin directory. The column headed dir indi- + cates which subdirectories of $TARGET_HOME/lib and/or $TARGET_HOME/lib.bin + are needed for compilation. The column head i/p contains the integer and + pointer size used in units of bytes. The subdirectories with the same name + in mach contain the sources. A * in the column headed 'fp' indicates that + floating point can be used for that particular machine. A + in that column + indicates that floating point is available under the '-fp' option. 
In this + + + + September 10, 2002 + + + + + + - 13 - + + + case, software floating point emulation is used. + + command system i/p languages fp dir remarks + + pdp PDP/UNIX V7 2/2 C * pdp + Pascal + Basic + occam + Modula-2 + + vax4 VAX/BSD 4.? 4/4 C * vax4 + System V.2 Pascal + Basic + occam + Modula-2 + Fortran + + sparc Sun-4 4/4 C * sparc + Pascal + Basic + occam + Modula-2 + Fortran + + sparc_solaris Sun-4 4/4 C * sparc_solaris + Pascal + Basic + occam + Modula-2 + Fortran + + m68k2 M68000/ 2/4 C + m68k2 + Unisoft Pascal + Basic + occam + Modula-2 + + m68k4 M68000/ 4/4 C + m68k4 + Unisoft Pascal m68k2 + Basic + occam + Modula-2 + Fortran + + pmds M68000/ 2/4 C + pmds Philips Micro + PMDS Pascal m68k2 Devel. System + Basic + occam + Modula-2 + + pmds4 M68000/ 4/4 C + pmds4 Philips Micro + PMDS Pascal m68k2 Devel. System + Basic m68k4 + occam + + + + September 10, 2002 + + + + + + - 14 - + + + Modula-2 + Fortran + + mantra M68000/ 4/4 C + mantra + Sys V.0 Pascal m68k2 + Basic m68k4 + occam + Modula-2 + Fortran + + m68020 M68020/ 4/4 C + m68020 + Sys V/68 R2V2.1 Pascal + Basic + occam + Modula-2 + Fortran + + sun3 Sun-3 R4.1 4/4 C + sun3 + Pascal m68020 + Basic + occam + Modula-2 + Fortran + + sun2 Sun-2 R3.0 4/4 C + sun2 + Pascal m68k4 + Basic m68k2 + occam + Modula-2 + Fortran + + i86 IBM PC/IX 2/2 C + i86 IBM PC with PC/IX + Pascal Causes kernel crashes + Basic + occam + Modula-2 + + xenix3 Microsoft 2/2 C + xenix3 IBM AT with Xenix + Xenix V3 Pascal i86 + Basic + occam + Modula-2 + + i386 SCO Xenix 4/4 C + i386 Intel 80386 + System V Pascal Xenix System V + Basic + occam + Modula-2 + Fortran + + minix Minix PC 2/2 C + minix IBM PC running Minix + Pascal i86 + Basic + occam + + + + September 10, 2002 + + + + + + - 15 - + + + Modula-2 + + minixST ST Minix 2/4 C + minixST Atari ST running Minix + Pascal m68k2 + Basic + occam + Modula-2 + + z8000 Zilog 8000 2/2 C z8000 Central Data + Pascal CPU board + Basic Assembler/loader + occam + Modula-2 + + em22 EM 
machine 2/2 C * em22 Needs interpreter + Pascal + Basic + occam + Modula-2 + + em24 EM machine 2/4 C * em24 Needs interpreter + Pascal + Basic + occam + Modula-2 + + em44 EM machine 4/4 C * em44 Needs interpreter + Pascal + Basic + occam + Modula-2 + Fortran + + 6500 6502/BBC 2/2 C 6500 Assembler/loader + Pascal + Basic + occam + Modula-2 + + 6800 Bare 6800 6800 Assembler only + + 6805 Bare 6805 6805 Assembler only + + 6809 Bare 6809 6809 Assembler only + + ns Bare NS16032 4/4 C ns + Pascal + Basic + occam + Modula-2 + Fortran + + i80 Hermac/z80 2/2 C i80 + Pascal + + + + September 10, 2002 + + + + + + - 16 - + + + Basic + occam + Modula-2 + + z80 Hermac/z80 2/2 C z80 _i_8_0 is faster + Pascal + Basic + occam + Modula-2 + + s2650 Signetics s2650 Assembler only + + arm Acorn 4/4 C * arm Assembler/loader + Archimedes Pascal + Basic + occam + Modula-2 + Fortran + + + The commands eeeemmmm22222222, eeeemmmm22224444 and eeeemmmm44444444 produce e.out files with EM machine code + which must be interpreted. The Kit contains three interpreters: one run- + ning under PDP 11/V7 UNIX, one for the M68000, running under the PMDS sys- + tem, Sun systems, the Mantra system, etc, and a portable one, written in C. + The first one can only interpret 2/2 e.out files, the second takes 2/4 and + 4/4 files, and the last one takes 2/2, 2/4 and 4/4. The PDP 11 interpreter + executes floating point instructions. + + The program $$$$TTTTAAAARRRRGGGGEEEETTTT____HHHHOOOOMMMMEEEE////bbbbiiiinnnn////eeeemmmm calls the appropriate interpreter. The + interpreters are looked for in the em22, em24 and em44 subdirectories of + $TARGET_HOME/lib.bin. The third interpreter is available as the program + $$$$TTTTAAAARRRRGGGGEEEETTTT____HHHHOOOOMMMMEEEE////bbbbiiiinnnn////iiiinnnntttt in the bin directory. + + _8. _C_o_m_p_i_l_a_t_i_o_n _o_n _a _d_i_f_f_e_r_e_n_t _m_a_c_h_i_n_e. 
+ + The installation mechanism of the Kit is supposed to be portable + across UNIX machines, so the Kit can be installed and used as a cross- + compiler for the languages it supports on any UNIX machine. The presence + of most UNIX utilities is essential for compilation. A few of the programs + certainly needed are: sh, C-compiler, sed, ed, make, and awk. + + _8._1. _B_a_c_k_e_n_d + + The existence of a backend with a system call library for the target + system is essential for producing executable files for that system. + Rewriting the system call library if the one supplied does not work on the + target system is fairly straightforward. If no backend exists for the tar- + get CPU type, a new backend has to be written which is a major undertaking. + + _8._2. _U_n_i_v_e_r_s_a_l _a_s_s_e_m_b_l_e_r/_l_o_a_d_e_r, _l_i_n_k _e_d_i_t_o_r + + For most machines, the description files in $TARGET_HOME/lib/*/descr + use our universal assembler and our link editor. The load file produced is + not directly usable in any system known to us, but has to be converted + + + + September 10, 2002 + + + + + + - 17 - + + + before it can be put to use. The _c_v programs convert our a.out format into + executable files. The _d_l programs present for some machines unravel our + a.out files and transmit commands to load memory to a microprocessor over a + serial line. The file $TARGET_HOME/man/man5/ack.out.5 contains a descrip- + tion of the format of the universal assembler load file. It might be use- + ful to those who wish or need to write their own conversion programs. + Also, a module is included to read and write our a.out format. See + $TARGET_HOME/man/man3/object.3. + + _9. _O_p_t_i_o_n_s + + _9._1. _D_e_f_a_u_l_t _m_a_c_h_i_n_e + + There is one important option in $TARGET_HOME/config/local.h. The + utility _a_c_k uses a default machine name when called as _a_c_c, _c_c, _a_b_c, _a_p_c, + _p_c, _o_c_m, _m_2, _f_2_c, or _a_c_k. 
The machine name used by default is determined + by the definition of ACKM in $TARGET_HOME/config/local.h. The Kit is dis- + tributed with "sun3" as the default machine, but the shell script "first" + in the directory "first" alters this to suit the target system. There is + nothing against using the Kit as a cross-compiler and by default produce + code that can't run on the local system. + + _9._2. _P_a_t_h_n_a_m_e_s + + Absolute path names are concentrated in + "$TARGET_HOME/config/em_path.h". Only the utilities _a_c_k, _f_l_e_x, and _L_L_g_e_n + use absolute path names to access files in the Kit. The tree is distri- + buted with /usr/em as the working directory. The definition of EM_DIR in + em_path.h should be altered to specify the root directory for the Compiler + Kit binaries on the local system ($TARGET_HOME). This is done automati- + cally by the shell script "first" in the directory "first". Em_path.h also + specifies which directory should be used for temporary files. Most pro- + grams from the Kit do indeed use that directory although some remain stub- + born and use /tmp or /usr/tmp. + + The shape of the tree should not be altered lightly because most Makefiles + and the utility _a_c_k know the shape of the ACK tree. The knowledge of the + utility _a_c_k about the shape of the tree is concentrated in the files in the + directory $TARGET_HOME/lib/*/descr and $TARGET_HOME/lib/descr/*. + + _1_0. _M_a_k_e_f_i_l_e_s + + Most directories contain a "proto.make", from which a Makefile is + derived. Apart from commands applying to that specific directory these + files all recognize a few special commands. When called with one of these + they will apply the command to their own directory. The special commands + are: + + + install recompile and install all binaries and libraries. + Some Makefiles allow errors to occur in the programs + they call. They ignore such errors and notify the user + with the message "~....... error code n: ignored". 
+ Whenever such a message appears in the output it can be + + + + September 10, 2002 + + + + + + - 18 - + + + ignored. + + cmp recompile all binaries and libraries and compare them + to the ones already installed. + + pr print the sources and documentation on the standard + output. + + opr make pr | opr + Opr should be an off-line printer daemon. On some sys- + tems it exists under another name e.g. lpr. The easi- + est way to call such a spooler is using a shell script + with the name opr that calls lpr. This script should + be placed in /usr/bin or $TARGET_HOME/bin or one of the + directories in the PATH environment variable. + + clean remove all files not needed for day-to-day use, that is + binaries not in $TARGET_HOME/bin or + $TARGET_HOME/lib.bin, object files etc. + + Example: + + make install + + given as command in a configuration directory will cause compilation of all + programs in the directory and copying of the results to the + $TARGET_HOME/bin and $TARGET_HOME/lib.bin directories. + + _1_1. _T_e_s_t_i_n_g + + Test sets are available in Pascal, C, Basic and EM assembly: + + EM the directory $SRC_HOME/emtest contains a few EM test programs. + The EM assembly files in these tests must be transformed into load + files. These tests use the LIN and NOP instructions to mark the + passing of each test. The NOP instruction prints the current line + number during the test phase. Each test notifies its correctness + by calling LIN with a unique number followed by a NOP which prints + this line number. The test finishes normally with 0 as the last + number printed. In all other cases a bug showed its existence. + + Pascal the directory $SRC_HOME/lang/pc/test contains a few Pascal test + programs. All these programs print the number of errors found and + an identification of these errors. + + We also tested Pascal with the Validation Suite.
The Valida- + tion Suite is a collection of more than 200 Pascal programs, + designed by Brian Wichmann and Arthur Sale to test Pascal com- + pilers. We are not allowed to distribute it, but a copy may be + requested from + + + + + + + + September 10, 2002 + + + + + + - 19 - + + + + Richard J. Cichelli + A.N.P.A. + 1350 Sullivan Trail + P.O. Box 598 + Easton, Pennsylvania 18042 + USA + + + C the sub-directories in $SRC_HOME/lang/cem/ctest contain C test pro- + grams. The idea behind these tests is: if there is a program + called xx.c, compile it into xx.cem. Run it with standard output + to xx.cem.r, compare this file to xx.cem.g, a file containing the + 'ideal' output. Any differences will point to implementation + differences or bugs. Giving the command "run gen" or plain "run" + starts this process. The differences will be presented on standard + output. The contents of the result files depend on the word size, + the xx.cem.g files on the distribution are intended for a 32-bit + machine. + + Basic the directory $SRC_HOME/lang/basic/test contains some forty Basic + programs. Not all of these programs are correct, some have syntac- + tic errors, some simply don't work. The Makefile in that directory + attempts to compile and run these tests. If it compiles its output + is compared to a file with suffix .g which contains the output to + be expected. The make should be started with its standard input + diverted to /dev/null. An example of the output of a make is + present in the file Out.std. + + _1_2. _D_o_c_u_m_e_n_t_a_t_i_o_n + + After installation, the manual pages for Amsterdam Compiler Kit can be + found in the $TARGET_HOME/man directory. 
Also, the following documents are + provided in the $TARGET_HOME/doc directory: + + toolkit.doc general overview (CACM article) + em.doc description of the EM machine architecture + ack.doc format of machine description files (lib/*/descr) + ansi_C.doc ANSI C implementation description + basic.doc Basic reference manual + pcref.doc Pascal-frontend reference manual + val.doc results of running the Pascal Validation Suite + crefman.doc C-frontend description + LLgen description of the LL(1) parser generator + peep.doc internal documentation for the peephole optimizer + cg.doc documentation for backend writers and maintainers + regadd.doc addendum to previous document describing register variables + ncg.doc documentation for the newest backends + v7bugs.doc bugs in the V7 system and how to fix them + 6500.doc MSC 6500 backend description + i80.doc Intel 8080 backend description + z80.doc Zilog Z80 backend description + m68020.doc Motorola M68000/M68020 backend description + sparc.doc SPARC code expander description + + + + September 10, 2002 + + + + + + - 20 - + + + occam.doc occam-frontend description + ego.doc Global Optimizer description + top.doc Target Optimizer description + int.doc description of the EM interpreter written in C + ceg.doc documentation for code-expander writers and maintainers + lint.doc documentation of LINT + m2ref.doc Modula-2 frontend description + install.doc this document + install.pr this document (formatted for a simple line printer) + + + Use the Makefile to get readable copies. + + Good luck. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + September 10, 2002 + + diff --git a/doc/int/Makefile b/doc/int/Makefile new file mode 100755 index 0000000..4496bd6 --- /dev/null +++ b/doc/int/Makefile @@ -0,0 +1,10 @@ +# $Id: Makefile,v 2.6 1994/06/24 10:05:07 ceriel Exp $ + +DOC = draw.mac cover txt1 txt2 txt3 appA appB bib + +FLS = README proto.make Makefile $(DOC) + +.distr: Makefile + echo $(FLS) | tr ' ' '\012' >.distr + +distr: .distr diff --git a/doc/int/README b/doc/int/README new file mode 100644 index 0000000..b31b409 --- /dev/null +++ b/doc/int/README @@ -0,0 +1,4 @@ +# $Id: README,v 2.2 1994/06/24 10:05:10 ceriel Exp $ + +This directory contains the text of the documentation for the +Production Quality Interpreter "int". diff --git a/doc/int/appA b/doc/int/appA new file mode 100644 index 0000000..5470585 --- /dev/null +++ b/doc/int/appA @@ -0,0 +1,280 @@ +.\" List of all warnings; source of warn_msg and warn.h +.\" +.\" $Id: appA,v 2.3 1994/06/24 10:05:13 ceriel Exp $ +.\" +.\" This file contains the warnings issued by the interpreter, together +.\" with their names and values in the code of the interpreter. Some of +.\" the source files of the interpreter are generated from the Wn +.\" macros in this file. +.\" When modifying this file, preserve the parameters of the Wn macros. +.de Wn \" +.IP \\$3. 7 +.B "\\$1" +.br +.. Wn +.bp +.DS C +APPENDIX A +.DE +.SH +List of Warnings. +.PP +The shadow-byte administration makes it possible to check for a +wide range of errors during run-time. +We have tried to make the diagnostics self-explanatory and especially useful +for the C-programmer. +The warnings are printed in the message file, together with source file +and line number. +The complete list of warnings is presented here, followed by an +explanation of what might be wrong. +Often, these explanations implicitly assume that the program +being interpreted, was originally written in C (and not Pascal, Basic etc.). 
+.LP +.I "Reading the load file" +.Wn "Floating point instructions flag in header ignored" WFLUSED 1 +.Wn "No float initialisation in this version" WFLINIT 2 +The interpreter was compiled with the NOFLOAT option; code involving +floating point operations can be run as long as the actual +instructions are avoided. +.Wn "Extra-test flag in header ignored" WEXTRIGN 4 +The interpreter already tests anything conceivable. +.Wn "Maximum line number in header was 0" WNLINEZR 5 +This number could be used to allocate tables for tallying; these tables are, +however, expanded as needed, so the number is immaterial. +.Wn "Bad float initialisation" WBADFLOAT 7 +The loadfile contains a floating point denotation which does not +satisfy the syntax (see 2.6). +Examining the loadfile (with \fBod \-c\fP) might show the syntax error. +Probably there is a bug in the front-end, creating floats with +a bad syntax. +.LP +.I "System calls" +.Wn "IOCTL \- bad or unimplemented request" WBADIOCTL 11 +The second parameter to the ioctl() request (the operation code) is invalid or +not implemented; since there are many different opcodes on the various UNIX +systems, it is difficult to tell which. The system call fails. +.Wn "MPXCALL \- not (yet) implemented" WMPXIMP 14 +.Wn "PROFIL \- not (yet) implemented" WPROFILIMP 15 +.Wn "PTRACE \- not (yet) implemented" WPTRACEIMP 16 +The monitor calls \fImpxcall()\fP, \fIprofil()\fP and \fIptrace()\fP +have not been implemented. The monitor call fails. +.Wn "Inaccessible memory in system call" WMONFLT 21 +Bad pointers passed to system calls do not cause a memory fault (which in UNIX +would happen to the kernel), but cause the system call to fail with the UNIX +variable errno set to 14 (EFAULT). It seems likely that the program is at +fault, but there is also a good possibility that a library routine made +unwarranted assumptions about word size and pointer size. 
+.Wn "READ \- buffer resides in unallocated memory" WRUMEM 23 +.Wn "READ \- buffer across global data area and heap" WRGDAH 24 +When the buffer passed to the read() system call is situated (completely +or partially) in unallocated memory (beyond \fIHP\fP) or begins +in the global data area and ends in the heap, the appropriate warning +is given. +The buffer is not written. +.Wn "WRITE \- buffer resides in unallocated memory" WWUMEM 25 +.Wn "WRITE \- buffer across global data area and heap" WWGDAH 26 +.Wn "WRITE \- (part of) global buffer is undefined" WWGUNDEF 27 +.Wn "WRITE \- (part of) local buffer is undefined" WWLUNDEF 28 +The first two are equivalent to the READ-errors above. +Writing out a buffer usually makes no sense when the contents are undefined, +so one of the latter two warnings will be generated in this case. +A global buffer resides in the data partition; a local buffer resides in +the stack partition. +This corresponds to global and local variables in a C-program. +In the first two cases the WRITE is not performed, in the latter two cases +it is. +.LP +.I "Traps and signals" +.Wn "SIGTRP \- bad signo argument" WILLSN 31 +The \fIsigtrp()\fP monitor call allows \fIsig_no\fP arguments in the +range [1..17] (UNIX Version 7 signals); the actual argument is out of range. +.Wn "SIGTRP \- signo argument is a synchronous trap" WUNIXTR 32 +The signal is one that can only be caused synchronously by the running program +on UNIX; it cannot occur to an interpreted program. +.Wn "SIGTRP \- bad trapno argument" WILLTN 33 +The \fIsigtrp()\fP monitor call allows \fItrap_no\fP arguments between 0 and +252, and the special values \-2 and \-3; the actual argument is not one of +these. +.Wn "Heap overflow due to command line limitation" WEHEAP 36 +.Wn "Stack overflow due to command line limitation" WESTACK 37 +The maximum sizes of the heap and the stack can be limited by options on the +command line. 
If overflow occurs due to such limitations, the corresponding +trap is taken, preceded by one of the above warnings. If the memory of the +interpreter itself is exhausted, a fatal error follows. +.LP +.I "Run-time type checking" +.Wn "Local character expected" WLCEXP 41 +.Wn "Global character expected" WGCEXP 42 +.Wn "Local integer expected" WLIEXP 43 +.Wn "Global integer expected" WGIEXP 44 +.Wn "Local float expected" WLFEXP 45 +.Wn "Global float expected" WGFEXP 46 +.Wn "Local data pointer expected" WLDPEXP 47 +.Wn "Global data pointer expected" WGDPEXP 48 +.Wn "Local instruction pointer expected" WLIPEXP 49 +.Wn "Global instruction pointer expected" WGIPEXP 50 +In general, a type violation has taken place when one of +these warnings is given. +The \fBfloat\fP- and \fBinstruction pointer\fP warnings are rare and will +usually be easily traceable. +\fBInteger/character expected\fP will normally occur when unsigned arithmetic +is performed on datapointers or when memory containing objects other than +integers is copied bytewise. +Often, this warning is followed by a warning \fBdatapointer expected\fP. +This is due to our decision of transforming pointers to (unsigned) integers +after doing unsigned arithmetic on them. +When such a transformed integer is dereferenced (as if it were a pointer) +or, in general, when it is treated as a pointer, this results in a warning. +The present library implementation of malloc() causes such a +sequence of errors. +.LP +These messages are always followed by a tentative description of what is found +in memory at the offending place.
+.Wn "Actual memory is undefined" WWASUND 61 +.Wn "Actual memory contains an integer" WWASINT 62 +.Wn "Actual memory contains a float" WWASFLOAT 63 +.Wn "Actual memory contains a data pointer" WWASDATAP 64 +.Wn "Actual memory contains an instruction pointer" WWASINSP 65 +.Wn "Actual memory contains mixed information" WWASMISC 66 +If the contents of the area was undefined, +check the source code for an uninitialized variable of the mentioned type. +Officially, the use of an undefined value +should result in a EIUND or EFUND trap but the occurrence is +so common that a warning is more appropriate. +The contents of memory are described as mixed if the data consists of pieces +of different types. This happens, e.g., when caller and callee do not agree on +the types and lengths of the parameters. +.LP +.I "Protection" +.br +.Wn "Destroying contents of ROM (at or near loc 0)" WDESROM 71 +The program stores a value in Read-Only Memory; the only ROM in the present +implementation is the area near location 0. The warning probably results from +storing under a NULL pointer. This is only a warning, the store operation is +executed normally. Reads from location 0 are not detected. +.Wn "Destroying contents of Return Status Block" WDESRSB 72 +The Return Status Block is the stack area containing the return address, the +dynamic link, etc. +This may or may not be an error. +The current implementation of \fIsetjmp()\fP/\fIlongjmp()\fP +may be responsible for it. +If the program does not use setjmp(), there \fIis\fP something +very wrong (e.g. argument for ASP too large). +Note that there are some library routines (such as \fIalarm()\fP) which +use \fIsetjmp()\fP. +.Wn "Logical operation using undefined operand(s)" WUNLOG 81 +.Wn "Comparing undefined operand(s)" WUNCMP 82 +The logical operations AND, XOR, IOR, COM and the compare operation +CMS do their jobs bytewise. 
+If one of the bytes is found to be undefined, the corresponding warning +is given, and the operation is stopped immediately. +The stack is adjusted so interpretation may continue. +.br +It is hard to say what went wrong. +Possibly, the argument of the instruction at hand (which indicates the +size of the objects to be compared), was too large. +.LP +.I "Bad operands" +.Wn "Shift over negative distance" WSHNEG 91 +.Wn "Shift over too large distance" WSHLARGE 92 +Shift instructions yield undefined results if the shift distance is negative +or larger than the object size. +.Wn "Pointer arithmetic yields pointer to bad segment" WSEGADP 93 +When doing pointer arithmetic (ADP, ADS), the operand and result pointer +must be in the same \fIsegment\fP (see sec. 4). +E.g. loading the address of the first local and adding 20 to it will +certainly give this warning. +.Wn "Subtracting pointers to different segments" WSEGSBS 94 +Pointers may be subtracted only if they point into the same segment. +.Wn "Pointer arithmetic with NULL pointer" WNULLPA 96 +By definition it is illegal to do arithmetic with null pointers. +Integers with the size of a pointer and the value zero are recognized +as NULL pointers. +A well-known C-trick to compute the offset of some field in a struct +is converting the null-pointer to the type of the struct and simply +taking the address of the field. +This trick will \-when translated and interpreted\- generate this warning +because it results in arithmetic with the NULL pointer. +.LP +.I "Return area" +.Wn "Returned function result too large" WRFUNLAR 101 +.Wn "Returned function result too small" WRFUNSML 102 +This warning is generated when the size of the expected return value +is not equal to the size actually returned. +.br +An interpreted program may have fallen through the end of +the code without explicitly doing an \fIexit()\fP or \fIreturn()\fP. 
+The start-up routine (\fIcrt0()\fP) however always expects to get some +value returned by the program proper. +.br +Another (less probable) possibility of course is that the code contains +a subroutine or function call that does not return properly (e.g. +it returns a short instead of a long). +.Wn "Returned function result may be garbled" WRFUNGAR 103 +This warning will be generated, when the contents of the FRA are fetched +after some instruction is executed which can mess up the area. +Compiler-generated loadfiles should not generate this message. +.LP +.I "Return Status Block" +.Wn "RET did not find a Return Status Block" WRETBAD 111 +.Wn "Used RET to return from a trap" WRETTRAP 112 +The RET instruction found a garbled Return Status Block, or one that resulted +from a trap. +.Wn "RTT did not find a Return Status Block" WRTTBAD 115 +.Wn "RTT on empty stack" WRTTEMPTY 116 +.Wn "Used RTT to return from a call" WRTTCALL 117 +.Wn "Used RTT to return from a non-returnable trap" WRTTNRTT 118 +The RTT (Return from Trap) instruction found a Return Status Block that was not +created properly by a trap. +.Wn "Stack Pointer too large in RET" WRETSTL 121 +.Wn "Stack Pointer too small in RET" WRETSTS 122 +.Wn "Stack Pointer too large in RTT" WRTTSTL 125 +.Wn "Stack Pointer too small in RTT" WRTTSTS 126 +According to the EM Manual (4.2), "the value of SP just after the return +value has been popped must be the same as the +value of SP just before executing the first instruction of the +invocation." +If the Stack Pointer is too large, some dynamically allocated item or some +temporary result may have been left behind on the stack. +If the Stack Pointer is too small, some locals have been unstacked. +Since the interpreter has enough information in the Return Status Block, it +recovers correctly from these errors. +.LP +.I "Traps" +.LP +Some traps have ambiguous or non-obvious causes.
+As far as possible, these are preceded by a warning, explaining the +circumstances of the trap. +.Wn "Trap ESTACK: DCH on bad LB" WDCHBADLB 131 +.Wn "Trap ESTACK: LPB on bad LB" WLPBBADLB 132 +.Wn "Trap ESTACK: SP retracted over Return Status Block" WSPGTLB 133 +.Wn "Trap ESTACK: SP moved into data area" WSPINHEAP 134 +.Wn "Trap ESTACK: SP set to non-word-boundary" WSPODD 135 +.Wn "Trap ESTACK: LB set out of stack" WLBOUT 136 +.Wn "Trap ESTACK: LB set to non-word-boundary" WLBODD 137 +.Wn "Trap ESTACK: LB set to position where there is no RSB" WLBRSB 138 +.Wn "Trap EHEAP: HP retracted into Global Data Area" WHPGDA 141 +.Wn "Trap EHEAP: HP pushed into stack" WHPSTACK 142 +.Wn "Trap EHEAP: HP set to non-word-boundary" WHPODD 143 +.Wn "Trap EILLINS: unknown opcode" WBADOPC 151 +.Wn "Trap EILLINS: conversion with unacceptable size for this machine" WILLCONV 152 +.Wn "Trap EILLINS: FIL with non-existing address" WILLFIL 153 +.Wn "Trap EILLINS: LFR with too large size" WILLLFR 154 +.Wn "Trap EILLINS: RET with too large size" WILLRET 155 +.Wn "Trap EILLINS: instruction argument of class c does not fit a word" WARGC 156 +.Wn "Trap EILLINS: instruction on double word on machine with word size 4" WARGD 157 +.Wn "Trap EILLINS: local offset too large" WARGL 158 +.Wn "Trap EILLINS: instruction argument of class g not in GDA" WARGG 159 +.Wn "Trap EILLINS: fragment offset too large" WARGF 160 +.Wn "Trap EILLINS: counter in lexical instruction out of range" WARGN 161 +.Wn "Trap EILLINS: non-existent procedure identifier" WARGP 162 +.Wn "Trap EILLINS: illegal register number" WARGR 163 +.Wn "Trap EBADPC: jump out of text segment" WPCOVFL 172 +.Wn "Trap EBADPC: jump out of procedure fragment" WPCPROC 173 +.Wn "Trap EBADGTO: GTO does not restore an existing RSB" WGTORSB 181 +.Wn "Trap EBADGTO: GTO descriptor on the stack" WGTOSTACK 182 +.Wn "Trap caused by TRP instruction" WTRP 191 +.ig +.Wn "Last warning" WMSG 199 +!Leave these lines here! +.. 
diff --git a/doc/int/appB b/doc/int/appB new file mode 100644 index 0000000..226c86e --- /dev/null +++ b/doc/int/appB @@ -0,0 +1,486 @@ +.\" A simple tutorial +.\" +.\" $Id: appB,v 2.3 1994/06/24 10:05:16 ceriel Exp $ +.\" +.bp +.DS +APPENDIX B +.DE +.SH +How to use the interpreter +.PP +The interpreter is not normally used for the debugging of programs under +construction. Its primary application is as a verification tool for almost +completed programs. Although the proper operation of the interpreter is +obviously a black art, this chapter tries to provide some guidelines. +.LP +For the sake of the argument, the source language is assumed to be C, but most +hints apply equally well to other languages supported by ACK. +.sp +.LP +.I "Initial measures" +.PP +Start with a test case of trivial size; to be on the safe side, reckon with a +time dilatation factor of about 500, i.e., a second grows into 10 minutes. +(The interpreter takes 0.5 msec to do one EM instruction on a Sun 3/50). +Fortunately many trivial test cases are much shorter than one second. +.PP +Compile the program into an \fIe.out\fP, the EM machine version of a +\fIa.out\fP, by calling \fIem22\fP (for 2-byte integers and 2-byte pointers), +\fIem24\fP (for 2 and 4) or \fIem44\fP (for 4 and 4) as seems appropriate; +if in doubt, use \fIem44\fP. These compilers can be found in the ACK +\fIbin\fP directory, and should be used instead of \fIacc\fP (or normal +.UX +\fIcc\fP). Alternatively, \fIacc \-memNN\fP can be used instead of +\fIemNN\fP. +.LP +If a C program consists of more than one file, as it usually does, there is +a small problem. The \fIacc\fP and \fIcc\fP compilers generate .o files, +whereas the \fIemNN\fP compilers generate .m files as object files. +A simple technique to avoid the problem is to call +.DS +em44 *.c +.DE +if possible. If not, the following hack on the \fIMakefile\fP generally works. 
+.IP \- +Make sure the \fIMakefile\fP is reasonably clean and complete: all calls to +the compiler are through \fI$(CC)\fP, \fICFLAGS\fP is used properly and all +dependencies are specified. +.IP \- +Add the following lines to the \fIMakefile\fP (possibly permanently): +.DS +\&.SUFFIXES: .o +\&.c.o: +\& $(CC) \-c $(CFLAGS) $< +.DE +.IP \- +Set CC to \fIem44 \-.c\fP (for example). Make sure CFLAGS includes +the \-O option; this yields a speed-up of about 15 %. +.IP \- +Change all .o to .m (or .k if the \-O option is not used). +.IP \- +If necessary, change \fIa.out\fP to \fIe.out\fP. +.PP +With these changes, \fImake\fP will produce an EM object; +\fIesize\fP can be used to verify that it is indeed an EM object and obtain some +statistics. Then call the interpreter: +.DS +int [ parameters ] +.DE +where the parameters are the normal parameters of the program. This should +work exactly like the original program, though slower. It reads from the +terminal if the original does, it opens and closes files like the original and +it accepts interrupts. +.sp +.LP +.I "Interpreting the results" +.PP +Now there are several possibilities. +.PP +It does all this. Great! This means the program +does not do very uncouth things. Now +read the file \fIint.mess\fP to see if any messages were generated. If there +are none, the program did not really run (perhaps the original cc \fIa.out\fP +got called instead?) Normally there is at least a termination message like +.DS +(Message): program exits with status 0 at "awa.p", line 64, INR = 4124 +.DE +This says that the program terminated through an exit(0) on line 64 of the +file \fIawa.p\fP after 4124 EM instructions. +If this is the only message it is time to move to a bigger test case. +.PP +On the other hand, the program may come to a grinding halt with an error +message. 
+All messages (errors and warnings) have a format in which the sequence +.DS +"<file name>", line <line number> +.DE +occurs, which is the same sequence many compilers produce for their error +messages. Consequently, the \fIint.mess\fP file can be processed as any +compiler message output. +.PP +One such message can be +.DS +(Fatal error) a.em: trap "Addressing non existent memory" not caught at "a.c", line 2, INR = 16 +.DE +produced by the abysmal program +.DS +main() { + *(int*)200000 = 1; +} +.DE +.LP +Often the effects are more subtle, however. The program +.DS +main() { + int *a, b = 777; + + b = *a; +} +.DE +produces the following five warnings (in far less than a second): +.DS +(Warning 47, #1): Local data pointer expected at "t.c", line 4, INR = 17 +(Warning 61, cont.): Actual memory is undefined at "t.c", line 4, INR = 17 +(Warning 102, #1): Returned function result too small at "", line 0, INR = 21 +(Warning 43, #1): Local integer expected at "exit.c", line 11, INR = 34 +(Warning 61, cont.): Actual memory is undefined at "exit.c", line 11, INR = 34 +.DE +The one about the function result looks the most frightening, +but is the most easily solved: +\fImain\fP is a function returning an int, so the start-up routine expects a +(four-byte) integer but gets an empty (zero-byte) return area. +.LP +\fINote\fP: The experts are divided about this. The traditional school holds +that \fImain\fP is an int function and its result is the return code; this +leaves them with two ways of supplying a return code: one as the parameter +of \fIexit()\fP and one as the result +of \fImain\fP. The modern school (Berkeley 4.2 etc.) claims that +return codes are supplied exclusively +by \fIexit()\fP, and they have an \fIexit(0)\fP in +the start-up routine, just after the call to \fImain()\fP; leaving \fImain()\fP +through the bottom implies successful termination.
+.LP +We shall satisfy both groups by +.DS +main() { + int *a, b = 777; + + b = *a; + exit(0); +} +.DE +This results in +.DS +(Warning 47, #1): Local data pointer expected at "t.c", line 4, INR = 17 +(Warning 61, cont.): Actual memory is undefined at "t.c", line 4, INR = 17 +(Message): program exits with status 0 at "exit.c", line 11, INR = 33 +.DE +which is pretty clear as it stands. +.sp +.LP +.I "Using stack dumps" +.PP +Let's, for the sake of argument +and to avoid the fierce realism of 10000-line programs, assume that the above +still does not give enough information. +Since the error occurred in EM instruction number 17, we should like to see +more information around that moment. Call the interpreter again, now with the +shell variable AT set at 17: +.DS +int AT=17 t.em +.DE +(The interpreter has a number of internal variables that can be set by +assignments on the command line, like with \fImake\fP.) +This gives a file called \fIint.log\fP containing the +stack dump of 150 lines presented at the end of this chapter. +.PP +Since dumping is a subfacility of logging in the interpreter, the formats of +the lines are +the same. If a line starts with an @, it will contain a file-name/line-number +indication; the next two characters are the subject and the log +level. Then comes the information, preceded by a space. The text contains +three stack dumps, one before the offending instruction, one at it, and one +after it; then the interpreter stops. All kinds of other dumps can be +obtained, but this is default. +.PP +For each instruction we have, in order: +.IP \- +an @x9 line, giving the position in the program, +.IP \- +the messages, warnings and errors from the instruction as it is being executed, +.IP \- +dump(s), as requested. 
+.PP +The first two lines mean that at line 4 in file \fIt.c\fP the interpreter +performed its 16-th instruction, with the Program Counter at 30 pointing at +opcode 180 in the text segment; the instruction was an LOL (LOad Local) +with the operand \-4 derived from the opcode. It copies the local at offset +\-4 to the top of the stack. The effect can be seen from the subsequent stack +dump, where the undefined word at addresses 2147483568 to ...571 (the variable +\fIa\fP) has been copied to the top of the stack at 2147483560 (copying +undefined values does not generate a warning). +Since we used the \fIem44\fP compiler, all pointers and ints in our dump are +4 bytes long. +So a variable at address X in reality extends from address X to X+3. +.br +Note that this is not the offending instruction; this stack dump represents +the situation just before the error. +.PP +The stack consists of a sequence of frames, each containing data followed by +a Return Status Block resulting from a call; the last frame ends in +top-of-stack. The first frame represents the stack when the program starts, +through a call to the start-up routine. This routine prepares the second +stack frame with the actual parameters to \fImain()\fP: +\fIargc\fP at 2147483596, \fIargv\fP at 2147483600 and \fIenviron\fP at +2147483604. +.LP +The RSB line shows that the call to \fImain()\fP was made from procedure 0 +which has 0 locals, with PC at +16, an LB of 2147483608 and file name and line number still unknown. +The \fIcode\fP in the RSB tells how this RSB was made; possible values are STP +(start-up), CAL, RTT (returnable trap) and NRT (non-returnable trap). +.PP +The next frame shows the local variable(s) of \fImain()\fP; there are two of +them, the pointer \fIa\fP at 2147483568, which is undefined, and variable +\fIb\fP at 2147483564, which has the value 777. Then comes a copy of \fIa\fP, +just made by the LOL instruction, at 2147483560. 
The following line shows that +the Function Return Area (which does not reside at the end of the stack, but +just happens to be printed here) has size 0 and is presently undefined. +The stack dump ends +by showing that the Actuals Base is at 2147483596 (pointing at \fIargc\fP), the +Locals Base at 2147483572 (pointing just above the local \fIa\fP), the Stack +Pointer at 2147483560 (pointing at the undefined pointer), the line count is 4 +and the file name is "t.c". +.LP +(Notice that there is one more stack frame than one would probably expect, the +one above the start-up routine.) +.LP +The Function Return Area +could have a size larger than 0 and still be undefined, for +example when an instruction that does not preserve the contents of the FRA has +just been executed; likewise the FRA could have size 0 and be defined +nevertheless, for example just after a RET 0 instruction. +.PP +All this has set the scene for the disaster which is about to strike in the +next instruction. This is indeed a LOI (LOad Indirect) of size 4, opcode 169; +it causes the message +.DS +warning: Local data pointer expected [stack.c: 242] +.DE +and its continuation +.DS +warning cont.: Actual memory is undefined +.DE +(detected in the interpreter file \fIstack.c\fP at line 242; this can be +useful for sorting out dubious semantics). We see that the effect, as shown in +the third frame of this stack dump (at instruction number 17) is somewhat +unexpected: the LOI has fetched the value 4 and stacked it. The reason is +that, unfortunately, undefinedness is not transitive in the interpreter. When +an undefined value is used in an operation (other than copying) a warning is +given, but thereafter the value is treated as if it were zero. So, after the +warning a normal null pointer remains, which is then used to pick up the value +at location 0. This is the place where the EM machine stores its current line +number, which is presently 4.
+.PP +The third stack dump shows the final effect: the value 4 has been unstacked +and copied to variable \fIb\fP at 2147483564 through an STL (STore Local) +instruction. +.PP +Since this form of logging dumps the stack only, the log file is relatively +small as dumps go. +Nevertheless, a useful excerpt can be obtained with the command +.DS +grep 'd1' int.log +.DE +This extracts the Return Status Block lines from the log, thus producing three +traces of calls, one for each instruction in the log: +.DS + d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL + d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL + d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, LIN = 4, FIL = "t.c" + d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL + d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL + d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, LIN = 4, FIL = "t.c" + d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL + d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL + d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483564, HP = 848, LIN = 4, FIL = "t.c" +.DE +Theoretically, the pertinent trace is the middle one, but in practice all three +are equal. In the present case there isn't much to trace, but in real programs +the trace can be useful. +.sp +.LP +.I "Errors in libraries" +.PP +Since libraries are generally compiled with suppression of line number and +file name information, the line number and file name in the interpreter will +not be updated when it enters a library routine. Consequently, all messages +generated by interpreting library routines will seem to originate from the +line of the call. This is especially true for the routine malloc(), which, +from the nature of its business, often contains dubitable code. 
+.PP +A usual message is: +.DS +(Warning 43, #1): Local integer expected at "buff.c", line 18, INR = 266 +(Warning 64, cont.): Actual memory contains a data pointer at "buff.c", line 18, INR = 266 +.DE +and indeed at line 18 of the file buff.c we find: +.DS + buff = malloc(buff_size = BFSIZE); +.DE +This problem can be avoided by using a specially compiled version of the +library that contains the correct LIN and FIL instructions, or, less +elegantly, by including the source code of the library routines in the +program; in the latter case, one has to be sure to have them all. +.sp +.LP +.I "Unavoidable messages" +.br +Some messages produced by the logging are almost unavoidable; sometimes the +writer of a library routine is forced to take liberties with the semantics of +EM. +.LP +Examples from C include the memory allocation routines. +For efficiency reasons, one bit of a pointer in the administration is used as +a flag; setting, clearing and reading this bit requires bitwise operations on +pointers, which gives the above messages. +Realloc causes a problem in that it may have to copy the originally allocated +area to a different place; this area may contain uninitialised bytes. +.bp +.DS +.ft CW +@x9 "t.c", line 4, INR = 16, PC = 30 OPCODE = 180 +@L6 "t.c", line 4, INR = 16, DoLOLm(-4) + d2 + d2 . . STACK_DUMP[4/4] . . INR = 16 . . STACK_DUMP . .
+ d2 ---------------------------------------------------------------- + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483643 0 (Dp) + d2 2147483642 0 (Dp) + d2 2147483641 0 (Dp) + d2 2147483640 40 [ 40] (Dp) + d2 2147483639 0 (Dp) + d2 2147483638 0 (Dp) + d2 2147483637 3 (Dp) + d2 2147483636 64 [ 832] (Dp) + d2 2147483635 0 (In) + d2 2147483634 0 (In) + d2 2147483633 0 (In) + d2 2147483632 1 [ 1] (In) + d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL + d2 + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483607 0 (Dp) + d2 2147483606 0 (Dp) + d2 2147483605 0 (Dp) + d2 2147483604 40 [ 40] (Dp) + d2 2147483603 0 (Dp) + d2 2147483602 0 (Dp) + d2 2147483601 3 (Dp) + d2 2147483600 64 [ 832] (Dp) + d2 2147483599 0 (In) + d2 2147483598 0 (In) + d2 2147483597 0 (In) + d2 2147483596 1 [ 1] (In) + d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL + d2 + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483571 undef + d2 | | | | | | + d2 2147483568 undef (1 word) + d2 2147483567 0 (In) + d2 2147483566 0 (In) + d2 2147483565 3 (In) + d2 2147483564 9 [ 777] (In) + d2 2147483563 undef + d2 | | | | | | + d2 2147483560 undef (1 word) + d2 FRA: size = 0, undefined + d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, \e + LIN = 4, FIL = "t.c" + d2 ---------------------------------------------------------------- + d2 +@x9 "t.c", line 4, INR = 17, PC = 31 OPCODE = 169 +@w1 "t.c", line 4, INR = 17, warning: Local data pointer expected [stack.c: 242] +@w1 "t.c", line 4, INR = 17, warning cont.: Actual memory is undefined +@L6 "t.c", line 4, INR = 17, DoLOIm(4) + d2 + d2 . . STACK_DUMP[4/4] . . INR = 17 . . STACK_DUMP . . 
+ d2 ---------------------------------------------------------------- + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483643 0 (Dp) + d2 2147483642 0 (Dp) + d2 2147483641 0 (Dp) + d2 2147483640 40 [ 40] (Dp) + d2 2147483639 0 (Dp) + d2 2147483638 0 (Dp) + d2 2147483637 3 (Dp) + d2 2147483636 64 [ 832] (Dp) + d2 2147483635 0 (In) + d2 2147483634 0 (In) + d2 2147483633 0 (In) + d2 2147483632 1 [ 1] (In) + d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL + d2 + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483607 0 (Dp) + d2 2147483606 0 (Dp) + d2 2147483605 0 (Dp) + d2 2147483604 40 [ 40] (Dp) + d2 2147483603 0 (Dp) + d2 2147483602 0 (Dp) + d2 2147483601 3 (Dp) + d2 2147483600 64 [ 832] (Dp) + d2 2147483599 0 (In) + d2 2147483598 0 (In) + d2 2147483597 0 (In) + d2 2147483596 1 [ 1] (In) + d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL + d2 + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483571 undef + d2 | | | | | | + d2 2147483568 undef (1 word) + d2 2147483567 0 (In) + d2 2147483566 0 (In) + d2 2147483565 3 (In) + d2 2147483564 9 [ 777] (In) + d2 2147483563 0 (In) + d2 2147483562 0 (In) + d2 2147483561 0 (In) + d2 2147483560 4 [ 4] (In) + d2 FRA: size = 0, undefined + d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483560, HP = 848, \e + LIN = 4, FIL = "t.c" + d2 ---------------------------------------------------------------- + d2 +@x9 "t.c", line 4, INR = 18, PC = 32 OPCODE = 229 +@S6 "t.c", line 4, INR = 18, DoSTLm(-8) + d2 + d2 . . STACK_DUMP[4/4] . . INR = 18 . . STACK_DUMP . . 
+ d2 ---------------------------------------------------------------- + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483643 0 (Dp) + d2 2147483642 0 (Dp) + d2 2147483641 0 (Dp) + d2 2147483640 40 [ 40] (Dp) + d2 2147483639 0 (Dp) + d2 2147483638 0 (Dp) + d2 2147483637 3 (Dp) + d2 2147483636 64 [ 832] (Dp) + d2 2147483635 0 (In) + d2 2147483634 0 (In) + d2 2147483633 0 (In) + d2 2147483632 1 [ 1] (In) + d1 >> RSB: code = STP, PI = uninit, PC = 0, LB = 2147483644, LIN = 0, FIL = NULL + d2 + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483607 0 (Dp) + d2 2147483606 0 (Dp) + d2 2147483605 0 (Dp) + d2 2147483604 40 [ 40] (Dp) + d2 2147483603 0 (Dp) + d2 2147483602 0 (Dp) + d2 2147483601 3 (Dp) + d2 2147483600 64 [ 832] (Dp) + d2 2147483599 0 (In) + d2 2147483598 0 (In) + d2 2147483597 0 (In) + d2 2147483596 1 [ 1] (In) + d1 >> RSB: code = CAL, PI = (0,0), PC = 16, LB = 2147483608, LIN = 0, FIL = NULL + d2 + d2 ADDRESS BYTE ITEM VALUE SHADOW + d2 2147483571 undef + d2 | | | | | | + d2 2147483568 undef (1 word) + d2 2147483567 0 (In) + d2 2147483566 0 (In) + d2 2147483565 0 (In) + d2 2147483564 4 [ 4] (In) + d2 FRA: size = 0, undefined + d1 >> AB = 2147483596, LB = 2147483572, SP = 2147483564, HP = 848, \e + LIN = 4, FIL = "t.c" + d2 ---------------------------------------------------------------- + d2 +.DE diff --git a/doc/int/bib b/doc/int/bib new file mode 100644 index 0000000..8955340 --- /dev/null +++ b/doc/int/bib @@ -0,0 +1,25 @@ +.\" Bibliography +.\" +.\" $Id: bib,v 2.2 1994/06/24 10:05:20 ceriel Exp $ +.bp +.DS C +BIBLIOGRAPHY +.DE +.LP +[1] A.S. Tanenbaum, H. van Staveren, E.G. Keizer and J.W. Stevenson. +\fIDescription of a Machine Architecture for use with Block Structured +Languages\fP. VU Informatica Rapport IR-81, august 1983. +.LP +[2] E.G. Keizer. \fIAck description file reference manual.\fP +.LP +[3] K. Jensen and N. Wirth. +\fIPASCAL, User Manual and Report\fP. Springer Verlag. +.LP +[4] B.W. Kernighan and D.M. Ritchie. +\fIThe C Programming Language\fP. 
Prentice-Hall, 1978. +.LP +[5] D.M. Ritchie. \fIC Reference Manual\fP. +.LP +[6] \fIAmsterdam Compiler Kit, reference manual.\fP +.LP +[7] \fIUnix Programmer's Manual, 4.1BSD\fP. UCB, August 1983. diff --git a/doc/int/cover b/doc/int/cover new file mode 100644 index 0000000..78409c1 --- /dev/null +++ b/doc/int/cover @@ -0,0 +1,26 @@ +.\" Front page +.\" +.\" $Id: cover,v 2.2 1994/06/24 10:05:23 ceriel Exp $ +.TL +The EM Interpreter +.AU +Eddo de Groot +Leo van den Berge +Dick Grune +.AI +Faculteit Wiskunde en Informatica +Vrije Universiteit, Amsterdam +.AB +This document describes the implementation +and usage of a new interpreter for the EM machine language. +This interpreter implements the full EM machine +and can be helpful to people writing new front-ends. +Moreover, it can be used as a thorough testing and debugging +tool by anyone familiar with the EM language. +.PP +A list of all warnings is given in appendix A; appendix B is a simple +tutorial. +.AE +.PP +.pn 1 +.bp diff --git a/doc/int/draw.mac b/doc/int/draw.mac new file mode 100644 index 0000000..889df70 --- /dev/null +++ b/doc/int/draw.mac @@ -0,0 +1,24 @@ +.\" Macros for simple constant width drawings (uses font CW) +.\" +.\" $Id: draw.mac,v 2.2 1994/06/24 10:05:26 ceriel Exp $ +.de Dr \" Drawing $1 (size) +.sp 1 +.ne \\$1 +.na +.nf +.ft CW \" constant width font +.lg 0 \" no ligatures +.. +.de Df \" Drawing Footer +.sp 1 +.ft R +.ce 1000 +.lg 1 +.. +.de De \" Drawing End $1 (lines) +.Df \" if it has not happened yet +.ce +.ad +.fi +.sp \\$1 +.. diff --git a/doc/int/proto.make b/doc/int/proto.make new file mode 100755 index 0000000..c50179d --- /dev/null +++ b/doc/int/proto.make @@ -0,0 +1,18 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:05:28 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/doc/int +TBL=tbl + +DOC = $(SRC_DIR)/draw.mac \ + $(SRC_DIR)/cover \ + $(SRC_DIR)/txt1 \ + $(SRC_DIR)/txt2 \ + $(SRC_DIR)/txt3 \ + $(SRC_DIR)/appA \ + $(SRC_DIR)/appB \ + $(SRC_DIR)/bib + +$(TARGET_HOME)/doc/int.doc: $(DOC) + $(TBL) $(DOC) > $@ diff --git a/doc/int/txt1 b/doc/int/txt1 new file mode 100644 index 0000000..ff7e1fd --- /dev/null +++ b/doc/int/txt1 @@ -0,0 +1,215 @@ +.\" Introduction +.\" +.\" $Id: txt1,v 2.2 1994/06/24 10:05:31 ceriel Exp $ +.NH +INTRODUCTION. +.PP +This document describes an EM interpreter which does extensive checking. +The interpreter exists in two versions: the normal version with full checking +and debugging facilities, and a fast stripped version that does interpretation +only. +This document assumes that the full version is used. +.LP +First the virtual EM machine embodied by the interpreter (called \fBint\fP) is +described, followed by some remarks on performance. +The second section gives some specific implementation decisions. +Section three explains the usage of the built-in debugging tool. +.LP +Appendix A gives an overview of the various warnings \fBint\fP gives, +with possible causes and solutions. +Appendix B is a simple tutorial on the use of \fBint\fP. +A separate manual page exists. +.PP +The document assumes a good understanding of what EM is and what +the assembly code looks like [1]. +Notions like 'procedure descriptor', 'mini', 'shortie' etc. are not +explained. +In the sequel, any word in \fIthis font\fP refers to the name of a +variable, constant, function or whatever, used in the source code under +the same name. +.LP +To avoid confusion: \fBint\fP interprets EM machine language (e.out files), +\fInot\fP the assembly language (.e files) and \fInot\fP the compact +code (.k files). +.NH 2 +The virtual EM machine. +.PP +The memory layout of the virtual EM machine represented by the interpreter +differs in details from the description in [1]. 
+Virtual memory is split up into two separate spaces: +one space containing the instructions, +the other all the data, including stack and heap (D-space). +The procedure descriptors are preprocessed and stored in a separate array, +\fIproctab[]\fP. +Both spaces start off at address 0. +This is possible because pointers in the two different spaces are +distinguishable by context (and shadow-bytes: see 2.6). +.NH 3 +Instruction Space +.PP +Figure 1 shows the I-space, together with the position of some important +EM registers. +.Dr 12 + NEXT --> |________________| <-- DB \e + | | | + | | | T + | | <-- PC | + | Program | | e + | | | + | Text | | x + | | | + | | | t + 0 --> |________________| <--(PB) / +.Df +\fI Fig 1. Virtual instruction space (I-space).\fP +.De +.PP +The I-space is just big enough to contain all the instructions. +The size needed for the program text (\fINTEXT\fP) is found from the +header-bytes of the loadfile. +Legal values for the program counter (\fIPC\fP) consist of all +addresses in the range from 0 through \fINTEXT\fP \- 1. +If the \fIPC\fP is made to point to an illegal address, a trap will occur. +.NH 3 +The Procedure Table +.PP +The \fINProc\fP constant indicates how many procedure descriptors there +are in the proctab array. +Elements of this array contain for each procedure: the number of locals, the +entry point and the entry point of the textually following procedure. This is +used in testing the restriction that the program counter may not wander from +procedure to procedure. +.NH 3 +The Data Space +.PP +Figure 2 shows the layout of the data space, which closely conforms to the EM +Manual. +.Dr 36 + __________________ + maxaddr(psize) --> | | <-- ML \e + | | | S + | Locals | | t + | & | | a + | RSBs | | c + | | | k + |________________| <-- SP / + . . + . . + . Unused . + . . + . . + . . + . . + . . + . Unused . + . . + . . 
+ |________________| <-- HP + | | \e + | Heap | | + |________________| <-- HB | + | | | D + | Arguments | | + | Environ | | a + | _ _ _ _ | | + | | | t + | | | + | | | a + | Global data | | + | | | + | | | + 0 --> |________________| <--(EB) / +.Df +\fI Fig 2. Virtual dataspace (D-space).\fP +.De +.PP +D-space begins at address 0, and ends at the largest address +representable by the pointer size (\fIpsize\fP) being used; +for a 2-byte pointer size this maximum address is +.DS +((2 ^ 16 \- 1) / word size * word size) \- 1 +.DE +for a 4-byte pointer size it is +.DS +((2 ^ 31 \- 1) / word size * word size) \- 1 +.DE +(not 2 ^ 32, to allow illegal pointers to be implemented in the future). The +funny rounding construction is required to make ML+1 expressible as the +initialisation value of LB and SP. +.PP +D-space is split into two partitions: Data and Stack (indicated by the +brackets). +The Data partition holds the global data area (GDA) and the heap. +Its initial size is given by the loadfile constant SZDATA. +Some space is added to it, because arguments and environment are +stored here also. +This total size is static while interpreting. +However, as the heap may grow during execution (e.g. caused by dynamic +allocation) this results in a variable size for the Data partition. +Initially, the size for the Data partition is the sum of the space needed +by the GDA (including the space needed for arguments and environment) and +the initial heapspace. +The lowest legal Data address is 0; the highest \fIHP\fP \- 1. +.LP +The Stack partition holds the stack. +It begins at the highest available D-space address, and grows +towards the low addresses, so the Stack partition is of variable size too. +The lowest legal Stack address is the stackpointer (\fISP\fP), +the highest is the memory limit (\fIML\fP). +.NH 2 +Physical lay-out +.PP +Each partition is mapped onto a piece of physical memory with the +same name: \fItext\fP (fig. 1), \fIstack\fP and \fIdata\fP (fig. 2). 
+These are the storage structures which \fBint\fP uses to physically +store the contents of the virtual EM spaces. +Figure 2 thus shows the mapping of D-space onto two +different physical parts: \fIstack\fP and \fIdata\fP. +The I-space is represented by one physical part: \fItext\fP. +.LP +Each time more space is needed, the actual partition is reallocated, +with the new size being computed with the formula: +.DS +\fInew size\fP = 1.5 \(mu (\fIold size\fP + \fIextra\fP) +.DE +\fIextra\fP is the number of bytes exceeding the \fIold size\fP. +One can prove that using this method, there is a +linear relationship between allocation time and needed partition size. +.PP +A virtual D-space starting at address 0 is in correspondence with +the definition in [1], p. 3\-6. +The main reason for having D-space start at address 0, is that it induces +a one-one correspondence between the heap \- and GDA +addresses on the virtual machine (and hence the definition) on one hand, +and the offset within the \fIdata\fP partition on the other. +This implies that no extra calculation is needed to perform load and +storage operations. +.LP +Some calculation however cannot be avoided, because the stack part of +the D-space grows downwards by EM definition. +The first address of the virtual stack (\fIML\fP, the maximum address for +the given \fIpsize\fP) is mapped onto the +beginning of the \fIstack\fP partition. +When the stack grows (i.e. EM addresses get lower), the offset within the +\fIstack\fP partition gets higher. +By taking offset \fIML \- A\fP in the stack partition, one obtains the +physical address corresponding to some virtual EM (stack) address \fIA\fP. +.NH 2 +Speed. +.PP +From several test results with both versions of the interpreter, the +following may be concluded. +The speed of the interpreter depends strongly on the type of +program being interpreted. +If plain CPU arithmetic is performed, the interpreter is +relatively slow (1000 \(mu the cc version). 
+When stack manipulation is at hand, the interpreter is +quite fast (100 \(mu the cc version). +.LP +Most programs however will not be this extreme, so an interpretation +time of somewhere between 300 and 500 times direct execution +for a normal program is to be expected. +.LP +The fast version runs in about 60% of the time of the full version, at the +expense of a considerably lower functionality. +Tallying costs about 10%. diff --git a/doc/int/txt2 b/doc/int/txt2 new file mode 100644 index 0000000..f9fad85 --- /dev/null +++ b/doc/int/txt2 @@ -0,0 +1,589 @@ +.\" Implementation details +.\" +.\" $Id: txt2,v 2.3 1994/06/24 10:05:35 ceriel Exp $ +.bp +.NH +IMPLEMENTATION DETAILS. +.PP +The pertinent issues are addressed below, in arbitrary order. +.NH 2 +Stack manipulation and start-up +.PP +It is not at all easy to start the EM machine with the stack in a reasonable +and consistent state. One reason is the anomalous value of the ML register +and another is the absence of a proper RSB. It may be argued that the initial +stack does not have to be in a consistent state, since the first instruction +proper is only executed after \fIargc\fP, \fIargv\fP and \fIenviron\fP +have been stacked (which takes care of the empty stack) and the initial +procedure has been called (which creates a RSB). We would, however, like to +preform the stacking of these values and the calling of the initial procedure +using the normal stack and call routines, which again require the stack to be +in an acceptable state. +.NH 3 +The anomalous value of the ML register +.PP +All registers in the EM machine point to word boundaries, and all of them, +except ML, address the even-numbered byte at the boundary. +The exception has a good reason: the even numbered byte at the ML boundary does +not exist. 
+This problem is not particular to EM but is inherent in the number system: the +number of N-digit numbers can itself not be expressed in an N-digit number, and +the number of addresses in an N-bit machine will itself not fit in an N-bit +address. The problem is solved in the interpreter by having ML point to the +highest word boundary that has bytes on either side; this makes ML+1 +expressible. +.NH 3 +The absence of an initial Return Status Block +.PP +When the stack is empty, there is no legal value for AB, since there are no +actuals; LB can be set naturally to ML+1. This is all right when the +interpreter starts with a call of the initial routine which stores the value +of LB in the first RSB, but causes problems when finally this call returns. We +want this call to return completely before stopping the interpreter, to check +the integrity of the last RSB; restoring information from it will, however, +cause illegal values to be stored in LB and AB (ML+1 and ML+1+rsbsize, resp.). +On top of this, the initial (illegal) Procedure Identifier of the running +procedure will be restored; then, upon restoring the likewise illegal PC will +cause a check to see if it still is inside the running procedure. After a few +attempts at writing special cases, we have decided that it is possible, but not +worth the effort; the final (= initial) RSB will not be unstacked. +.NH 2 +Floating point numbers. +.PP +The interpreter is capable of working with 4- and 8-byte floating point (FP) +numbers. +In C-terms, this corresponds to objects of type float and double respectively. +Both types fit in a C-double so the obvious way to manipulate these entities +internally is in doubles. +Pushing a 8-byte FP, all bytes of the C-double are pushed. +Pushing a 4-byte FP causes the 4 bytes representing the smallest fraction +to be discarded. +.PP +In EM, floats can be obtained in two different ways: via conversion +of another type, or via initialization in the loadfile. 
+Initialized floats are represented in the loadfile by an ASCII string in +the syntax of a Pascal real (signed \fPUnsignedReal\fP). +I.e. a float looks like: +.DS +[ \fISign\fP ] \fIDigit\fP+ [ . \fIDigit\fP+ ] [ \fIExp\fP [ \fISign\fP ] \fIDigit\fP+ ] (G1) +.DE +followed by a null byte. +Here \fISign\fP = {+, \-}; \fIDigit\fP = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; +\fIExp\fP = {e, E}; [ \fIAnything\fP ] means that \fIAnything\fP is optional; +and a + means one or more times. +To accommodate some loose code generators, the actual grammar accepted is: +.DS +[ \fISign\fP ] \fIDigit\fP\(** [ . \fIDigit\fP\(** ] [ \fIExp\fP [ \fISign\fP ] \fIDigit\fP+ ] (G2) +.DE +followed by a null byte. Here \(** means zero or more times. A floating +denotation which is in G2 but not in G1 draws a warning, one that is not even +in G2 causes a fatal error. +.LP +A string, representing a float which does not fit in a double causes a +warning to be given. +In that case, the returned value will be the double 0.0. +.LP +Floating point arithmetic is handled by some simple routines, checking for +over/underflow, and returning appropriate values in case of an ignored error. +.PP +Since not all C compilers provide floating point operations, there is a +compile time flag NOFLOAT, which, if defined, suppresses the use of all +fp operations in the interpreter. The resulting interpreter will still load +EM files with floats in the global data area (and ignore them) but will give a +fatal error upon attempt to execute a floating point instruction; consequently +code involving floating point operations can be run as long as the actual +instructions are avoided. +.NH 2 +Pointers. +.PP +The following sub-sections both deal with problems concerning pointers. +First, something is said about pointer arithmetic in general. +Then, the null-pointer problem is dealt with. +.NH 3 +Pointer arithmetic. +.PP +Strictly speaking, pointer arithmetic is defined only within a \fBfragment\fP. 
+From the explanation of the term fragment however (as given in [1], page 3), +it is not quite clear what a fragment should look like +from an interpreter's point of view. +For this reason we introduced the term \fBsegment\fP, +bordering the various areas within which pointer arithmetic is allowed. +Every stack-frame is a segment, and so are the global data area (GDA) and +the heap area. +Thus, the number of segments varies over time, and at some point in time is +given by the number of currently active stack-frames +(#CAL + #CAI \- #RET \- #RTT) plus 2 (gda, heap). +Pointers in the area between heap and stack (which is inaccessible by +definition), are assumed to be in the heap segment. +.PP +The interpreter, while building a new stack-frame (i.e. segment), stores the +value of the last ActualBase in a pointer-array (\fIAB_list[\ ]\fP). +When a pointer (say \fIP\fP) is available for arithmetic, the number +of the segment where it points (say \fIS\d\s-2P\s+2\u\fP), +is determined first. +Next, the arithmetic is performed, followed by a check on the number +of the segment where the resulting pointer \fIR\fP points +(say \fIS\d\s-2R\s+2\u\fP). +Now, if \fIS\d\s-2P\s+2\u != S\d\s-2R\s+2\u\fP, a warning is given: +\fBPointer arithmetic yields pointer to bad segment\fP. +.br +It may also be clear now, why the illegal area between heap and stack +was joined with the heap segment. +When calculating a new heap pointer (\fIHP\fP), one will obtain intermediate +results being pointers in this area just before it is made legal. +We do not want error messages all of the time, just because someone is +allocating space in the heap. +.LP +A similar treatment is given to the pointers in the SBS instruction; they have +to point into the same fragment for subtraction to be meaningful. +.LP +The length of the \fIAB_list[\ ]\fP is initially 100, +and it is reallocated in the same way the dynamically growing partitions +are (see 1.1). +.NH 3 +Null pointer. 
+.PP +Because the EM language lacks an instruction for loading a null pointer, +most programs solve this problem by loading a pointer-sized integer of +value zero, and using this as a null pointer (this is also proposed in [1]). +\fBInt\fP allows this, and will not complain. +A warning is given however, when an attempt is made to add something to a +null pointer (i.e. the pointer-sized integer zero). +.LP +Since many programming languages use a pointer to location 0 as an illegal +value, it is desirable to detect its use. +The big problem is though that 0 is a perfectly legal EM address; +address 0 holds the current line number in the source file. It may be freely +read but is written only by means of the LIN instruction. This allows us to +declare the area consisting of the line number and the file name pointer to be +read-only memory. Thus a store will be caught (and result in a warning) but a +read will succeed (and yield the EM information stored there). +.NH 2 +Function Return Area (FRA). +.PP +The Function Return Area (\fIFRA[\ ]\fP) has a default size of 8 bytes; +this default can +be overridden through the use of the \fB\-r\fP-option, but cannot be +made smaller than the size of two pointers, in accordance with the +remark on page 5 of [1]. +The global variable \fIFRASize\fP keeps track of how many bytes were +stored in the FRA, the last time a RET instruction was executed. +The LFR instruction only works when its argument is equal to this size. +If not, the FRA contents are loaded anyhow, but one of the following warnings +is given: +\fBReturned function result too large\fP (\fIFRASize\fP > LFR size) or +\fBReturned function result too small\fP (\fIFRASize\fP < LFR size). +.LP +Note that a C-program, falling through the end of its code without doing +a proper \fIreturn\fP or \fIexit()\fP, will generate this warning. +.PP +The only instructions that do not disturb the contents of the FRA are +GTO, BRA, ASP and RET. 
+This is expressed in the program by setting \fIFRA_def\fP to "undefined" +in any instruction except these four. +We realize this is a useless action most of the time, but a more +efficient solution does not seem to be at hand. +If a result is loaded when \fIFRA_def\fP is "undefined", the warning: +\fBReturned function result may be garbled\fP is generated. +.LP +Note that the FRA needs a shadow-FRA in order to store the shadow +information when performing a LFR instruction. +.NH 2 +Environment interaction. +.PP +The EM machine represented by \fBint\fP can communicate with +the environment in three different ways. +A first possibility is by means of (UNIX) interrupts; +the second by executing (relatively) high level system calls (called +monitor calls). +A third means of interaction, especially interesting for the debugging +programmer, is via internal variables set on the command line. +The former two techniques, and the way they are implemented will be described +in this section. +The latter has been allotted a separate section (3). +.NH 3 +Traps and interrupts. +.PP +Simple user programs will generally not mess around with UNIX-signals. +In interpreting these programs, the default actions will be taken +when a signal is received by the program: it gives a message and +stops running. +.LP +There are programs however, which try to handle certain signals +themselves. +In C, this is achieved by the system call \fIsignal(\ sig_no,\ catch\ )\fP, +which calls the handling routine \fIcatch()\fP, as soon as signal +\fBsig_no\fP occurs. +EM does not provide this call; instead, the \fIsigtrp()\fP monitor call +is available for mapping UNIX signals onto EM traps. +This implies that a \fIsignal()\fP call in a C-program +must be translated by the EM library routine to a \fIsigtrp()\fP call in EM. +.PP +The interpreter keeps an administration of the mapping of UNIX-signals +onto EM traps in the array \fIsig_map[NSIG]\fP. +Initially, the signals all have their default values. 
+Now assume a \fIsigtrp()\fP occurs, telling to map signal \fBsig_no\fP onto +trap \fBtrap_no\fP. +This results in: +.IP 1. +setting the relevant array element +\fIsig_map[sig_no]\fP to \fBtrap_no\fP (after saving the old value), +.IP 2. +catching the next to come \fBsig_no\fP signal with the handling routine +\fIHndlEMSig\fP (by a plain UNIX \fIsignal()\fP of course), and +.IP 3. +returning the saved map-value on the stack so the user can know the previous +trap value onto which \fBsig_no\fP was mapped. +.LP +On an incoming signal, +the handling routine for signal \fBsig_no\fP arms the +correct EM trap by calling the routine \fIarm_trap()\fP with argument +\fIsig_map[sig_no]\fP. +At the end of the EM instruction the proper call of \fItrap()\fP is done. +\fITrap()\fP on its turn examines the value of the \fIHaltOnTrap\fP variable; +if it is set, the interpreter will stop with a message. In the normal case of +controlled trap handling this bit is not on and the interpreter examines +the value of the \fITrapPI\fP variable, +which contains the procedure identifier of the EM trap handling routine. +It then initiates a call to this routine and performs a \fIlongjmp()\fP +to the main +loop to bypass all further processing of the instruction that caused the trap. +\fITrapPI\fP should be set properly by the library routines, through the +SIG instruction. +.LP +In short: +.IP 1. +A UNIX interrupt is caught by the interpreter. +.IP 2. +A handling routine is called which generates the corresponding EM trap +(according to the mapping). +.IP 3. +The trap handler calls the corresponding EM routine which emulates a UNIX +interrupt for the benefit of the interpreted program. +.PP +When considering UNIX signals, it is important to notice that some of them +are real signals, i.e., messages coming from outside the program, like DEL +and QUIT, but some are actually program-caused synchronous traps, like Illegal +Instruction. 
The latter, if they happen, are incurred by the interpreter +itself and consequently are of no concern to the interpreted program: it +cannot catch them. The present code assumes that the UNIX signals between +SIGILL (4) and SIGSYS (12) are really traps; \fIdo_sigtrp()\fP +will fail on them. +.LP +To avoid losing the last line(s) of output files, the interpreter should +always do a proper close-down, even in the presence of signals. To this end, +all non-ignored genuine signals are initially caught by the interpreter, +through the routine \fIHndlIntSig\fP, which gives a message and preforms a +proper close-down. +Synchronous trap can only be caused by the interpreter itself; they are never +caught, and consequently the UNIX default action prevails. Generally they +cause a core dump. +Signals requested by the interpreted program are caught by the routine +\fIHndlEMSig\fP, as explained above. +.NH 3 +Monitor calls. +.PP +For the convenience of the programmer, as many monitor calls as possible +have been implemented. +The list of monitor calls given in [1] pages 20/21, has been implemented +completely, except for \fIptrace()\fP, \fIprofil()\fP and \fImpxcall()\fP. +The semantics of \fIptrace()\fP and \fIprofil()\fP from an interpreted program +is unclear; the data structure passed to \fImpxcall()\fP is non-trivial +and the system call has low portability and applicability. +For these calls, on invocation a warning is generated, and the arguments which +were meant for the call are popped properly, so the program can continue +without the stack being messed up. +The errorcode 5 (IOERROR) is pushed onto the stack (twice), in order to +fake an unsuccessful monitor call. +No other \- more meaningful \- errorcode is available in the errno-list. +.LP +Now for the implemented monitor calls. +The returned value is zero for a successful call. 
+When something goes wrong, the value of the external \fIerrno\fP variable +is pushed, thus enabling the user to find out what the reason of failure was. +The implementation of the majority of the monitor calls is straightforward. +Those working with a special format buffer, (e.g. \fIioctl()\fP, +\fItime()\fP and \fIstat()\fP variants), need some extra attention. +This is due to the fact that working with varying word/pointer size +combinations may cause alignment problems. +.LP +The data structure returned by the UNIX system call results from +C code that has been translated with the regular C compiler, which, +on the VAX, happens to be a 4-4 compiler. +The data structure expected by the interpreted program conforms +to the translation by \fBack\fP of the pertinent include file. +Depending on the exact call of \fBack\fP, sizes and alignment may differ. +.LP +An example is in order. The EM MON 18 instruction in the interpreted program +leads to a UNIX \fIstat()\fP system call by the interpreter. +This call fills the given struct with stat information, the contents +and alignments of which are determined by the version of UNIX and the +used C compiler, resp. +The interpreter, like any program wishing to do system calls that fill +structs, has to be translated by a C compiler that uses the +appropriate struct definition and alignments, so that it can use, e.g., +\fIstab.st_mtime\fP and expect to obtain the right field. +This struct cannot be copied directly to the EM memory to fulfill the +MON instruction. +First, the struct may contain extraneous, system-dependent fields, +pertaining, e.g., to symbolic links, sockets, etc. +Second, it may contain holes, due to alignment requirements. +The EM program runs on an EM machine, knows nothing about these +requirements and expects UNIX Version 7 fields, with offsets as +determined by the em22, em24 or em44 compiler, resp. 
+To do the conversion, the interpreter has a built-in table of the +offsets of all the fields in the structs that are filled by the MON +instruction. +The appropriate fields from the result of the UNIX \fIstat()\fP are copied +one by one to the appropriate positions in the EM memory to be filled +by MON 18. +.PP +The \fIioctl()\fP call (MON 54) poses additional problems. Not only does it +have a second argument which is a pointer to a struct, the type of +which is dynamically determined, but its first argument is an opcode +that varies considerably between the versions of UNIX. +To solve the first problem, the interpreter examines the opcode (request) and +treats the second argument accordingly. The second problem can be solved by +translating the UNIX Version 7 \fIioctl()\fP request codes to their proper +values on the various systems. This is, however, not always useful, since +some EM run-time systems use the local request codes. There is a compile-time +flag, V7IOCTL, which, if defined, will restrict the \fIioctl()\fP call to the +version 7 request codes and emulate them on the local system; otherwise the +request codes of the local system will be used (as far as implemented). +.PP +Minor problems also showed up with the implementation of \fIexecve()\fP +and \fIfork()\fP. +\fIExecve()\fP expects three pointers on the stack. +The first points to the name of the program to be executed, +the second and third are the beginnings of the \fBargv\fP and \fBenvp\fP +pointer arrays respectively. +We cannot pass these pointers to the system call however, because +the EM addresses to which they point do not correspond with UNIX +addresses. +Moreover, (it is not very likely to happen but) what if someone constructs +a program holding the contents for one of these pointers in the stack? +The stack is implemented upside down, so passing the pointer to +\fIexecve()\fP causes trouble for this reason too. 
+The only solution was to copy the pointer contents completely +to fresh UNIX memory, constructing vectors which can be passed to the +system call. +Any impending memory fault while making these copies results in failure of the +system call, with \fIerrno\fP set to EFAULT. +.PP +The implementation of the \fIfork()\fP call faced us with problems +concerning IO-channels. +Checking messages (as well as logging) must be divided over different files. +Otherwise, these messages will coincide. +This problem was solved by post-fixing the default message file +\fBint.mess\fP (as well as the logging file \fBint.log\fP) with an +automatically leveled number for every new forked process. +Children of the original process do their diagnostics +in files with postfix 1,2,3 etc. +Second generation processes are assigned files numbered 11, 12, 21 etc. +When 6 generations of processes exist at one moment, the seventh will +get the same message file as the sixth, for the length of the filename +will become too long. +.PP +Some of the monitor calls receive pointers (addresses) from the program, to be +passed to the kernel; examples are the struct stat for \fIstat()\fP, the area +to be filled for \fIread()\fP, etc. If the address is wrong, the kernel does +not generate a trap, but rather the system call returns with failure, while +\fIerrno\fP is set to EFAULT. This is implemented by consistent checking of +all pointers in the MON instruction. +.NH 2 +Internal arithmetic. +.PP +Doing arithmetic on signed integers, the smallest negative integer +(\fIminsint\fP) is considered a legal value. +This is in contradiction with the EM Manual [1], page 14, which proposes using +\fIminsint\fP for uninitialized integers. +The shadow bytes already check for uninitialized integers however, +so we do not need this special illegal value. +Although the EM Manual provides two traps, for undefined integers and floats, +undefined objects occur so frequently (e.g. 
in block copying partially +initialized areas) that the interpreter just gives a warning. +.LP +Except for arithmetic on unsigneds, all arithmetic checks for overflow. +The value that is pushed on the stack after an overflow occurs depends +on the UNIX behavior with regard to that particular calculation. +If UNIX would not accept the calculation (e.g. division by zero), a zero +is pushed as a convention. +Illegal computations which UNIX does accept in silence (e.g. one's +complement of \fIminsint\fP), simply push the UNIX-result after giving a +trap message. +.NH 2 +Shadow bytes implementation. +.PP +A great deal of run-time checking is performed by the interpreter (except if +used in the fast version). +This section gives all details about the shadow bytes. +In order to keep track of information about the contents of D-space (stack +and global data area), there is one shadow-byte for each byte in these spaces. +Each bit in a shadow-byte represents some piece +of information about the contents of its corresponding 'sun-byte'. +All bits off indicates an undefined sun-byte. +One or more bits on always guarantees a well-defined sun-byte. +The bits have the following meaning: +.IP "\(bu bit 0:" 8 +indicates that the sun-byte is (a part of) an integer. +.IP "\(bu bit 1:" 8 +the sun-byte is a part of a floating point number. +.IP "\(bu bit 2:" 8 +the sun-byte is a part of a pointer in dataspace. +.IP "\(bu bit 3:" 8 +the sun-byte is a part of a pointer in the instruction space. +According to [1] (paragraph 6.4), there are two types of pointers which +must be distinguishable. +Conversion between these two types is impossible. +The shadow-bytes make the distinction here. +.IP "\(bu bit 4:" 8 +protection bit. +Indicates that the sun-byte is part of a protected piece of memory. +There is a protected area in the stack, the Return Status Block. +The EM machine language has no possibility to declare protected +memory, as is possible in EM assembly (the ROM instruction). 
The protection +bit is, however, set for the line number and filename pointer area near +location 0, to aid in catching references to location 0. +.IP "\(bu bit 5/6/7:" 8 +free for later use. +.LP +The shadow bytes are managed by the routines declared in \fIshadow.h\fP. +The warnings originating from checking these shadow-bytes during +run-time are various. +A list of them is given in appendix A, together with suggestions +(primarily for the C-programmer) where to look for the trouble maker(s). +.LP +A point to notice is, that once a warning is generated, it may be repeated +thousands of times. +Since repetitive warnings carry little information, but consume much +file space, the interpreter keeps track of the number of times a given warning +has been produced from a given line in a given file. +The warning message will +be printed only if the corresponding counter is a power of four (starting at +1). In this way, a logarithmic back-off in warning generation is established. +.LP +It might be argued that the counter should be kept for each (warning, PC +value) pair rather than for each (warning, file position) pair. Suppose, +however, that two instructions in a given line would cause the same message +regularly; this would produce two intertwined streams of identical messages, +with their counters jumping up and down. This does not seem desirable. +.NH 2 +Return Status Block (RSB) +.PP +According to the description in [1], at least the return address and the +base address of the previous RSB have to be pushed when performing a call. +Besides these two pointers, other information can be stored in the RSB +also. 
+The interpreter pushes the following items: +.IP \- +a pointer to the current filename, +.IP \- +the current line number (always four bytes), +.IP \- +the Local Base, +.IP \- +the return address (Program Counter), +.IP \- +the current procedure identifier +.IP \- +the RSB code, which distinguishes between initial start-up, normal call, +returnable trap and non-returnable trap (a word-size integer). +.LP +Consequently, the size of the RSB varies, depending on +word size and pointer size; its value is available as \fIrsbsize\fP. +When the RSB is removed from the stack (by a RET or RTT) the RSB code is under +the Stack Pointer for immediate checking. It is not clear what should be done +if RSB code and return instruction do not match; at present we give a message +and continue, for what it is worth. +.PP +The reason for pushing filename and line number is that some front-ends tend +to forget the LIN and FIL instructions after returning from a function. +This may result in error messages in wrong source files and/or line numbers. +.PP +The procedure identifier is kept and restored to check that the PC will not +move out of the running procedure. The PI is an index in the proctab, which +tells the limits in the text segment of the running procedure. +.PP +If the Return Status Block is generated as a result of a trap, more is +stacked. Before stacking the normal RSB, the trap function pushes the +following items: +.IP \- +the contents of the entire Function Return Area, +.IP \- +the number of bytes significant in the above (a word-size integer), +.IP \- +a word-size flag indicating if the contents of the FRA are valid, +.IP \- +the trap number (a word-size integer). +.LP +The latter is followed directly by the RSB, and consequently acts as the only +parameter to the trap handler. +.NH 2 +Operand access. +.PP +The EM Manual mentions two ways to access the operands of an instruction. 
It +should be noticed that the operand in EM is often not the direct operand of the +operation; the operand of the ADI instruction, e.g., is the width of the +integers to be added, not one of the integers themselves. The various operand +types are described in [1]. Each opcode in the text segment identifies an +instruction with a particular operand type; these relations are described in +computer-readable format in a file in the EM tree, \fIip_spec.t\fP. +.PP +The interpreter uses the third method. Several other approaches +can be designed, with increasing efficiency and equally increasing complexity. +They are briefly treated below. +.NH 3 +The Dispatch Table, Method 1. +.PP +When the interpreter starts, it reads the ip_spec.t file and constructs from it +a dispatch table. This table (of which there are actually three, +for primary, secondary +and tertiary opcodes) has 256 entries, each describing an instruction with +indications on how to decode the operand. For each instruction executed, the +interpreter finds the entry in the dispatch table, finds information there on +how to access the operand, constructs the operand and calls the appropriate +routine with the operand as calculated. There is one routine for each +instruction, which is called with the ready-made operand. Method 1 is easy to +program but requires constant interpretation of the dispatch table. +.NH 3 +Intelligent Routines, Method 2. +.PP +For each opcode there is a separate routine, and since an opcode uniquely +defines the instruction and the operand format, the routine knows how to get +the operand; this knowledge is built into the routine. Preferably the heading +of the routine is generated automatically from the ip_spec.t file. Operand +decoding is immediate, and no dispatch table is needed. Generation of the +469 required routines is, however, far from simple. Either a generated array +of routine names or a generated switch statement is used to map the opcode onto +the correct routine. 
The switch approach has the advantage that parameters can +be passed to the routines. +.NH 3 +Intelligent Calls, Method 3. +.PP +The call in the switch statement does full operand construction, and the +resulting operand is passed to the routine. This reduces the number of +routines to 133, the number of EM instructions. Generation of the switch +statement from ip_spec.t is more complicated, but the routine space is +much cleaner. This does not give any speed-up since the same actions are still +required; they are just performed in a different place. +.NH 3 +Static Evaluation. +.PP +It can be observed that the evaluation of the operand of a given instruction in +the text segment will always give the same result. It is therefore possible to +preprocess the text segment, decomposing the instructions into structs which +contain the address, the instruction code and the operand. No operand decoding +will be necessary at run-time: all operands have been precalculated. This will +probably give a considerable speed-up. Jumps, especially GTO jumps, will, +however, require more attention. +.NH 2 +Disassembly. +.PP +A disassembly facility is available, which gives a readable but not +letter-perfect disassembly of the EM object. The procedure structure is +indicated by placing the indication \fBP[n]\fP at the entry point of each +procedure, where \fBn\fP is the procedure identifier. The number of locals is +given in a comment. +.LP +The disassembler was generated by the software in the directory \fIswitch\fP +and then further processed by hand. diff --git a/doc/int/txt3 b/doc/int/txt3 new file mode 100644 index 0000000..ed469a2 --- /dev/null +++ b/doc/int/txt3 @@ -0,0 +1,180 @@ +.\" Logging +.\" +.\" $Id: txt3,v 2.3 1994/06/24 10:05:39 ceriel Exp $ +.bp +.NH +THE LOGGING MACHINE. +.PP +Since messages and warnings provided by \fBint\fP include source code file +names and line numbers, they alone often suffice to identify the error. 
+If, however, the necessity arises, much more extensive debugging information +can be obtained by activating the Logging Machine. +This Logging Machine, which monitors all actions of the EM machine, is the +subject of this chapter. +.NH 2 +Implementation. +.PP +When inspecting the source code of \fBint\fP, many lines in the +following format will show up: +.DS +LOG(("@<\fIletter\fP><\fIdigit\fP> message", args)); +.DE +or +.DS +LOG(("\ <\fIletter\fP><\fIdigit\fP> message", args)); +.DE +The double parentheses are needed, because \fILOG()\fP is +declared as a define, and has a printf-like argument structure. +.PP +The <\fIletter\fP> classifies the log message and corresponds to an entry in +the \fIlogmask\fP, which holds a threshold for each class of messages. +The following classes exist: +.TS +tab(@); +l l l. +\(bu A\-Z@the flow of instructions: +@A: array +@B: branch +@C: convert +@F: floating point arithmetic +@I: integer arithmetic +@L: load +@M: miscellaneous +@P: procedure call +@R: pointer arithmetic +@S: store +@T: compare +@U: unsigned arithmetic +@X: logical +@Y: sets +@Z: increment/decrement/zero +\(bu d@stack dumping. +\(bu g@gda & heap manipulation. +\(bu s@stack manipulation. +\(bu r@reading the loadfile. +\(bu q@floating point calculations during reading the loadfile. +\(bu x@the instruction count, contents and file position. +\(bu m@monitor calls. +\(bu p@procedure calls and returns. +\(bu t@traps. +\(bu w@warnings. +.TE +.LP +When the interpreter reaches a LOG(()) statement it scans its first argument; +if \fIletter\fP +occurs in the logmask, and if \fIdigit\fP is lower than or equal to the +threshold in the logmask, the message is given. +Depending on the first character, the message will be preceded by a +position indication (with the @) or will be printed as is (with the +space). 
+The \fIletter\fP determines the message class +and the \fIdigit\fP is used to distinguish various levels +of logging, with a lower digit indicating a more important message. +We will call the <\fIletter\fP><\fIdigit\fP> combination the \fBid\fP of +the logging. +.LP +In general, the lower the \fIdigit\fP following the \fIletter\fP, +the more important the message. +E.g. m5 reports about unsuccessful monitor calls only, m9 also reports +about successful monitors (which are obviously less interesting). +New logging messages can be added to the source code on relevant places. +.LP +Reasonable settings for the logmask are: +.TS +tab(@); +l l l. + @A\-Z9d4twx9@advised setting when trouble shooting (default). + @A\-Zx9@shows the flow of instructions & global information. + @pm9@shows the procedure & monitor calls. + @tw9@shows warning & trap information. +.TE +.PP +An EM interpreter without a Logging Machine can be obtained by undefining the +macro \fICHECKING\fP in the file \fIchecking.h\fP. +.NH 2 +Controlling the Logging machine. +.PP +The actions of the Logging Machine are controlled by a set of internal +variables (one of which is the log mask). +These variables can be set through assignments on the command line, as +explained in the manual page \fIint.1\fP, q.v. +Since there are a great many logging statements in the program, of which only a +few will be executed in any call of the interpreter, it is important to be able +to decide quickly if a given \fIid\fP has to be checked at all. +To this end all logging statements are guarded (in the #define) by a test for +the boolean variable \fIlogging\fP. +This variable will only be set if the command line assignments show the +potential need for logging (\fImust_log\fP) and the instruction count +(\fIinr\fP) is at least equal to \fIlog_start\fP (which derives from the +parameter \fBLOG\fP). 
+.LP +The log mask can be set by the assignment +.DS +"LOGMASK=\fIlogstring\fP" +.DE +which sets the current logmask to \fIlogstring\fP. +A logstring has the following form: +.DS +[ [ \fIletter\fP | \fIletter\fP \- \fIletter\fP ]+ \fIdigit\fP ]+ +.DE +E.g. LOGMASK=A\-D8x9R7c0hi4 will print all messages belonging to loggings +with \fBid\fPs: +\fIA0..A8,B0..B8,C0..C8,D0..D8,x0..x9,R0..R7,c0,h0..h4,i0..i4\fP. +.PP +The logging variable STOP can be used to prevent run-away logging +past the point where the user expects an error to occur. +STOP=\fInr\fP will stop the interpreter after instruction number \fInr\fP. +.PP +To simplify the use of the logging machine, a number of abbreviations have been +defined. +E.g., AT=\fInr\fP can be thought of as an abbreviation of LOG=\fInr\-1\fP +STOP=\fInr+1\fP; this causes three stack dumps, one before the suspect +instruction, one on it and one after it; then the interpreter stops. +.PP +Logging results will appear in a special logging file (default: \fIint.log\fP). +.NH 2 +Dumps. +.PP +There are three routines available to examine the memory contents: +.TS +tab(@); +l l l. + @\fIstd_all()\fP@dumps the contents of the stack (\fId1\fP or \fId2\fP must be in the logmask). + @\fIgdad_all()\fP@dumps the contents of the gda (\fI+1\fP must be in the logmask). + @\fIhpd_all()\fP@dumps the contents of the heap (\fI*1\fP must be in the logmask). +.TE +.LP +These routines can be used everywhere in the program to examine the +contents of memory. +The internal variables allow the +gda and heap to be dumped only once (according to the +corresponding internal variable). +The stack is dumped after each +instruction if the log mask contains d1 or d2; d2 gives a full formatted +dump, d1 produces a listing of the Return Status Blocks only. +An attempt is made to format the stack correctly, based on the shadow +bytes, which identify the Return Status Block. +.LP +Remember to set the correct \fBid\fP in the LOGMASK, and to give +LOG the correct value. 
+If dumping is needed before the first instruction, then LOG must be +set to 0. +.LP +The dumps of the global data area and the heap are controlled internally by +the id-s +1 and *1 resp.; the corresponding logmask entries are set +automatically by setting the GDA and HEAP variables. +.NH 2 +Forking. +.PP +As mentioned earlier, a call to \fIfork()\fP causes an image of the current +program to start running. +To prevent a messy logfile, the child process gets its own logfile +(and message file, tally file, etc.). +These logfiles are distinguished from the parent logfile by a +postfix, e.g., +\fIlogfile_1\fP for the first child, \fIlogfile_2\fP for the second child, +\fIlogfile_1_2\fP for the second child of the first child, etc. +.br +\fINote\fP: the implementation of this feature is shaky; it works for the log +file but should also work for other files and for the names of the logging +variables. diff --git a/doc/lint/abstract b/doc/lint/abstract new file mode 100644 index 0000000..01b05e6 --- /dev/null +++ b/doc/lint/abstract @@ -0,0 +1,18 @@ +.TL +Lint, a C Program Checker +.AU +Frans Kunst +.AI +Vrije Universiteit +Amsterdam +.AB +This document describes an implementation of a program which +does an extensive consistency and plausibility check on a set +of C program files. +This may lead to warnings which help the programmer to debug +the program, to remove useless code and to improve his style. +The program has been used to test itself and has found +bugs in sources of some heavily used code. 
+.AE +.LP +.bp diff --git a/doc/lint/appendix_A b/doc/lint/appendix_A new file mode 100644 index 0000000..ce62b34 --- /dev/null +++ b/doc/lint/appendix_A @@ -0,0 +1,56 @@ +.ds +.SH +Appendix A +.LP +.SH +The warnings +.LP +.SH +Pass one warnings +.DS +.ft CW +%s may be used before set +maybe %s used before set +%s unused in function %s +%s set but not used in function %s +argument %s unused in function %s +static [variable, function] %s unused +%s declared extern but never used + +long conversion may lose accuracy +comparison of unsigned with negative constant +unsigned comparison with 0? +degenerate unsigned comparison +nonportable character comparison +possible pointer alignment problem + +%s evaluation order undefined + +null effect +constant in conditional context +use if-else construction +while (0) ? +do ... while (0) ? +[case, default] statement in strange context + +function %s has return(e); and return; +statement not reached +function %s declared %s but no value returned +.ft P +.DE +.SH +Pass two warnings +.DS +.ft CW +%s variable # of args +%s arg %d used inconsistently +%s multiply defined +%s value declared inconsistently +%s used but not defined +%s defined (%s(%d)) but never used +%s declared but never defined +%s value is used but none is returned +%s returns value which is [sometimes, always] ignored +%s also defined in library +.ft P +.DE diff --git a/doc/lint/appendix_B b/doc/lint/appendix_B new file mode 100644 index 0000000..362dbd2 --- /dev/null +++ b/doc/lint/appendix_B @@ -0,0 +1,52 @@ +.SH +Appendix B +.TL +The Ten Commandments for C Programmers +.AU +Henry Spencer +.IP 1 +Thou shalt run \fIlint\fR frequently and study its pronouncements with +care, for verily its perception and judgement oft exceed thine. +.IP 2 +Thou shalt not follow the NULL pointer, for chaos and madness await thee at +its end. 
+.IP 3 +Thou shalt cast all function arguments to the expected type if they are not +of that type already, even when thou art convinced that this is unnecessary, +lest they take cruel vengeance upon thee when thou least expect it. +.IP 4 +If thy header files fail to declare the return types of thy library functions, +thou shalt declare them thyself with the most meticulous care, +lest grievous harm befall thy program. +.IP 5 +Thou shalt check the array bounds of all strings (indeed, all arrays), +for surely where thou typest ``foo'' someone someday shall type +``supercalifragilisticexpialidocious''. +.IP 6 +If a function be advertised to return an error code in the event of +difficulties, thou shalt check for that code, yea, even though the checks +triple the size of thy code and produce aches in thy typing fingers, +for if thou thinkest ``it cannot happen to me'', +the gods shall surely punish thee for thy arrogance. +.IP 7 +Thou shalt study thy libraries and strive not to re-invent them without cause, +that thy code may be short and readable and thy days pleasant and productive. +.IP 8 +Thou shalt make thy program's purpose and structure +clear to thy fellow man by using the +One True Brace Style, +even if thou likest it not, +for thy creativity is better used in solving problems than in creating +beautiful new impediments to understanding. +.IP 9 +Thy external identifiers shall be unique in the first six characters, +though this harsh discipline be irksome and the years of its necessity +stretch before thee seemingly without end, +lest thou tear thy hair out and go mad on that fateful day when +thou desirest to make thy program run on an old system. +.IP 10 +Thou shalt foreswear, renounce, and abjure the vile heresy which claimeth +that ``All the world's a VAX'', and have no commerce with the +benighted heathens who cling to this barbarous belief, +that the days of thy program may be long even though the days of thy +current machine be short. 
diff --git a/doc/lint/chap1 b/doc/lint/chap1 new file mode 100644 index 0000000..580e9b6 --- /dev/null +++ b/doc/lint/chap1 @@ -0,0 +1,34 @@ +.NH 1 +Introduction +.PP +C [1][2] is a dangerous programming language. +The programmer is allowed to do almost anything, as long as +the syntax of the program is correct. +This has a reason. In this way it is possible to make a fast +compiler which produces fast code. +The compiler will be fast because it doesn't do much checking +at compile time. +The code is fast because the compiler doesn't generate run time +checks. +The programmer should protect himself against producing error +prone code. +One way to do that is to obey the +.I +Ten Commandments for C programmers +.R +[appendix B]. +This document describes an implementation of the +.I lint +program, as referred to in Commandment 1. +It is a common error to run +.I lint +only after a few hours of debugging and some +bug can't be found. +.I Lint +should be run when large pieces of new code are accepted by the +compiler and as soon as bugs arise. +Even for working programs it is useful to run +.I lint, +because it can find constructions that may lead to problems in +the future. +.bp diff --git a/doc/lint/chap2 b/doc/lint/chap2 new file mode 100644 index 0000000..aac7c33 --- /dev/null +++ b/doc/lint/chap2 @@ -0,0 +1,57 @@ +.NH +Outline of the program +.PP +The program can be divided into three parts. A first pass, which +parses C program files and outputs definitions, a second pass which +processes the definitions and a driver, +which feeds the set of files to the first pass and +directs its output to the second pass. Both passes produce the +warnings on standard error output, which are redirected to standard +output by the driver. +.PP +The first pass is based on an existing C front end, called +.I cem +[3]. +.I Cem +is part of the Amsterdam Compiler Kit (ACK), as described in [4]. +.PP +Most of the code of +.I cem +is left unchanged. This has several reasons. 
A lot of work, which +is done by +.I cem +, must also be done by +.I lint. +E.g. the lexical analysis, the macro expansions, +the parsing part and the semantical analysis. +Only the code generation part is turned off. +An advantage of this approach is, that a person who understands +.I cem +will not have to spend too much time in understanding +.I lint. +.PP +All changes and extensions to +.I cem +can be turned off by not defining the compiler directive +.ft CW +LINT. +.R +Compiling should then result in the original C compiler. +.PP +The second pass is a much less complex program. +It reads simple definitions generated by the first pass and +checks their consistency. +This second pass gives warnings +about wrong usage of function arguments, their results and +about external variables, which are used and defined in more +than one file. +.PP +The driver is a shell program, to be executed by the +.UX +shell +.I sh. +It executes the two passes and lets them communicate through a +filter (sort). +Actually it is simplex communication: the first pass only talks to +the second pass through the filter. +.bp diff --git a/doc/lint/chap3 b/doc/lint/chap3 new file mode 100644 index 0000000..333529c --- /dev/null +++ b/doc/lint/chap3 @@ -0,0 +1,294 @@ +.NH +What lint checks +.NH 2 +Set, used and unused variables +.PP +We make a distinction between two classes of variables: +the class of automatic variables (including register variables) +and the other variables. +The other variables, global variables, static variables, formal +parameters et cetera, are assumed to have a defined value. +Global variables e.g., are initialized by the compiled code to +zeros; formal parameters have a value which is equal to the value +of the corresponding actual parameter. +These variables can be used without explicitly initializing them. +The initial value of automatic variables is undefined (if they are +not initialized at declaration). +These variables should be set before they are used. 
+A variable is set by +.IP +.RS +.IP 1. +an assignment (including an initialization) +.IP 2. +taking the address +.RE +.PP +The first case is clear. The second case is plausible. +It would take too much effort (if at all possible) to check +if a variable is set through one of its aliases. +Because +.I lint +should not warn about correct constructs, it takes this conservative +approach. +Structures (and unions) can also be set by setting at +least one member. +Again a conservative approach. +An array can be set by using its name (e.g. as actual parameter +of a function call). +.I Lint +warns for usage as +.I rvalue +of automatic variables which are not set. +.PP +A variable is used if +.IP +.RS +.IP 1. +it is used as a +.I rvalue +.IP 2. +its address is taken +.IP +Arrays and structures (and unions) are also used if one entry +or one member respectively is used. +.RE +.PP +When a variable is never used in the part of the program where it is +visible, a warning is given. +For variables declared at the beginning of a compound statement, +a check is made at the end of this statement. +For formal parameters a check is made at the end of the function +definition. +At the end of a file this is done for global static definitions. +For external variables a warning can be given when all the files +are parsed. +.NH 2 +Flow of control +.PP +The way +.I lint +keeps track of the flow of control is best explained by means of +an example. +See the program of figure 1. +.KF +.DS B +.ft CW +if (cond) + /* a statement which is executed if cond is true, + * the if-part + */ +else + /* the else-part */ +.DE +.br +.ce +.I +figure\ 1. +.R +.KE +.PP +After evaluation of \f(CWcond\fP, two things can happen. +The if-part is executed or the else-part is executed (but not both). +Variables which are set in the if-part but not in the else-part, +need not be set after the if statement, and vice versa. +.I Lint +detects this and assumes these variables after the if statement to +be \fImaybe set\fR. 
+(See figure 2.) +.KF +.DS B +.ft CW +int cond; + +main() +{ + int i, j; + + if (cond) { + i = 0; + j = 0; + } + else + use(i); /* i may be used before set */ + use(j); /* maybe j used before set */ +} +.DE +.br +.ce +.I +figure 2. +.R +.KE +.PP +If both the if-part and the else-part are never left (i.e. they +contain an endless loop or a return statement), +.I lint +knows that the if statement is never left too. +Besides the if statement, +.I lint +knows the possible flows of control in while, do, for and +switch statements. +It also detects some endless loops like \f(CWwhile(1)\fP, +\f(CWdo ... while (1)\fP, \f(CWfor (;;)\fP. +.NH 2 +Functions +.PP +Most C compilers will not complain if a function is called with actual +parameters of a different type than the function expects. +Using a function in one file as a function of +type +.I A +while defining it in another file as a function of type +.I B +is also allowed by most compilers. +It needs no explanation that this can lead to serious trouble. +.PP +.I Lint +checks if functions are called with the correct number of arguments, +if the types of the actual parameters correspond with the types of +the formal parameters and if function values are used in a way +consistently with their declaration. +When the result of a function is used, a check is made to see if +the function returns a value. +When a function returns a value, +.I lint +checks if the values of all calls of this function are used. +.NH 2 +Undefined evaluation order +.PP +The semantics of C do not define evaluation orders for some +constructs, which, at first sight, seem well defined. +The evaluation order of the expression +.ft CW +a[i]\ =\ i++; +.R +e.g., is undefined. +It can be translated to something with the semantics of +.ft CW +a[i]\ =\ i; i++; +.R +which is what probably was meant, or +.ft CW +a[i+1]\ =\ i; i++;. +.R +An easier example to explain why, is +.ft CW +j\ =\ a[i]\ +\ i++;. 
+.R +`\f(CW+\fR' Is a so called +.I commutative +operator (with respect to the evaluation order), as is `\f(CW=\fR'. +This allows the compiler to choose which term to evaluate first. +It is easy to see, that it makes a difference for the value of +.ft CW +j, +.R +which order is chosen. +The expression +.ft CW +i++ +.R +is said to have +.I +side effects. +.R +It affects the value of +.ft CW +i. +.R +Because this value is used in the other term, this gives a conflict. +.PP +A function call with reference to a variable as argument can have +side effects too. +Therefore, the evaluation order of +.ft CW +i +.R +in the expression +.ft CW +f(&i)\ +\ i +.R +is undefined. +When a function is called with an array as argument, this array +can be affected by the function, because only the address of the +array is passed to the function. +(In Pascal a copy of the array is passed to the function if the +formal parameter is not declared \fIvar\fP.) +So the evaluation order of +.ft CW +a +.R +in the expression +.ft CW +f(a)\ +\ a[0] +.R +is undefined. +This one is not yet detected by +.I lint. +.PP +Global variables can still cause trouble. +If function +.ft CW +f +.R +affects the global variable +.ft CW +i, +.R +the value of the expression +.ft CW +f()\ +\ i +.R +is undefined, because the evaluation order of \f(CWi\fP is undefined. +.PP +The evaluation order of the arguments of a function is not +defined, so the expression +.ft CW +f(i,\ i++) +.R +gives a warning +.ft CW +i evaluation order undefined. +.R +.NH 2 +Pointer alignment problems +.PP +For pointers to objects of different types there are different +alignment restrictions. +On some machines pointers to type char can have both odd and even +values, whereas pointers to type int should contain an even address. +.I Lint +could warn for all pointer conversions. +This is not what +.I lint +does.
+.I Lint +assumes that some pointers are more restricted than others, and +that pointers of some types can safely be converted to a pointer +of a less restrictive type. +The order of restriction is as follows (`\(<=' means +`is not more restricted than') : +.PP +.ce +char \(<= short \(<= int \(<= long +.ce +float \(<= double +.NH 2 +Libraries +.PP +C is a small language. +As a matter of fact it has no i/o routines. +To make it a useful language, C is supported by libraries. +These libraries contain functions and variables that can be used by any +C program. +.I Lint +knows some libraries too. +At this moment it knows the `-\fIlc\fR', `-\fIlm\fR' and +`-\fIlcurses\fR' libraries. +The `-\fIlc\fR' library, containing definitions for functions from +chapter two and three of the \s-2UNIX\s+2 programmers manual, is default. +.I Lint +warns for definitions of functions or global variables with the +same name as a function definition in a library. +.bp diff --git a/doc/lint/chap4 b/doc/lint/chap4 new file mode 100644 index 0000000..009caa3 --- /dev/null +++ b/doc/lint/chap4 @@ -0,0 +1,979 @@ +.NH 1 +How lint checks +.NH 2 +The first pass data structure +.PP +The data structure of +.I cem +is changed a little and some structures have been added. +.NH 3 +The changes +.NH 4 +Idf descriptor +.PP +A member +.ft CW +id_line +.R +is added +to the +.I idf +selector. +This line number is used for some warnings. +.NH 4 +Def descriptor +.PP +The +.I def +selector is extended with the members +.ft CW +df_set +.R and +df_line. +.R +The +.ft CW +df_used +.R +member did exist already, but was only used for code generation. +This usage is eliminated so it can be used by +.I lint. +The meaning of these members should be clear.
+.NH 3 +The additions +.NH 4 +Lint_stack_entry descriptor +.DS B +.ft CW +struct lint_stack_entry { + struct lint_stack_entry *next; + struct lint_stack_entry *previous; + short ls_class; + int ls_level; + struct state *ls_current; + union { + struct state *S_if; + struct state *S_end; + struct switch_states switch_state; + } ls_states; +}; +.R +.DE +.PP +Structure to simulate a stacking mechanism. +.IP \f(CWnext\fP 15 +Pointer to the entry on top of this one. +.IP \f(CWprevious\fP +Pointer to the entry beneath this one. +.IP \f(CWls_class\fP +The class of statement this entry belongs to. +Possible classes are \f(CWIF\fP, \f(CWWHILE\fP, \f(CWDO\fP, +\f(CWFOR\fP, \f(CWSWITCH\fP and \f(CWCASE\fP. +.IP \f(CWls_level\fP +The level the corresponding statement is nested. +.IP \f(CWls_current\fP +A pointer to the state descriptor which describes the state +of the function (the state of the automatic variables, if the next +statement can be reached, et cetera) if control passes the +flow of control to the part of the program currently parsed. +The initialization of this state is as follows +.RS +.IP +If \f(CWls_class\fP in [\f(CWIF\fP, \f(CWSWITCH\fP] the state +after parsing the conditional expression. +.IP +If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP] the state +after parsing the code between the brackets. +.IP +If \f(CWls_class\fP in [\f(CWDO\fP, \f(CWCASE\fP] the state at +entrance of the statement after the \f(CWDO\fP or \f(CWCASE\fP +token. +.RE +.IP \f(CWls_states\fP 15 +Union of pointers to state descriptors containing different information +for different values of \f(CWls_class\fP. +.RS +.IP +If \f(CWls_class\fP is \f(CWIF\fP and in case of parsing an else part, +\f(CWls_states.S_if\fP points to the state that is reached after the +if part. 
+.IP +If \f(CWls_class\fP in [\f(CWWHILE\fP, \f(CWFOR\fP, \f(CWDO\fP] +then \f(CWls_states.S_end\fP contains a conservative description +of the state of the program after `jumping' +to the end of the statement after the \f(CWWHILE\fP, \f(CWDO\fP +or \f(CWFOR\fP token. +I.e. the state at reaching a break (not inside a switch) or +continue statement. +.IP +If \f(CWls_class\fP is \f(CWSWITCH\fP, \f(CWls_states\fP is used as a structure +.DS B +.ft CW +struct switch_states { + struct state *S_case; + struct state *S_break; +}; +.R +.DE +containing two pointers to state descriptors. +\f(CWls_states.switch_state.S_case\fP contains +a conservative description +of the state of the program after \f(CWcase ... case\fP +parts are parsed. +\f(CWls_states.switch_state.S_break\fP contains the state after parsing +all the \f(CWcase ... break\fP parts. +The reason for \f(CWls_states.switch_state.default_met\fP should be +self-explanatory. +.IP +In case \f(CWls_class\fP is \f(CWCASE\fP, \f(CWls_states\fP is not used. +.RE +.NH 4 +State descriptor +.DS B +.ft CW +struct state { + struct state *next; + struct auto_def *st_auto_list; + int st_nrchd; + int st_warned; +}; +.R +.DE +.IP \f(CWst_auto_list\fP 15 +Pointer to a list of definitions of the automatic variables whose +scope contains the current position in the program. +.IP \f(CWst_nrchd\fP +True if the next statement can't be reached. +.IP \f(CWst_warned\fP +True if a warning has already been given. +.NH 4 +Auto_def descriptor +.DS B +.ft CW +struct auto_def { + struct auto_def *next; + struct idf *ad_idf; + struct def *ad_def; + int ad_used; + int ad_set; + int ad_maybe_set; +}; +.R +.DE +.IP \f(CWnext\fP 15 +Points to the next auto_definition of the list. +.IP \f(CWad_idf\fP +Pointer to the idf descriptor associated with this auto_definition. +.IP \f(CWad_def\fP +Ditto for def descriptor. +.IP \f(CWad_used\fP +Indicates the state of this automatic variable. +Ditto for \f(CWad_set\fP and \f(CWad_maybe_set\fP.
+Only one of \f(CWad_set\fP and \f(CWad_maybe_set\fP may be true. +.NH 4 +Expr_state descriptor +.DS B +.ft CW +struct expr_state { + struct expr_state *next; + struct idf *es_idf; + arith es_offset; + int es_used; + int es_set; +}; +.R +.DE +.PP +This structure is introduced to keep track of which variables, +array entries and structure members (union members) are set +and/or used in evaluating an expression. +.IP \f(CWnext\fP 15 +Pointer to the next descriptor of this list. +.IP \f(CWes_idf\fP +Pointer to the idf descriptor this descriptor belongs to. +.IP \f(CWes_offset\fP +In case of an array, a structure or union, this member contains +the offset the compiler would generate for locating the array +entry or structure/union member. +.IP \f(CWes_used\fP +True if the indicated memory location is used in evaluating the +expression. +.IP \f(CWes_set\fP +Ditto for set. +.NH 4 +Outdef descriptor +.DS B +.ft CW +struct outdef { + int od_class; + char *od_name; + char *od_file; + unsigned int od_line; + int od_nrargs; + struct tp_entry *od_entry; + int od_returns; + struct type *od_type; +}; +.DE +.R +.PP +As structures of this type are not allocated dynamically by a +storage allocator, it contains no next member. +An outdef can be given to \f(CWoutput_def()\fP to be passed to the +second pass. +Basically this forms the interface with the second pass. +.IP \f(CWod_class\fP 15 +Indicates what kind of definition it is. +Possible classes are \f(CWEFDF\fP, \f(CWEVDF\fP, \f(CWSFDF\fP, +\f(CWSVDF\fP, \f(CWLFDF\fP, \f(CWLVDF\fP, +\f(CWEFDC\fP, \f(CWEVDC\fP, \f(CWIFDC\fP, \f(CWFC\fP, \f(CWVU\fP. +([\f(CWE\fPxternal, \f(CWS\fPtatic, \f(CWL\fPibrary, \f(CWI\fPmplicit] +[\f(CWF\fPunction, \f(CWV\fPariable] +[\f(CWD\fPe\f(CWF\fPinition, \f(CWD\fPe\f(CWC\fPlaration, +\f(CWC\fPall, \f(CWU\fPsage]) +.IP \f(CWod_name\fP +The name of the function or variable. +.IP \f(CWod_file\fP +The file this definition comes from.
+.IP \f(CWod_nrargs\fP +If \f(CWod_class\fP is one of \f(CWEFDF\fP, \f(CWSFDF\fP or +\f(CWLFDF\fP, this member contains the +number of arguments this function has. +If the function was preceded by the pseudocomment +\f(CW/*\ VARARGS\ */\fP, +\f(CWod_nrargs\fP gets the value \f(CW-1-n\fP. +.IP \f(CWod_entry\fP +A pointer to a list of \f(CWod_nrargs\fP cells, each containing a +pointer to the type descriptor of an argument. (\f(CW-1-od_nrargs\fP +cells if +\f(CWod_nrargs < 0\fP.) +\f(CWTp_entry\fP is defined as +.DS B +.ft CW +struct tp_entry { + struct tp_entry *next; /* pointer to next cell */ + struct type *te_type; /* an argument type */ +}; +.R +.DE +.IP \f(CWod_returns\fP 15 +For classes \f(CWEFDF\fP, \f(CWSFDF\fP and \f(CWLFDF\fP this +member tells if the function returns an expression or not. +In case \f(CWod_class\fP is \f(CWFC\fP it is true if the value +of the function is used, false otherwise. +For other classes this member is not used. +.IP \f(CWod_type\fP +A pointer to the type of the function or variable defined or +declared. +Not used for classes \f(CWFC\fP and \f(CWVU\fP. +.NH 2 +The first pass checking mechanism +.PP +In the description of the implementation of the pass one +warnings, it is assumed that the reader is familiar with the +\fILLgen\fP parser generator, as described in [6]. +.NH 3 +Used and/or set variables +.PP +To be able to give warnings like +.ft CW +%s used before set +.R +and +.ft CW +%s set but not used in function %s +.R +, there needs to be a way to keep track of the state of a variable. +A first approach to do this was by adding two fields to the +\fIdef\fP selector: +.ft CW +df_set +.R +and +.ft CW +df_used. +.R +While parsing the program, each time an expression was met +this expression was analyzed and the fields of each \fIdef\fP +selector were possibly set during this analysis. 
+This analysis was done by passing each expression to a +function +.ft CW +lint_expr +.R +, which walks the expression tree in a way similar to the function +\f(CWEVAL\fP in the file \fIeval.c\fP of the original +.I +cem +.R +compiler. +This approach has one big disadvantage: it is impossible to keep +track of the flow of control of the program. +No warning will be given for the program fragment of figure 3. +.KF +.DS B +.ft CW +func() +{ + int i; + + if (cond) + i = 0; + else + use(i); /* i may be used before set */ +} +.I +.DE +.br +.ce +figure\ 3. +.R +.KE +.PP +It is clear that it would be nice having +.I lint +warn for this construction. +.PP +This was done in the second approach. +When there was a choice between two statements, each statement +was parsed with its own copy of the state at entrance of the +.I +choosing statement. +.R +A state consisted of the state of the automatic variables +(including register variables). +In addition to the possibilities of being used and set, +a variable could be \fImaybe set\fP. +These states were passed between the statement parsing routines +using the \fILLgen\fP parameter mechanism. +At the end of a choosing statement, the two states were merged +into one state, which became the state after this statement. +The construction of figure 3 was now detected, but switch +statements still gave problems and continue and break statements +were not understood. +The main problem of a switch statement is, that the closing bracket +(`\f(CW)\fP') has to be followed by a \fIstatement\fP. +The syntax shows no choice of statements, as is the case with +if, while, do and for statements. +Using the \fILLgen\fP parameter mechanism, it is not a trivial +task to parse the different case parts of a switch statement +with the same initial state and to merge the results into one +state. +This observation led to the third and final approach, as described +next.
+.PP +Instead of passing the state of the program through the statements +parsing routines using the \fILLgen\fP parameters, a special stack is +introduced, the +.I lint_stack. +When a choosing statement is parsed, an entry is pushed on the stack +containing the information that is needed to keep track of the +state of the program. +Each entry contains a description of the +.I current +state of the program and a field that indicates what part of the +program the parser is currently parsing. +For all the possible choosing statements I describe the actions +to be taken. +.PP +At entrance of an if statement, an entry is pushed on the stack +with the current state being a copy of the current state of the +stack element one below. +The class of this entry is \f(CWIF\fP. +At reaching the else part, the current state is moved to +another place in this stack entry (to \f(CWS_IF\fP), and a new copy +of the current state at entrance of this if statement is made. +At the end of the else part, the two states are merged into +one state, the new current state, and the \f(CWIF\fP entry is +popped from the stack. +If there is no else part, then the state that is reached after +parsing the if part is merged with the current state at entrance +of the if statement into the new current state. +.PP +At entrance of a while statement a \f(CWWHILE\fP entry is pushed +on the stack containing a copy of the current state. +If a continue or break statement is met in the while statement, +the state at reaching this continue or break statement is +merged with a special state in the \f(CWWHILE\fP entry, called +\f(CWS_END\fP. +(If \f(CWS_END\fP did not yet contain a state, the state is copied +to \f(CWS_END\fP.) +At the end of the while statement this \f(CWS_END\fP is merged with the +current state, which result is merged with the state at entrance +of the while statement into the new current state. +.PP +A for statement is treated similarly. 
+A do statement is treated the same way too, except that \f(CWS_END\fP +isn't merged with the state at entrance of the do statement, +but becomes the new current state. +.PP +For switch statements a \f(CWSWITCH\fP entry is pushed on the stack. +Apart from the current state, this entry contains two other +states, \f(CWS_BREAK\fP and \f(CWS_CASE\fP. +\f(CWS_BREAK\fP initially contains no state, \f(CWS_CASE\fP +initially contains a +copy of the current state at entrance of the switch statement. +After parsing a case label, a \f(CWCASE\fP entry is pushed on the stack, +containing a copy of the current state. +If, after zero or more statements, we meet another case label, +the state at reaching this case label is merged with \f(CWS_CASE\fP +of the \f(CWSWITCH\fP entry below and a new copy of the state +at entrance +of the switch statement is put in the \f(CWCASE\fP entry. +If we meet a break statement, we merge the current state with +\f(CWS_BREAK\fP of the \f(CWSWITCH\fP entry below and pop the +\f(CWCASE\fP entry. +In addition to this, the occurrence of a default statement +inside the switch statement is recorded in the \f(CWSWITCH\fP entry. +At the end of the switch statement we check if we have met a +default statement. +If not, \f(CWS_BREAK\fP is merged with the current state at entrance +of the switch statement. (Because it is possible that no case +label will be chosen.) +Next the \f(CWS_CASE\fP is `special_merged' with \f(CWS_BREAK\fP +into the new current state. +For more details about these merge functions see the sources. +.PP +With the approach described above, +.I lint +is aware of the flow +of control in the program. +There still are some doubtful constructions +.I lint +will not detect and there are some constructions (although rare) +for which +.I lint +gives an incorrect warning (see figure 4). 
+.KF +.DS B +.ft CW +{ + int i; + + for (;;) { + if (cond) { + i = 0; + break; + } + } + use(i); + /* lint warns: maybe i used before set + * although the fragment is correct + */ +} +.DE +.br +.I +.ce +figure\ 4. +.R +.KE +.PP +A nice advantage of the method is, that the parser stays clear, +i.e. it isn't extended with extra parameters which must pass the +states. +In this way the parser still is very readable and we have a nice +interface with +.I lint +using function calls. +.NH 3 +Undefined evaluation orders +.PP +In expressions the values of some variables are used and some +variables are set. +Of course, the same holds for subexpressions. +The compiler is allowed to choose the order of evaluation of +subexpressions involving a commutative and associative operator +(\f(CW*\fP, \f(CW+\fP, \f(CW&\fP, \f(CW|\fP, \f(CW^\fP), +the comma in a parameter list or an assignment operator. +In section 3.4 it is made clear that this will lead to +statements with ambiguous semantics. +.PP +The way these constructs are detected is rather straight forward. +The function which parses an expression (\f(CWlint_expr\fP) +returns a linked +list containing information telling which variables are set and +which variables are used. +A variable is indicated by its +.I idf +descriptor and an +.I offset. +This offset is needed for discriminating entries of the same +array and members of the same structure or union, so it is +possible to warn about the statement +.ft CW +a[b[0]]\ =\ b[0]++;. +.R +When \f(CWlint_expr\fP meets a commutative operator (with respect to the +evaluation order), it calls itself recursively with the operands +of the operator as expression. +The returned results are checked for undefined evaluation orders +and are put together. +This is done by the function \f(CWcheck_and_merge\fP. +.NH 3 +Useless statements +.PP +Statements which compute a value that is not used, +are said to have a \fInull effect\fP. 
+Examples are \f(CWx = 2, 3;\fP, \f(CWf() + g();\fP and +\f(CW*p++;\fP. +(\f(CW*\fP and \f(CW++\fP have the same precedence and associate +from right to left.) +.PP +A conditional expression computes a value too. +If this value isn't used, it is better to use an if-else +statement. +So, if +.I lint +sees +.DS B +.ft CW +b ? f() : g(); +.R +.DE +.LP +it warns \f(CWuse if-else construction\fP. +.NH 3 +Not-reachable statements +.PP +The algorithm to detect not-reachable statements (including not +reachable initializations) is as follows. +Statements after a label and a case statement and the compound +statement of a function are always reachable. +Other statements are not-reachable after: +.QS +.RS +.IP - 1 +a goto statement +.IP - +a return statement +.IP - +a break statement +.IP - +a continue statement +.IP - +a switch statement +.IP - +an endless loop (a while, do or for loop with a conditional +which always evaluates to true and without a break statement) +.IP - +an if-else statement of which both if part and else part +end up in a not-reachable state +.IP - +a switch statement of which all \f(CWcase ... break\fP parts +(including +a \f(CWdefault ... break\fP part) end up in a not-reachable state +.IP - +the pseudocomment \f(CW/*\ NOTREACHED\ */\fP +.RE +.QE +.PP +The algorithm is easily implemented using the \f(CWst_nrchd\fP selector +in the +.I state +descriptor. +The \f(CWst_warned\fP selector is used to prevent superfluous warnings. +To detect an endless loop, after a while (), for (..;;..) +and do part the current state of the stack entry beneath the top one +is set to not reached. +If, in the statement following, a break statement is met, this same +state is set to reached. +If the while () part of the do statement is met, this state +is set to reached if it doesn't evaluate to true. +The detection of not-reachable statements after a switch statement +is done in a similar way.
+In addition it is checked if a default statement isn't met, in +which case the statement after the switch statement can be reached. +The warning \f(CWstatement not reached\fP is not given for compound +statements. +If +.I lint +did, it would warn for the compound statement in a switch statement, +which would be incorrect. +.PP +Not-reachable statements are still interpreted by +.I lint. +I.e. when +.I lint +warns that some statement can't be reached, it assumes this is +not what the programmer really wants and it ignores this fact. +In this way a lot of useless warnings are prevented in the case of +a not-reachable statement. +See figure 5. +.KF +.DS B +.ft CW +{ + int i; + + for (;;) { + /* A loop in which the programmer + * forgot to introduce a conditional + * break statement. + * Suppose i is not used in this part. + */ + } + /* some more code in which i is used */ +} +/* The warning "statement not reached" highlights the bug. + * An additional warning "i unused in function %s" is + * formally correct, but doesn't provide the programmer + * with useful information. + */ +.DE +.I +.ce +figure\ 5. +.R +.KE +.NH 3 +Functions returning expressions and just returning +.PP +Each time a return statement is met, +.I lint +checks if an expression is returned or not. +If a function has a return with expression and a return without +expression, +.I lint +warns +.ft CW +function %s has return(e); and return;. +.R +If the flow of control can +.I +fall through +.R +the end of the compound statement of a function, this indicates +an implicit return statement without an expression. +If the end of the compound statement of the function can be reached, +.I lint +introduces this implicit return statement without expression. +.PP +Sometimes the programmer knows for sure that all case parts inside +a switch statement include all possible cases, so he doesn't +introduce a default statement. +This can lead to an incorrect warning. +Figure 6 shows how to prevent this warning. 
+.KF +.DS B +.ft CW + func() + { + switch (cond) { + case 0: return(e0); + case 1: return(e1); + } + /* NOTREACHED */ + } +/* no warning: "function func has return(e); and return;" */ +.DE +.I +.ce +figure\ 6. +.R +.KE +.PP +The pseudocomment \f(CW/*\ NOTREACHED\ */\fP can also be used to tell +.I lint +that some function doesn't return. See figure 7. +.KS +.DS B +.ft CW + func() + { + switch (cond) { + case 0: return(e0); + case 1: return(e1); + default: error(); /* calls exit or abort */ + /* NOTREACHED */ + } + } +/* no warning: "function func has return(e); and return;" */ +.I +.DE +.ce +figure\ 7. +.R +.KE +.NH 3 +Output definitions for the second pass +.PP +The first pass can only process one program file. +To be able to process a program that spreads over more than one file, +the first pass outputs definitions that are processed by a second +pass. +The format of such a definition is different for different classes: +.PP +For class in {EFDF, SFDF, LFDF} +.DS C +::::::: +.DE +.LP +A negative \fInr of args\fP indicates that the function can be called with +a varying number of arguments. +.PP +For class = FC +.DS C +::::: +.DE +.LP +The \fIvalue is used\fP part can have three meanings: +the value of the function is ignored; +the value of the function is used; +the value of the function is cast to type \fIvoid\fP. +.PP +For other classes +.DS C +:::: +.DE +.LP +Definitions of class VU (Variable Usage) are only output for \fIused\fP +global variables. +.PP +Structure and union types that are output to the intermediate file +are simplified. +(The following occurrences of \fIstructure\fP should be +read as \fIstructure or union\fP and \fIstruct\fP as \fIstruct or +union\fP.) +Structures that are identified by a \fIstructure tag\fP are output +to the intermediate file as \f(CWstruct <tag>\fP. +Structures without a structure tag are output as +\f(CWstruct {<mems>}\fP with \f(CW<mems>\fP a semicolon-separated +list of types of the members of this structure.
+An alternative way would be to output the complete structure definition. +However, this gives practical problems. +It is allowed to define some object of a structure type with a +structure tag, without this structure being defined at that place. +The first approach leaves errors, such as in figure 8, undetected. +.KF +.DS B +.ft CW + "a.c" "b.c" + +struct str { struct str { + float f; int i; +} s; }; + +main() func(s) +{ struct str s; + func(s); {} +} +.I +.DE +.ce +figure\ 8. +.R +.KE +.PP +To be able to detect these errors, the first pass should also output +definitions of structure tags. +The example of figure 8 would then get a warning like +.ft CW +structure str defined inconsistently +.R +.PP +More information on these definitions in section 4.3 and 4.4. +.NH 3 +Generating libraries +.PP +.I Lint +knows the library `-lc', `-lm' and `-lcurses'. +If a program uses some other library, it is possible to generate +a corresponding \fIlint library\fP. +To do this, precede all the C source files of this library by +the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP. +Then feed these files one by one to the first pass of +.I lint +collecting the standard output in a file and ignoring the warnings. +The resulting file contains library definitions of the functions +and external variables defined in the library sources, and not more +than that. +If this file is called `llib-l\fIname\fP.ln +.I lint +can be told to search the library by passing it as argument in +the command line `-llib-l\fIname\fP.ln. +The implementation of this feature is simple. +.PP +As soon as the pseudocomment \f(CW/*\ LINTLIBRARY\ */\fP is met, +only function and variable definitions are output with class LFDF +and LVDF respectively. +Other definitions, which otherwise would have been output, are +discarded. +.PP +Instead of generating a special lint library file, one can make a +file containing the library definitions and starting with +\f(CW/* LINTLIBRARY */\fP. 
+This file can then be passed to +.I lint +just by its name. +This method isn't as efficient as the first one. +.NH 3 +Interpreting the pseudocomments +.PP +The interpretation of the pseudocomments is done by the lexical +analyzer, because this part of the program already took care of the +comments. +At first sight this seems very easy: as soon as some pseudocomment +is met, raise the corresponding flag. +Unfortunately this doesn't work. +The lexical analyzer is a \fIone token look ahead scanner\fP. +This causes the above procedure to raise the flags one token too +soon. +A solution to get the right effect is to reserve two flags per +pseudocomment. +The first is set as soon as the corresponding pseudocomment is +scanned. +At the returning of each token this flag is moved to the second flag. +The delay in this way achieved makes the pseudocomments have effect +at the correct place. +.NH 2 +The second pass data structure +.NH 3 +Inp_def descriptor +.DS B +.ft CW +struct inp_def { + struct inp_def *next; + int id_class; + char id_name[NAMESIZE]; + char id_file[FNAMESIZE]; + unsigned int id_line; + int id_nrargs; + char id_argtps[ARGSTPSSIZE]; + int id_returns; + char id_type[TYPESIZE]; + int id_called; + int id_used; + int id_ignored; + int id_voided; +}; +.R +.DE +.PP +This description is almost similar to the \fIoutdef\fP descriptor as +described in 4.1.2.5. +There are some differences too. +.IP \f(CWnext\fP 15 +As structures of this type are allocated dynamically, this field +is added so the same memory allocator as used in the first pass can be +used. +.LP +\f(CWid_called +.br +id_used +.br +id_ignored\fP +.IP \f(CWid_voided\fP 15 +Some additional fields only used for function definitions. Their +meaning should be clear. +.PP +The other fields have the same meaning as the corresponding fields +in the \fIoutdef\fP descriptor. +Some attention should be paid to \f(CWid_argtps\fP and \f(CWid_type\fP.
+These members have type \f(CWarray of char\fP, in contrast to +their counterparts in the \fIoutdef\fP descriptor. +The only operation performed on types is a check on equality. +Types are output by the first pass as a string describing the type. +The type of \f(CWi\fP in \f(CWint *i();\fP e.g. is output as +\f(CWint *()\fP. +Such a string is best put in an \f(CWarray of char\fP to be compared +easily. +.NH 2 +The second pass checking mechanism +.PP +After all the definitions that are output by the first pass are +sorted by name, the definitions belonging to one name are ordered +as follows. +.QS +.RS +.IP - 1 +external definitions +.IP - +static definitions +.IP - +library definitions +.IP - +declarations +.IP - +function calls +.IP - +variable usages +.RE +.QE +.PP +The main program of the second pass is easily explained. +For all different names, do the following. +First read the definitions. +If there is more than one definition, check for conflicts. +Then read the declarations, function calls and variable usages and +check them against the definitions. +After having processed all the declarations, function calls and +variable usages, check the definitions to see if they are used +correctly. +The next three paragraphs will explain the three most important +functions of the program. +.NH 3 +Read_defs() +.PP +This function reads all definitions belonging to the same name. +Only one external definition is allowed, so if there are more, a +warning is given. +In different files it is allowed to define static functions or +variables with the same name. +So if a static function is read, \f(CWread_defs\fP checks if there isn't +already an external definition, and if not it puts the static +definition in the list of static definitions, to be used later. +If no external or static definitions are met, a library definition is +taken as definition. 
+If a function or a variable is defined with the same name as a function +or a variable in a library (which is allowed) +.I lint +gives a warning. +Of course it is also possible that there is no definition at all. +In that case \f(CWcheck\fP will warn. +.NH 3 +Check() +.PP +\f(CWCheck\fP verifies declarations, function calls and variable +usages against the definitions. +For each of these entries the corresponding definition is looked up. +As there may be more than one static definition, first a static +definition from the same file as the entry is searched. +If not present, the external definition (which may be a library +definition) is taken as definition. +If no definition can be found and the current entry is an external +declaration, +.I lint +warns. +However in the case of an implicit function declaration +.I lint +will not warn, because +we will get a warning \f(CW%s used but not defined\fP later on. +Next a check is done if the declarations are consistent with their +definitions. +After the declarations, the function calls and variable usages are +verified against their corresponding definitions. +If no definition exists, +.I lint +warns. +Else the field \f(CWid_called\fP is set to 1. +(For variable definitions this should be interpreted as \fIused\fP.) +For variable usages this will be all. +If we are processing a function call we also check the number and types +of the arguments and we warn for function values which are used from +functions that don't return a value. +For each function call we administrate if a function value is used, +ignored or voided. +.NH 3 +Check_usage() +.PP +Checks if the external definition and static definitions are used +correctly. +If a function or variable is defined but never used, +.I lint +warns, except for library definitions. +Functions, which return a value but whose value is always or +sometimes ignored, get a warning. +(A function value which is voided (cast to void) is not ignored, +but it isn't used either.) 
+.bp diff --git a/doc/lint/chap5 b/doc/lint/chap5 new file mode 100644 index 0000000..28c4f7c --- /dev/null +++ b/doc/lint/chap5 @@ -0,0 +1,107 @@ +.NH 1 +How to make lint shut up +.PP +It can be very annoying having +.I lint +warn about questionable constructs of which the programmer already is +aware. +There should be a mechanism to give +.I lint +some extra information in the source code. +This could be done by introducing some special keywords, which +would have a special meaning to +.I lint. +This is a bad solution, because these keywords would cause existing +C compilers not to work on these programs. +A neater solution is to invent some comments having a special +meaning to +.I lint. +We call these comments +.I pseudocomments. +The pseudocomments have no meaning to existing C compilers, so +compilers will not have to be rewritten for C programs containing +the previously proposed special keywords. +The following pseudocomments are recognized by +.I lint. +.LP +\f(CW/* VARARGS\fIn\fP */\fR +.br +.in 5 +The next function can be called with a variable number of arguments. +Only check the first \fIn\fP arguments. +The \fIn\fP must follow the word \f(CWVARARGS\fP immediately. +This pseudocomment is useful for functions like e.g. printf. +(The definition of the function printf should be preceded by +\f(CW/*\ VARARGS1\ */\fP.) +.in +.LP +\f(CW/* VARARGS */\fP +.br +.in 5 +Means the same as \f(CW/* VARARGS0 */\fP. +.in +.LP +\f(CW/* ARGSUSED */\fP +.br +.in 5 +Don't complain about unused arguments in the next function. +When we are developing a program we sometimes write functions of +which we do not yet use the arguments. +Because we do want to use +.I lint +on these programs, it is nice to have this pseudocomment. +.in +.LP +\f(CW/* NOTREACHED */\fP +.br +.in 5 +.I Lint +makes no attempt to discover functions which never return, +although it \fIis\fP possible to find functions that don't return. 
+This would require a transitive closure with respect to the already +known \fInot-returning\fP functions; an unacceptably time-consuming +process. +To make +.I lint +aware of a function that doesn't return, a call of this function +should be followed by the pseudocomment \f(CW/*\ NOTREACHED\ */\fP. +This pseudocomment can also be used to indicate that some case part +inside a switch (especially a default part) can't be reached. +The above mentioned cases of use of this pseudocomment are +examples. +The comment can be used just to indicate that some part of the +program can't be reached. +It sometimes is necessary to introduce an extra compound statement +to get the right effect. +See figure 9. +.KF +.DS B +.ft CW + if (cond) + /* if part */ ; + else { + error(); /* doesn't return */ + /* NOTREACHED */ + } +/* Without the compound else part, lint would assume + * the statement after the if statement to be NOTREACHED, + * instead of the end of the else part. + */ +.I +.DE +.ce +figure\ 9. +.R +.KE +.in +.LP +\f(CW/* LINTLIBRARY */\fP +.br +.in 5 +All definitions following this comment are assumed to be library +definitions. +It shuts off complaints about unused functions and variables. +See also section 4.2.7 for how to use this comment for generating +lint libraries. +.in +.bp diff --git a/doc/lint/chap6 b/doc/lint/chap6 new file mode 100644 index 0000000..6ba75b3 --- /dev/null +++ b/doc/lint/chap6 @@ -0,0 +1,107 @@ +.NH 1 +User options +.PP +.I Lint +recognizes the following command line flags. +Some of them are identical to the flags of +.I cem. +.I Lint +warns for flags it doesn't know. +.LP +\f(CW-D<name> +.br +-D<name>=<text>\fP +.br +.in 5 +Causes \f(CW<name>\fP to be defined as a macro. +The first form is equivalent to `\f(CW-D<name>=1\fP'. +The second form is equivalent to putting `\f(CW#define <name> <text>\fP' +in front of all the source files. +.in +.LP +\f(CW-U<name>\fP +.br +.in 5 +Acts as if the line `\f(CW#undef <name>\fP' is put in front of all +the source files.
+.in +.LP +\f(CW-I<directory>\fP +.br +.in 5 +This puts \f(CW<directory>\fP in the include directory +list. +.in +.LP +\f(CW-R\fP +.br +.in 5 +Turn off the `strict' option. +Default +.I lint +checks the program according to the Reference Manual, because this +gives a definition of the language with which there is a better chance +of writing portable programs. +With this flag on, some constructs, otherwise not allowed, are +accepted. +.in +.LP +\f(CW-l<name> +.br +-llib-l<name>.ln +.br +-l\fP +.br +.in 5 +`\f(CW-l<name>\fP' tells +.I lint +to search the lint library +\f(CWllib-l<name>.ln\fP for missing +definitions of functions and variables. +The option `\f(CW-llib-l<name>.ln\fP' makes +.I lint +search the lint library file \f(CWllib-l<name>.ln\fP in the current +directory for missing definitions. +Default is `\f(CW-lc\fP'; this default can be suppressed by +`\f(CW-l\fP'. +.in +.LP +\f(CW-a\fP +.br +.in 5 +Warn for conversions from integer to long and vice versa. +.in +.LP +\f(CW-b\fP +.br +.in 5 +Don't report not-reachable break statements. +This flag is useful for running +.I lint +on a \fIlex\fP- or \fIyacc\fP-generated source file. +.in +.LP +\f(CW-h\fP +.br +.in 5 +Check for useless statements and possible pointer alignment problems. +.in +.LP +\f(CW-n\fP +.br +.in 5 +Don't complain about unused and undefined functions and variables. +.in +.LP +\f(CW-v\fP +.br +.in 5 +Don't warn about unused arguments of functions. +.in +.LP +\f(CW-x\fP +.br +.in 5 +Complain about unused external variables. +.in +.bp diff --git a/doc/lint/chap7 b/doc/lint/chap7 new file mode 100644 index 0000000..d224f1f --- /dev/null +++ b/doc/lint/chap7 @@ -0,0 +1,139 @@ +.NH +Ideas for further development +.PP +Although the program in its current state is a useful program, +there are still a lot of features that should be implemented +in following versions. +I'll summarize them in this section. +.IP \(bu +Actually the program consists of three passes. +The filter +.I sort +is a complete pass, just as the first and the second pass.
+I think we can speed up the program by removing the filter and making +the second pass accept an unsorted file. +The sorting process can be done in parallel to the first pass if +both processes communicate through a pipe. +In addition to this sorting, the second pass can generate already +some warnings. +(Warnings like \f(CW%s defined but never used\fP can only be +generated after having processed all the input.) +These warnings generated in parallel to the warnings of the first pass, +should be sent to an intermediate file, otherwise the warnings would +get messed up. +Such an improvement will have best effect on a multi processing +machine, but even on single processing machines this will give a better +performance. (On a single processing machine the pipe should be +replaced by an intermediate file.) +.IP \(bu +Expressions could be classified so +.I lint +can warn for some classes of expressions in strange contexts. +Suppose as class <boolean>. +\f(CWb\fP will be of class <boolean> if e.g. \f(CWb\fP is assigned +the expression \f(CW<expr> || <expr>\fP. +The following expression should then give a warning +.DS B +.ft CW +b + i; /* weird expression */ +.R +.DE +.IP \(bu +A mechanism to check printf like routines. +This mechanism should verify the format string against the following +arguments. +There is a public domain program that can be used to do this job. +It is called printfck and should be used as a filter between the +source files and +.I lint. +.IP \(bu +Raise warnings for incomplete initializer lists like +.DS B +.ft CW +int a[10] = {0, 1, 2}; +/* initializer list not complete */ +.R +.DE +.IP \(bu +Warnings for constructs like +.DS B +.ft CW +for (i = 0; i < 10; i++) { + . . . . + i--; + /* loop control variable affected */ + . . . .
+} +.R +.DE +and +.DS B +.ft CW +while (var) { + /* statements in which the value + * of var is never changed + */ +} +/* loop control variable not updated */ +.R +.DE +.IP \(bu +A warning \f(CWbad layout\fP for program fragments like +.DS B +.ft CW +if (cond1) + if (cond2) + statement(); +else /* bad layout */ + statement(); +.R +.DE +.IP \(bu +A warning \f(CWassignment in conditional context\fP in case of +.DS B +.ft CW +if (a = b) +.R +.DE +.IP +The programmer probably meant \f(CWif (a == b)\fP. +No warning should be given for \f(CWif ((a = b) != c)\fP, +nor for \f(CWif ((a = b))\fP. +.IP \(bu +Warnings for empty statements in strange contexts, like +.DS B +.ft CW +if (cond); /* mistake */ + statement(); +.R +.DE +.IP +(This mistake would also be detected by a warning \f(CWbad layout\fP.) +.IP \(bu +A mechanism to prevent the warning \f(CWpossible pointer alignment +problem\fP for functions of which the programmer already knows that +no problem will arise. +E.g. for functions like malloc and family. +.IP \(bu +The current version of +.I lint +warns for conversions from long to int (if -a flag is +on). +It even warns if the programmer used the proper cast, as e.g. +.DS B +.ft CW +int i; +long l = 0L; + +i = (int)l; +.R +.DE +.IP +In this case I think +.I lint +need not warn. +The explicit cast indicates that the programmer knows what he is +doing. +This feature is not implemented because the expression tree doesn't +show if the cast was implicit or explicit. +.bp diff --git a/doc/lint/chap8 b/doc/lint/chap8 new file mode 100644 index 0000000..eac0ea6 --- /dev/null +++ b/doc/lint/chap8 @@ -0,0 +1,56 @@ +.NH 1 +Testing the program +.PP +There is no test-suite for testing +.I lint. +I have written a lot of small files that each test one +particular property of the program. +At this moment there are about 220 test programs. +.PP +It would take a lot of time and effort to run these tests by hand. 
+To ease this work I wrote a program that runs these tests +automatically. +The test program (the program that runs the tests) needs, associated +with each .c file, a .w file, containing from each expected warning +a substring. E.g. when the following warnings should be given by +.I lint: +.DS B +.ft CW + file t.c, line 3, i evaluation order undefined + file t.c, line 6, a set but not used in function main +.R +.DE +it is sufficient to write a file \f(CWt.w\fP containing +.DS B +.ft CW + a set but not used in function main + i evaluation order undefined +.R +.DE +The test program is called with all the .c files to be tested +as arguments. +.PP +Sometimes it is necessary to test +.I lint +on two files. +The test program runs +.I lint +on two files when two consecutive +arguments are of the form \fIname\fPa.c and \fIname\fPb.c. +It then compares the output of +.I lint +with the file \fIname\fP.w. +.PP +.I Lint +is also tested by running it on existing programs. +.I Lint +has been run on some \s-2UNIX\s+2 utility programs in +/usr/src/cmd, on Unipress Emacs (consisting of more than 30,000 +lines of code) and the program itself. +Bugs have been found in e.g. /usr/src/cmd/cat.c and +/usr/src/cmd/ld.c. +To test the robustness of the program, it was run on the +password file /etc/passwd and on `mixed' C program files. +These mixed C program files are C program files that were +broken in chunks and then put together in a different order. +.bp diff --git a/doc/lint/chap9 b/doc/lint/chap9 new file mode 100644 index 0000000..fca2bb6 --- /dev/null +++ b/doc/lint/chap9 @@ -0,0 +1,48 @@ +.NH 1 +References +.IP [1] +Dennis M. Ritchie, +.I +C Reference Manual, +.R +Bell Laboratories, +Murray Hill, +New Jersey, +1978. +.IP [2] +B.W. Kernighan and D.M. Ritchie, +.I +The C Programming Language, +.R +Prentice Hall, +1978. +.IP [3] +Eric H. Baalbergen, Dick Grune, Maarten Waage, +.I +The CEM Compiler, +.R +Manual IM-4, Vrije Universiteit, Amsterdam, +1985. +.IP [4] +Andrew S. 
Tanenbaum et al., +.I +A practical tool kit for making portable compilers, +.R +Comm. ACM, +Sep. 1983. +.IP [5] +S. C. Johnson, +.I +Lint, a C program verifier, +.R +Bell Laboratories, +Murray Hill, +New Jersey, +1978. +.IP [6] +Dick Grune, Ceriel J. H. Jacobs, +.I +A Programmer-friendly LL(1) Parser Generator, +.R +IR 127, Vrije Universiteit, Amsterdam, +1987. diff --git a/doc/lint/contents b/doc/lint/contents new file mode 100644 index 0000000..93538fe --- /dev/null +++ b/doc/lint/contents @@ -0,0 +1,59 @@ +.DS + + + + + + +.DE +.SH +Contents +.R +.sp 1 +.IP 1. +Introduction +.IP 2. +Outline of the program +.IP 3. +What lint checks +.RS +.IP 3.1 +Set, used and unused variables +.IP 3.2 +Flow of control +.IP 3.3 +Functions +.IP 3.4 +Undefined evaluation order +.IP 3.5 +Pointer alignment problems +.IP 3.6 +Libraries +.RE +.IP 4. +How lint checks +.RS +.IP 4.1 +The first pass data structure +.IP 4.2 +The first pass checking mechanism +.IP 4.3 +The second pass data structure +.IP 4.4 +The second pass checking mechanism +.RE +.IP 5. +How to make lint shut up +.IP 6. +User options +.IP 7. +Ideas for further development +.IP 8. +Testing the program +.IP 9. +References +.LP +Appendix A \- The warnings +.br +Appendix B \- The Ten Commandments for C programmers +.bp diff --git a/doc/lint/frontpage b/doc/lint/frontpage new file mode 100644 index 0000000..7ac2266 --- /dev/null +++ b/doc/lint/frontpage @@ -0,0 +1,14 @@ +.TL +.sp 5 +Lint, a C Program Checker +.AU +Frans Kunst +.AI +Vrije Universiteit +Amsterdam +.LP +.sp 8 +.ce +Afstudeer verslag +.ce +18 mei 1988 diff --git a/doc/lint/proto.make b/doc/lint/proto.make new file mode 100644 index 0000000..f139030 --- /dev/null +++ b/doc/lint/proto.make @@ -0,0 +1,24 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:06:30 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/doc/lint + +FP = $(SRC_DIR)/frontpage + +DOC = $(SRC_DIR)/abstract \ + $(SRC_DIR)/contents \ + $(SRC_DIR)/chap1 \ + $(SRC_DIR)/chap2 \ + $(SRC_DIR)/chap3 \ + $(SRC_DIR)/chap4 \ + $(SRC_DIR)/chap5 \ + $(SRC_DIR)/chap6 \ + $(SRC_DIR)/chap7 \ + $(SRC_DIR)/chap8 \ + $(SRC_DIR)/chap9 \ + $(SRC_DIR)/appendix_A \ + $(SRC_DIR)/appendix_B + +$(TARGET_HOME)/doc/lint.doc: $(FP) $(DOC) + cat $(FP) $(DOC) > $(TARGET_HOME)/doc/lint.doc diff --git a/doc/m2ref.doc b/doc/m2ref.doc new file mode 100644 index 0000000..9468e40 --- /dev/null +++ b/doc/m2ref.doc @@ -0,0 +1,545 @@ +.\" $Id: m2ref.doc,v 2.14 1994/06/24 10:02:03 ceriel Exp $ +.\" troff -ms m2ref.doc +.TL +The ACK Modula-2 Compiler +.AU +Ceriel J.H. Jacobs +.AI +Department of Mathematics and Computer Science +Vrije Universiteit +Amsterdam +The Netherlands +.AB no +.AE +.NH +Introduction +.PP +This document describes the implementation-specific features of the +ACK Modula-2 compiler. +It is not intended to teach Modula-2 programming. +For a description of the Modula-2 language, +the reader is referred to [1]. +.PP +The ACK Modula-2 compiler is currently available for use with the VAX, +Motorola MC68020, +Motorola MC68000, +PDP-11, +and Intel 8086 code-generators. +For the 8086, +MC68000, +and MC68020, +floating point emulation is used. +This is made available with the \fI-fp\fP +option, +which must be passed to \fIack\fP[4,5]. +.NH +The language implemented +.PP +This section discusses the deviations from the Modula-2 language as described +in the "Report on The Programming Language Modula-2", +as it appeared in [1], +from now on referred to as "the Report". +Also, +the Report sometimes leaves room for interpretation. +The section numbers +mentioned are the section numbers of the Report. +.NH 2 +Syntax (section 2) +.PP +The syntax recognized is that of the Report, +with some extensions to +also recognize the syntax of an earlier definition, +given in [2]. 
+Only one compilation unit per file is accepted. +.NH 2 +Vocabulary and Representation (section 3) +.PP +The input "\f(CW10..\fP" is parsed as two tokens: "\f(CW10\fP" and "\f(CW..\fP". +.PP +The empty string \f(CW""\fP has type +.DS +.ft CW +ARRAY [0 .. 0] OF CHAR +.ft P +.DE +and contains one character: \f(CW0C\fP. +.PP +When the text of a comment starts with a '\f(CW$\fP', +it may be a pragma. +Currently, +the following pragmas exist: +.DS +.ft CW +(*$F (F stands for Foreign) *) +(*$R[+|-] (Runtime checks, on or off, default on) *) +(*$A[+|-] (Array bound checks, on or off, default off) *) +(*$U (Allow for underscores within identifiers) *) +.ft P +.DE +The Foreign pragma is only meaningful in a \f(CWDEFINITION MODULE\fP, +and indicates that this +\f(CWDEFINITION MODULE\fP describes an interface to a module written in another +language (for instance C, +Pascal, +or EM). +Runtime checks that can be disabled are: +range checks, +\f(CWCARDINAL\fP overflow checks, +checks when assigning a \f(CWCARDINAL\fP to an \f(CWINTEGER\fP and vice versa, +and checks that \f(CWFOR\fP-loop control-variables are not changed +in the body of the loop. +Array bound checks can be enabled, +because many EM implementations do not +implement the array bound checking of the EM array instructions. +When enabled, +the compiler generates a check before generating an +EM array instruction. +Even when underscores are enabled, +they still may not start an identifier. +.PP +Constants of type \f(CWLONGINT\fP are integers with a suffix letter \f(CWD\fP +(for instance \f(CW1987D\fP). +Constants of type \f(CWLONGREAL\fP have suffix \f(CWD\fP if a scale factor is missing, +or have \f(CWD\fP in place of \f(CWE\fP in the scale factor (f.i. \f(CW1.0D\fP, +\f(CW0.314D1\fP). +This addition was made, +because there was no way to indicate long constants, +and also because the addition was made in Wirth's newest Modula-2 compiler. 
+.NH 2 +Declarations and scope rules (section 4) +.PP +Standard identifiers are considered to be predeclared, +and valid in all +parts of a program. +They are called \fIpervasive\fP. +Unfortunately, +the Report does not state how this pervasiveness is accomplished. +However, +page 87 of [1] states: "Standard identifiers are automatically +imported into all modules". +Our implementation therefore allows +redeclarations of standard identifiers within procedures, +but not within +modules. +.NH 2 +Constant expressions (section 5) +.PP +Each operand of a constant expression must be a constant: +a string, +a number, +a set, +an enumeration literal, +a qualifier denoting a +constant expression, +a type transfer with a constant argument, +or one of the standard procedures +\f(CWABS\fP, +\f(CWCAP\fP, +\f(CWCHR\fP, +\f(CWLONG\fP, +\f(CWMAX\fP, +\f(CWMIN\fP, +\f(CWODD\fP, +\f(CWORD\fP, +\f(CWSIZE\fP, +\f(CWSHORT\fP, +\f(CWTSIZE\fP, +or \f(CWVAL\fP, +with constant argument(s); +\f(CWTSIZE\fP and \f(CWSIZE\fP may also have a variable as argument. +.PP +Floating point expressions are never evaluated compile time, +because the compiler basically functions as a cross-compiler, +and thus cannot +use the floating point instructions of the machine on which it runs. +Also, +\f(CWMAX(REAL)\fP and \f(CWMIN(REAL)\fP are not allowed. +.NH 2 +Type declarations (section 6) +.NH 3 +Basic types (section 6.1) +.PP +The type \f(CWCHAR\fP includes the ASCII character set as a subset. +Values range from +\f(CW0C\fP to \f(CW377C\fP, +not from \f(CW0C\fP to \f(CW177C\fP. +.NH 3 +Enumerations (section 6.2) +.PP +The maximum number of enumeration literals in any one enumeration type +is \f(CWMAX(INTEGER)\fP. +.NH 3 +Record types (section 6.5) +.PP +The syntax of variant sections in [1] is different from the one in [2]. +Our implementation recognizes both, +giving a warning for the older one. +However, +see section 3. 
+.NH 3 +Set types (section 6.6) +.PP +The only limitation imposed by the compiler is that the base type of the +set must be a subrange type, +an enumeration type, +\f(CWCHAR\fP, +or \f(CWBOOLEAN\fP. +So, +the lower bound may be negative. +However, +if a negative lower bound is used, +the compiler gives a warning of the \fIrestricted\fP class (see the manual +page of the compiler). +.PP +The standard type \f(CWBITSET\fP is defined as +.DS +.ft CW +TYPE BITSET = SET OF [0 .. 8*SIZE(INTEGER)-1]; +.ft P +.DE +.NH 2 +Expressions (section 8) +.NH 3 +Operators (section 8.2) +.NH 4 +Arithmetic operators (section 8.2.1) +.PP +The Report does not specify the priority of the unary +operators \f(CW+\fP or \f(CW-\fP: +It does not specify whether +.DS +.ft CW +- 1 + 1 +.ft P +.DE +means +.DS +.ft CW +- (1 + 1) +.ft P +.DE +or +.DS +.ft CW +(-1) + 1 +.ft P +.DE +I have seen some compilers that implement the first alternative, +and others that implement the second. +Our compiler implements the second, +which is suggested by the fact that their priority is not specified, +which might indicate that it is the same as that of their binary counterparts. +And then the rule about left to right decides for the second. +On the other hand one might argue that, +since the grammar only allows for one unary operator in a simple expression, +it must apply to the whole simple expression, +not just the first term. +.NH 2 +Statements (section 9) +.NH 3 +Assignments (section 9.1) +.PP +The Report does not define the evaluation order in an assignment. +Our compiler certainly chooses an evaluation order, +but it is explicitly left undefined. +Therefore, +programs that depend on it may cease to work later. +.PP +The types \f(CWINTEGER\fP and \f(CWCARDINAL\fP are assignment-compatible with +\f(CWLONGINT\fP, +and \f(CWREAL\fP is assignment-compatible with \f(CWLONGREAL\fP. 
+.NH 3 +Case statements (section 9.5) +.PP +The size of the type of the case-expression must be less than or equal to +the word-size. +.PP +The Report does not specify what happens if the value of the case-expression +does not occur as a label of any case, +and there is no \f(CWELSE\fP-part. +In our implementation, +this results in a runtime error. +.NH 3 +For statements (section 9.8) +.PP +The Report does not specify the legal types for a control variable. +Our implementation allows the basic types (except \f(CWREAL\fP), +enumeration types, +and subranges. +A runtime warning is generated when the value of the control variable +is changed by the statement sequence that forms the body of the loop, +unless runtime checking is disabled. +.NH 3 +Return and exit statements (section 9.11) +.PP +The Report does not specify which result-types are legal. +Our implementation allows any result type. +.NH 2 +Procedure declarations (section 10) +.PP +Function procedures must exit through a RETURN statement, +or a runtime error occurs. +.NH 3 +Standard procedures (section 10.2) +.PP +Our implementation supports \f(CWNEW\fP and \f(CWDISPOSE\fP +for backwards compatibility, +but issues warnings for their use. +However, +see section 3. +.PP +Also, +some new standard procedures were added, +similar to the new standard procedures in Wirth's newest compiler: +.IP \- +\f(CWLONG\fP converts an argument of type \f(CWINTEGER\fP or \f(CWREAL\fP to the +types \f(CWLONGINT\fP or \f(CWLONGREAL\fP. +.IP \- +\f(CWSHORT\fP performs the inverse transformation, +without range checks. +.IP \- +\f(CWFLOATD\fP is analogous to \f(CWFLOAT\fP, +but yields a result of type +\f(CWLONGREAL\fP. +.IP \- +\f(CWTRUNCD\fP is analogous to \f(CWTRUNC\fP, +but yields a result of type +\f(CWLONGINT\fP. +.NH 2 +System-dependent facilities (section 12) +.PP +The type \f(CWBYTE\fP is added to the \f(CWSYSTEM\fP module. +It occupies a storage unit of 8 bits. 
+\f(CWARRAY OF BYTE\fP has a similar effect to \f(CWARRAY OF WORD\fP, +but is safer. +In some obscure cases the \f(CWARRAY OF WORD\fP mechanism does not quite +work properly. +.PP +The procedure \f(CWIOTRANSFER\fP is not implemented. +.NH 1 +Backwards compatibility +.PP +Besides recognizing the language as described in [1], +the compiler recognizes most of the language described in [2], +for backwards compatibility. +It warns the user for old-fashioned +constructions (constructions that [1] does not allow). +If the \fI-Rm2-3\fP option (see [6]) is passed to \fIack\fP, +this backwards compatibility feature is disabled. +Also, +it may not be present on some +smaller machines, +like the PDP-11. +.NH 1 +Compile time errors +.PP +The compile time error messages are intended to be self-explanatory, +and are not listed here. +The compiler also sometimes issues warnings, +recognizable by a warning-classification between parentheses. +Currently, +there are 3 classifications: +.IP "(old-fashioned use)" +.br +These warnings are given on constructions that are not allowed by [1], +but are allowed by [2]. +.IP (strict) +.br +These warnings are given on constructions that are supported by the +ACK Modula-2 compiler, +but might not be supported by others. +Examples: functions returning structured types, +SET types of subranges with +negative lower bound. +.IP (warning) +.br +The other warnings, +such as warnings about variables that are never assigned, +never used, +etc. +.NH 1 +Runtime errors +.PP +The ACK Modula-2 compiler produces code for an EM machine as defined in [3]. +Therefore, +it depends on the implementation +of the EM machine for detection of some of the runtime errors that could occur. +.PP +The \fITraps\fP module enables the user to install his own runtime +error handler. +The default one just displays what happened and exits. +Basically, +a trap handler is just a procedure that takes an INTEGER as +parameter. +The INTEGER is the trap number.
+This INTEGER can be one of the +EM trap numbers, +listed in [3], +or one of the numbers listed in the +\fITraps\fP definition module. +.PP +The following runtime errors may occur: +.IP "array bound error" +.br +The detection of this error depends on the EM implementation. +.IP "range bound error" +.br +Range bound errors are always detected, +unless runtime checks are disabled. +.IP "set bound error" +.br +The detection of this error depends on the EM implementation. +The current implementations detect this error. +.IP "integer overflow" +.br +The detection of this error depends on the EM implementation. +.IP "cardinal overflow" +.br +This error is detected, +unless runtime checks are disabled. +.IP "cardinal underflow" +.br +This error is detected, +unless runtime checks are disabled. +.IP "real overflow" +.br +The detection of this error depends on the EM implementation. +.IP "real underflow" +.br +The detection of this error depends on the EM implementation. +.IP "divide by 0" +.br +The detection of this error depends on the EM implementation. +.IP "divide by 0.0" +.br +The detection of this error depends on the EM implementation. +.IP "undefined integer" +.br +The detection of this error depends on the EM implementation. +.IP "undefined real" +.br +The detection of this error depends on the EM implementation. +.IP "conversion error" +.br +This error occurs when assigning a negative value of type INTEGER to a +variable of type CARDINAL, +or when assigning a value of CARDINAL that is > MAX(INTEGER), +to a variable of type INTEGER. +It is detected, +unless runtime checking is disabled. +.IP "stack overflow" +.br +The detection of this error depends on the EM implementation. +.IP "heap overflow" +.br +The detection of this error depends on the EM implementation. +Might happen when ALLOCATE fails. +.IP "case error" +.br +This error occurs when none of the cases in a CASE statement is selected, +and the CASE statement has no ELSE part.
+The detection of this error depends on the EM implementation. +All current EM implementations detect this error. +.IP "stack size of process too large" +.br +This is most likely to happen if the reserved space for a coroutine stack +is too small. +In this case, +increase the size of the area given to +\f(CWNEWPROCESS\fP. +It can also happen if the stack needed for the main +process is too large and there are coroutines. +In this case, +the only fix is to reduce the stack size needed by the main process, +f.i. by avoiding local arrays. +.IP "too many nested traps + handlers" +.br +This error can only occur when the user has installed his own trap handler. +It means that during execution of the trap handler another trap has occurred, +and that several times. +In some cases, +this is an error because of overflow of some internal tables. +.IP "no RETURN from function procedure" +.br +This error occurs when a function procedure does not return properly +("falls" through). +.IP "illegal instruction" +.br +This error might occur when floating point operations are used on an +implementation that does not have floating point. +.PP +In addition, +some of the library modules may give error messages. +The \fBTraps\fP-module has a suitable mechanism for this. +.NH 1 +Calling the compiler +.PP +See [4,5,6] for a detailed explanation. +.PP +The compiler itself has no version checking mechanism. +A special linker +would be needed to do that. +Therefore, +a makefile generator is included [7]. +.NH 1 +The procedure call interface +.PP +Parameters are pushed on the stack in reversed order, +so that the EM AB +(argument base) register indicates the first parameter. +For VAR parameters, +its address is passed, +for value parameters its value. +The only exception to this rule is with conformant arrays. +For conformant arrays, +the address is passed, +and an array descriptor is +passed. +The descriptor is an EM array descriptor. 
+It consists of three +fields: the lower bound (always 0), +upper bound - lower bound, +and the size of the elements. +The descriptor is pushed first. +If the parameter is a value parameter, +the called routine must make sure +that its value is never changed, +for instance by making its own copy +of the array. +The Modula-2 compiler does exactly this. +.PP +When the size of the return value of a function procedure is larger than +the maximum of \f(CWSIZE(LONGREAL)\fP and twice the pointer-size, +the caller reserves this space on the stack, +above the parameters. +The callee then stores +its result there, +and returns no other value. +.NH 1 +References +.IP [1] +Niklaus Wirth, +.I +Programming in Modula-2, third, corrected edition, +.R +Springer-Verlag, Berlin (1985) +.IP [2] +Niklaus Wirth, +.I +Programming in Modula-2, +.R +Springer-Verlag, Berlin (1983) +.IP [3] +A.S.Tanenbaum, J.W.Stevenson, Hans van Staveren, E.G.Keizer, +.I +Description of a machine architecture for use with block structured languages, +.R +Informatica rapport IR-81, Vrije Universiteit, Amsterdam +.IP [4] +UNIX manual \fIack\fP(1) +.IP [5] +UNIX manual \fImodula-2\fP(1) +.IP [6] +UNIX manual \fIem_m2\fP(6) +.IP [7] +UNIX manual \fIm2mm\fP(1) diff --git a/doc/m68020.doc b/doc/m68020.doc new file mode 100644 index 0000000..daeb464 --- /dev/null +++ b/doc/m68020.doc @@ -0,0 +1,1408 @@ +.nr PS 11 +.nr VS 13p +.EQ +delim @@ +.EN +.EQ +gfont R +.EN +.ND +.RP +.TL +A back end table for the Motorola MC68000, MC68010 and MC68020 microprocessors +.AU +Frank Doodeman +.AB +A back end table is part of the Amsterdam Compiler Kit (ACK). It is used +to produce the actual back end, a program that translates the intermediate +language family EM to assembly language for some target machine.
The table +discussed here can be used for two back ends, suitable for in total three +machines: the MC68000 and MC68010 (the difference between these two is +so small that one back end table can be used for either one), or +for the MC68020. +.AE +.NH +Introduction +.PP +To simplify the task of producing portable (cross) compilers and interpreters +the Vrije Universiteit designed an integrated collection of programs, the +Amsterdam Compiler Kit (ACK) [2]. It is based on the old UNCOL idea [1] which +attempts to solve the problem of how to make a compiler for each of @ N @ +languages on @ M @ different machines without having to write @ N times M @ +programs. +.PP +The UNCOL approach is to write @ N @ +.I +front ends, +.R +which translate the +source language into a common intermediate language UNCOL (Universal Computer +Oriented Language), and @ M @ +.I +back ends, +.R +each of which translates programs in +UNCOL into a specific machine language. Under these conditions only @ M + N @ +programs must be written to provide all @ N @ languages on all @ M @ +machines, instead of @ M times N @ programs. +.PP +The intermediate language for the Amsterdam Compiler Kit is the machine language +for a simple stack machine called EM (Encoding Machine) [3]. So a back end for +the MC68020 translates EM code into MC68020 assembly language. Writing such a +table [4] suffices to get the back end. +.PP +The back end is a single program that is driven by a machine dependent driving +table. This table, the back end table, defines the mapping of EM code to +the MC68000, MC68010 or MC68020 assembly language. +.NH +The MC68000 and MC68020 micro processors +.PP +In this document the name MC68000 will be used for both the MC68000 and the +MC68010 micro processors, because as far as the back end table is concerned +there is no difference between them. For a complete and detailed description +of the MC68020 one is referred to [5]; for the MC68000 one might also use [6]. 
+In this section some relevant parts will be handled. +.NH 2 +Registers +.PP +Both the MC68000 and the MC68020 have eight 32-bit data registers (@ D sub 0 @-@ D sub 7 @) that can +be used for byte (8-bit), word (16-bit) and long word (32-bit) data operations. +They also have seven 32-bit address registers (@ A sub 0 @-@ A sub 6 @) that may be used as +software stack pointers and base address registers; address register @ A sub 7 @ is +used as the system stack pointer. Address registers may also be used for +word and long word address operations. +.NH 2 +Addressing modes +.PP +First the MC68000 addressing modes will be discussed. Since the MC68020's +set of addressing modes is an extension of the MC68000's set, of course this +section also applies to the MC68020. +.PP +In the description we use: +.IP @ A sub n @ +for address register; +.IP @ D sub n @ +for data register; +.IP @ R sub n @ +for address or data register; +.IP @ X sub n @ +for index register (either data or address register); +.IP @ PC @ +for program counter; +.IP @ d sub 8 @ +for 8 bit displacement integer; +.IP @ d sub 16 @ +for 16 bit displacement integer; +.IP @ bd @ +for base displacement (may be null, word or long); +.IP @ od @ +for outer displacement (may be null, word or long). +.NH 3 +General addressing modes +.NH 4 +Register Direct Addressing +.IP Syntax: 8 +@ R sub n @ +.PP +This addressing mode (it can be used with either a data register or an address +register) specifies that the operand is in one of +the 16 multifunction registers. +.NH 4 +Address Register Indirect +.IP Syntax: 8 +@ ( A sub n ) @ +.PP +The address of the operand is in the address register specified. +.NH 4 +Address Register Indirect With Postincrement +.IP Syntax: 8 +@ ( A sub n )+ @ +.PP +The address of the operand is in the address register specified. After the +operand address is used, the address register is incremented by one, two or +four depending upon whether the size of the operand is byte, word or long. 
+If the address register is the stack pointer and the operand size is byte, the +address register is incremented by two rather than one to keep the stack pointer +on a word boundary. +.NH 4 +Address Register Indirect With Predecrement +.IP Syntax: 8 +@ -( A sub n ) @ +.PP +The address of the operand is in the address register specified. Before the +operand address is used, the address register is decremented by one, two or +four depending upon whether the size of the operand is byte, word or long. +If the address register is the stack pointer and the operand size is byte, the +address register is decremented by two rather than one to keep the stack pointer +on a word boundary. +.NH 4 +Address Register Indirect With Displacement +.IP Syntax: 8 +@ d sub 16 ( A sub n ) @ for the MC68000, @ ( d sub 16 , A sub n ) @ for the MC68020 +.PP +This address mode requires one word of extension. The address of the operand is +the sum of the contents of the address register and the sign extended 16-bit +integer in the extension word. +.NH 4 +Address Register Indirect With Index +.IP Syntax: 8 +@ d sub 8 ( A sub n , X sub n .size) @ for the MC68000, @ ( d sub 8 , A sub n , X sub n .size) @ for the MC68020 +.PP +This address mode requires one word of extension according to a certain format, +which specifies +.IP 1. +which register to use as index register; +.IP 2. +a flag that indicates whether the index register is a data register or an +address register; +.IP 3. +a flag that indicates the index size; this is +.I word +when the low order part of the index register is to be used, and +.I long +when the whole long value in the register is to be used as index; +.IP 4. +an 8-bit displacement integer (the low order byte of the extension word). +.PP +The address of the operand is the sum of the contents of the address register, +the possibly sign extended contents of index register and the sign +extended 8-bit displacement. 
+.NH 4 +Absolute Data Addressing +.IP Syntax: 8 +@ address @ for the MC68000, @ ( address ) @ for the MC68020 +.PP +Two different kinds of this mode are available: +.IP 1. +Absolute Short Address; this mode requires one word of extension. The address of +the operand is the sign extended 16-bit extension word. +.IP 2. +Absolute Long Address; this mode requires two words of extension. The address of +the operand is developed by concatenation of the two extension words; the high +order part of the address is the first extension word, the low order part is +the second. +.NH 4 +Program Counter With Displacement. +.IP Syntax: 8 +@ d sub 16 ( PC ) @ for the MC68000, @ ( d sub 16 , PC ) @ for the MC68020 +.PP +This mode requires one word of extension. The address of the operand is the sum +of the address in the program counter and the sign extended 16-bit displacement +integer in the extension word. The value in the program counter is the +address of the extension word. +.NH 4 +Program Counter With Index +.IP Syntax: 8 +@ d sub 8 ( PC , X sub n .size ) @ for the MC68000, @ ( d sub 8 , PC, X sub n .size ) @ for the MC68020 +.PP +This mode requires one word of extension as described under +.I +Address Register Indirect With Index. +.R +The address of the operand is the sum of the value in the +program counter, the possibly sign extended index register and the sign +extended 8-bit displacement integer in the extension word. +The value in the program counter is the address of the extension word. +.NH 4 +Immediate Data +.IP Syntax: 8 +@ "\#data" @ +.PP +This addressing mode requires either one or two words of extension, depending +on the size of the operation; +.IP +byte operation - the operand is in the low order byte of extension word; +.IP +word operation - the operand is in the extension word; +.IP +long operation - the operand is in the two extension words, the high order +16-bits are in the first extension word, the low order 16-bits in the second. 
+.NH 3 +Extra MC68020 addressing modes +.PP +The MC68020 has three more addressing modes. These modes all use a displacement +(some even two), an address register and an index register. Instead of the +address register one may also use the program counter. Any of these +may be omitted. If all addends are omitted the processor creates an +effective address of zero. All of these three modes require at least one +extension word, the +.I +Full Format Extension Word, +.R +which specifies: +.IP 1. +the index register number (0-7); +.IP 2. +the index register type (address or data register); +.IP 3. +the size of the index (only low order part or the whole register); +.IP 4. +a scale factor. This is a number from 0 to 3 which specifies how many bits +the contents of the index register are to be shifted to the left before being +used as an index; +.IP 5. +a flag that specifies whether the base (address) register is to be added or +to be suppressed; +.IP 6. +a flag that specifies whether to add or suppress the index operand; +.IP 7. +two bits that specify the size of the base displacement (null, word or long); +.IP 8. +three bits that in combination with (6) above specify which of the three +addressing modes (described below) to use and, if used, the size of the +outer displacement (null, word or long). +.IP N.B. +All modes mentioned above for the MC68000 +that use an index register may have this register +scaled (only when using the MC68020). +.PP +The three extra addressing modes are: +.NH 4 +Address Register Indirect With Index (Base Displacement) +.IP Syntax: 8 +@ ( bd , A sub n , X sub n .size*scale ) @ (MC68020 only) +.PP +The address of the operand is the sum of the contents of the address register, +the scaled contents of the possibly sign extended index register and the possibly +sign extended base displacement. When the program counter is used instead +of the address register, the value in the program counter is the address +of the full format extension word.
This mode requires one or two more extension +words when the size of the base displacement is word or long respectively. +.PP +Note that without the index operand, this mode is an extension of the +.I +Address Register Indirect With Displacement +.R +mode; when using the MC68020 one is no longer limited to a 16-bit displacement. +Also note that with the index operand added, this mode is an extension +of the +.I +Address Register Indirect With Index +.R +mode; when using the MC68020 one is no longer limited to an 8-bit displacement. +.NH 4 +Memory Indirect Post-Indexed +.IP Syntax: 8 +@ ( [ bd , A sub n ] , X sub n .size*scale , od ) @ (MC68020 only) +.PP +This mode may use an outer displacement. First an intermediate memory +address is calculated by adding the contents of the address register and +the possibly sign extended base displacement. This address is used +for an indirect memory access of a long word, followed by adding +the index operand (scaled and possibly sign extended). Finally the +outer displacement is added to yield the address of the operand. +When the program counter is used, the value in the program counter is the +address of the full format extension word. +.NH 4 +Memory Indirect Pre-Indexed +.IP Syntax: 8 +@ ( [ bd , A sub n , X sub n .size*scale ] , od ) @ (MC68020 only) +.PP +This mode may use an outer displacement. First an intermediate memory +address is calculated by adding the contents of the address register, +the scaled contents of the possibly sign extended index register and +the possibly sign extended base displacement. This address is used +for an indirect memory access of a long word, followed by adding +the outer displacement to yield the address of the operand. +When the program counter is used, the value in the program counter is the +address of the full format extension word. +.NH 3 +Addressing modes used in the table +.PP +Not all addressing modes mentioned above are used in code generation.
It is +clear that none of the modes that use the program counter PC can be used, +since at code generation time nothing is known about the value in PC. +Also some of the possibilities of the three MC68020 addressing modes are not +used; e.g. it is possible to use a +.I +Data Register Indirect +.R +mode, which actually is the +.I +Address Register Indirect With Index +.R +mode, with the address register and the displacement left out. However +such a mode would require two extra bytes for the full format extension word, +and it would also be much slower than using +.I +Address Register Indirect. +.R +For this kind of reasons several possible addressing modes are not used in the +generation of code. +In the table address registers are only used for holding addresses, and +for index registers only data registers are used. +.NH +The M68000 and MC68020 back end table +.PP +The table itself has to be run through the C preprocessor +before it can be used to generate +the back end (called +.I +code generator +.R +or +.I cg +for short). When no flags are given to +the preprocessor an MC68020 code generator is produced; for the MC68000 +code generator one has to run the table through the preprocessor using the +.I -Dm68k4 +flag. +.PP +The table is designed as described in [4]. For the overall design of a back +end table one is referred to this document. This section only deals +with problems encountered in writing the table and other things worth noting. +.NH 2 +Constant Definitions +.PP +Wordsize and pointersize (EM_WSIZE and EM_PSIZE respectively) are defined +as four (bytes). EM_BSIZE, the hole between AB (the parameter base) and +LB (the local base), is eight bytes: only +the return address and the localbase are saved. +.NH 2 +Properties +.PP +Since Hans van Staveren in his document [4] clearly states that +.I cg +execution time is negatively influenced by the number of properties, only +four different properties have been defined. 
Besides, since the registers +really are multifunctional, these four are really all that are needed. +.NH 2 +Registers +.PP +The table uses register variables: @ D sub 3 @ - @ D sub 7 @ are used as general register +variables, and address registers @ A sub 2 @ - @ A sub 5 @ are used as pointer register +variables. @ A sub 6 @ is reserved for the localbase. +.NH 2 +Tokens +.PP +At first glance one might wonder about the number of tokens, especially +for the MC68020, considering the small number of different addressing modes. +However, the last three addressing modes mentioned for the MC68020 may +omit any of the addends, and this leads to a large number of different tokens. +I did consider the possibility of enlarging the number of tokens and sets +even further, because there might be assemblers that don't handle displacements +of zero optimally (they might generate a 2 byte extension word holding zero). +The small profit in bytes in the generated code +however does not justify the increase +in size of the token section, the set section and the patterns section, +so this idea was not developed any further. +.PP +The timing cost of the tokens may be incorrect for some MC68000 tokens. +This is because the MC68000 uses a 16-bit data bus which causes the need +of two separate memory accesses for getting 32-bit operands. +.NH 3 +Token names +.PP +The number of tokens and the limited capability of the author's imagination +might have caused the names of some tokens not to be very clear. +Some information about the names may be in place here. +.PP +Whenever part of a token name is in capitals that part is memory indirected +(i.e. in square brackets). In token names +.I OFF +and +.I off +mean an offsetted address register, so an address register with a displacement +(either base displacement or outer displacement). +.I +IND, ind +.R +and +.I index +stand for indexed, or index register.
+.I ABS +and +.I abs +stand for absolute, which actually is just a displacement (base or outer). +These `rules' only apply to names of tokens that represent actual operands. +There are also tokens that represent addresses of operands. These +(with a few exceptions) contain +.I +regA, regX +.R +and +.I con +as parts of their names, which stand for address register, index register and +displacement (always base displacement) respectively. If the address to which +the token refers uses memory indirection, that part of the name comes first +(in small letters), followed by an underscore. The memory indirection part +follows the `rules' for operand token names. +.PP +Of course there are exceptions to these `rules' but in those cases the names +are self explanatory. +.PP +Two special cases: +.I ext_regX +is the name of the token that represents the +address of an absolute indexed operand, syntax @ ( bd , X sub n .size*scale ) @; +.I regX +does not represent any real mode, but is used with EM array instructions and +pointer arithmetic. +.NH 3 +Special tokens for the MC68000 +.PP +The MC68000 requires two extra tokens, which are called +.I t_regAcon +and +.I +t_regAregXcon. +.R +They are necessary because +.I regAcon +can only have a 16-bit displacement on the MC68000, and +.I regAregXcon +uses only 8 bits for its displacement. To prevent these addressing modes from +being used with displacements that are too large, the extra tokens are needed. +Whenever the displacements become too large and they need +to be used in the generation +of assembly code, these tokens are transformed into other tokens. +To prevent the table from becoming too messy I defined +.I t_regAcon +and +.I t_regAregXcon +to be identical to +.I regAcon +and +.I regAregXcon +respectively for the MC68020.
+.NH 2 +Sets +.PP +Most set names used in the table are self explanatory, especially to the reader +who is familiar with the four addressing categories as mentioned in [5]: +.I +data, memory, alterable +.R +and +.I +control. +.R +In the sets definition part some sets are defined that are not used elsewhere in +the table, but are only used to be part of the definition of +some other set. This keeps the +set definition part from getting too unreadable. +.PP +The sets called +.I imm_cmp +consist of all tokens that can be used to compare with a constant. +.NH 2 +Instructions +.PP +Only the instructions that are used in code generation are listed here. +The first few instructions are meant especially for the use with register +variables. The operand LOCAL used here refers to a register variable. +The reader may not conclude that these operations are also allowed on +ordinary locals. The space and timing cost of these instructions have been +adapted, but the use of the word LOCAL for register variables causes these cost +to be inaccurate anyway. +.PP +The +.I killreg +instruction, which generates a comment in the assembly language output and +which is meant to let +.I cg +know that the data register operand has its contents destroyed, +needs some explaining but this explanation is better in place +in the discussion of groups 3 and 4 of the section about patterns. +.PP +The timing cost of the instructions are probably not very accurate for the +MC68020 because the MC68020 uses an instruction cache and prefetch. The +cost used in the table are the `worst case cost' as mentioned in section 9 +of [5]. +.NH 2 +Moves +.PP +These are all pretty straightforward, except perhaps when +.I t_regAcon +and +.I t_regAregXcon +are used. In these cases the size of the displacement has to be checked +before moving. This also applies to the stacking rules and the coercions. 
+.NH 2 +Tests +.PP +These three tests (one for each operation size) could not be more +straightforward than they are now. +.NH 2 +Stackingrules +.PP +The only peculiar stackingrule is the one for +.I +regX. +.R +This token is only used with EM array instructions and +with pointer arithmetic. Whenever it is put +on the fake stack, some EM instructions are left in the instruction stream +to remove this token. Consequently it should never have to be stacked. However +the +.I +code generator generator +.R +(or +.I cgg +for short) +complained about not having a stackingrule for this token, so it had to +be added nevertheless. +.NH 2 +Coercions +.PP +These are all straightforward. There are no splitting coercions since +the fake stack never contains any tokens that can be split. +There are only two unstacking coercions. +The rest are all transforming coercions. Almost all coercions transform +tokens into either a data register or an address register, except in the +MC68000 part of the table the +.I t_regAcon +and +.I t_regAregXcon +tokens are transformed into real +.I regAcon +and +.I regAregXcon +tokens with displacements that are properly sized. +.NH 2 +Patterns +.PP +This is the largest part of the table. It is subdivided into 17 groups. +We will take a closer look at the more interesting groups. +.NH 3 +Group 0: rules for register variables +.PP +This group makes sure that EM instructions using register variables are +handled efficiently. This group includes: local loads and +stores; arithmetic, shifts and logical operations on locals and indirect locals +and pointer handling, where C expressions like +.I +*cp++ +.R +are handled. For such an expression there are several EM instruction +sequences the front end might generate.
For an integer pointer e.g.: +.DS +.B +lol lol adp stl loi $1==$2 && $1==$4 && $3==4 && $5==4 +.I +.DE +or +.DS +.B +lol loi lol adp stl $1==$3 && $3==$5 && $2==4 && $4==4 +.I +.DE +or perhaps even +.DS +.B +lil lol adp stl $1==$2 && $2==$4 && $3==4 +.I +.DE +Each of these is included, since which one is generated is up to the front +end. If the front end is consistent this will mean that some of these patterns +will never be used in code generation. This might seem a waste, but anyone +who thinks that will certainly change his mind when his new C front end +generates a different EM instruction sequence. +.NH 3 +Groups 1 and 2: load and store instructions +.PP +In these groups +.B lof +and +.B stf +, +.B loi +and +.B sti +, +.B ldf +and +.B sdf +are the important instructions. +These are the large parts in this group, especially the +.B loi +and +.B sti +instructions, because they come in three basic sizes (byte, word and long). +Note that with these instructions in the MC68000 part the +.I exact +is omitted in front of +.I regAcon +and +.I +regAregXcon. +.R +This makes sure that +.I t_regAcon +and +.I t_regAregXcon +are transformed into proper tokens before they are used as addresses. +.PP +Also note that the +.I regAregXcon +token is completely left out from the +\fBlof\fR, \fBstf\fR, \fBldf\fR and \fBsdf\fR +instruction handling. This is because the sum of the token displacement +and the offset provided in the instruction cannot be checked and is likely +to exceed 8 bits. Unfortunately +.I cgg +does not allow the inspection of subregisters of tokens that are on the +fake stack. This same problem might also occur with the +.I regAcon +token, but this is less likely because it +uses 16-bit displacements. Besides if it would have been left out the +\fBlof\fR, \fBstf\fR, \fBldf\fR and \fBsdf\fR +instructions would have been handled considerably less efficiently.
+.NH 3 +Groups 3 and 4: integer and unsigned arithmetic +.PP +EM instruction +.B sbi +also works with address registers, because the +.B cmp +instruction in group 12 is replaced by \fBsbi 4\fR. +.PP +For the MC68000 \fBmli\fR, \fBmlu\fR, \fBdvi\fR, \fBdvu\fR, \fBrmi\fR +and \fBrmu\fR are handled +by library routines. This is because the MC68000 has only 16-bit multiplications +and divisions. +.PP +The MC68020 does have 32-bit multiplications and divisions, but for the +.B rmi +and +.B rmu +EM instructions peculiar things happen anyway: they generate the +.I killreg +instruction. This is necessary because the data register that +first held the dividend now holds the quotient; the original contents are +destroyed without +.I cg +knowing about it (the destruction of the two registers that make up the +.I DREG_pair +token couldn't be noted in the instructions part of the table). +To let +.I cg +know that these contents are destroyed, we have to use this `pseudo instruction' +from lack of a better solution. +.NH 3 +Group 5: floating point arithmetic +.PP +Since floating point arithmetic is not implemented traps will be generated here. +.NH 3 +Group 6: pointer arithmetic +.PP +This also is a very important group, along with groups 1 and 2. The MC68020 +has many different addressing modes and if possible they should be used in +the generation of assembly language. +.PP +The +.I regX +token is generated here too. It is meant to make efficient use of the +MC68020 possibility of scaling index registers. +.PP +Note that I would have liked one extra pattern to handle C-statements +like +.DS +.I +pointer += expr ? constant1 : constant2; +.R +.DE +efficiently. This pattern would have looked like: +.DS +pat ads +with const +leaving adp %1.num +.DE +but when +.I cg +is coming to the EM replacement part, the constant has already been removed +from the fake stack, causing +.I %1.num +to have a wrong value. 
+.NH 3 +Group 9: logical instructions +.PP +The EM instructions \fBand\fR, +.B ior +and +.B xor +are so much alike that procedures can be used here, except for the +.B +xor $1==4 +.R +instruction, because the MC68000 +.I eor +instruction does not allow as many kinds of operands as +.I and +and +.I +or. +.R +.NH 3 +Group 11: arrays +.PP +This group also tries to make efficient use of the available addressing modes, +but it leaves the actual work to group 6 mentioned above. +.PP +The +.I regX +token is also generated here. In this group this token is very useful for +handling array instructions for arrays with one, two, four or eight byte +elements; the array index goes into the index register, which can then +be scaled appropriately. An offset is used when the +first array element has an index other than zero. +.PP +I would have liked some extra patterns here too but they won't work +for the same reasons as explained in the discussion of group 6. +.NH 3 +Group 14: procedure calls instructions +.PP +The function return area consists of registers @ D sub 0 @ and @ D sub 1 @. +.NH 3 +Group 15: miscellaneous instructions +.PP +In many cases here library routines are called. These will be discussed +later. +.PP +Two special EM instructions are included here: \fBdch\fR, and \fBlpb\fR. +I don't know when they are generated by a front end, but these +instructions were also in the back end table for the PDP. In the PDP table +these instructions were replaced by +.B +loi 4 +.R +and +.B +adp 8 +.R +respectively. I included them both, since they couldn't do any harm. +.NH 3 +Extra group: optimalization +.PP +This group is handling EM patterns with more than one instruction. This group +is not absolutely necessary but it makes the generation of code +more efficient. 
Among the things that are handled here are: arithmetic and +logical operations on locals, externals and indirect locals; shifting +of locals, externals and indirect locals by one; some pointer arithmetic; tests +in combination with logical and's and or's or with branches. Finally +there are sixteen patterns about divisions that could be handled more +efficiently by right shifts and which I think should be handled by the +peephole optimizer (since it also handles +the same patterns with multiplication). +.NH +The library routines +.PP +The table is supplied with two separate libraries: one for the MC68000 and one +for the MC68020. The MC68000 uses a couple more routines than the MC68020 +because it doesn't have 32-bit division and multiplication. +.PP +The routines that need to pop their operands first store their return address. +Routines that need other registers besides @ D sub 0 @-@ D sub 2 @ and @ A sub 0 @-@ A sub 1 @ first store +the original contents of those registers. @ D sub 0 @-@ D sub 2 @ and @ A sub 0 @-@ A sub 1 @ do not have +to be saved because if they contain anything useful, their contents +are pushed on the stack before the routine is called. +.PP +The +.I .trp +routine just prints a message stating the trap number and exits (except +of course when that particular trap number is masked). Usually higher +level languages use their own trap handling routines. +.PP +The +.I .mon +routine doesn't do anything useful at all. It just prints a message stating that +the specified system call is not implemented and then exits. Front ends +usually generate calls to special routines rather than the EM +instruction \fBmon\fR. +These routines have to be supplied in another library. They +may be system dependent (e.g.
the MC68000 machine this table was tested on +first moves the parameters to registers, then moves the system call number +to @ D sub 0 @ and then executes +.I +trap #0, +.R +whereas the MC68020 machine this table was tested on required the parameters +to be on the stack rather than in registers). Therefore this library is not +discussed here. +.PP +The +.I .printf +routine is included for EM diagnostic messages. It can print strings using %s, +16-bit decimal numbers using %d and 32-bit hexadecimal numbers using %x. +.PP +The +.I .strhp +routine stores a new EM heap pointer, and sometimes it needs to allocate more +heap space. This is done by calling the system call routine \fI_brk\fR. +Chunks of 1K bytes are allocated, but this can easily be changed into +larger or smaller chunks. +.PP +The MC68000 library also contains a routine to handle the EM instruction \fBrck\fR. +The MC68020 has an instruction +.I cmp2 +that is specially meant for range checking so the MC68020 library can do without +that routine. +.PP +The MC68000 library has two multiplication routines, one for unsigned and the other +for signed multiplication. The one for unsigned multiplication +first tests the sizes of the operands, to see if it can perform +the 16 bit machine instruction instead of the routine. If not, it considers +its two operands to be two digit numbers in a 65536-radix system. It +uses the 16-bit unsigned multiply instruction +.I mulu +three times (it does not calculate the high order result), +and adds up the intermediary results the proper way. The signed +multiplication routine calculates the sign of the result, calculates +the result as if it were an unsigned multiplication, and +adjusts the sign of the result. Here testing +the operands for their sizes would be less simple, because the operands +are signed; so that is not done here. +.PP +The MC68000 library also has two division routines.
The routine for unsigned +division uses the popular algorithm, where the divisor is shifted out and +the quotient shifted in. The signed division routine calculates the sign of +both the quotient and the remainder, calls the unsigned division routine +and adjusts the signs for the quotient and the remainder. +.PP +The +.I .nop +routine is included for testing purposes. This routine prints the line +number and the value in the stack pointer. Calls to this routine +are generated by the EM instruction \fBnop\fR, which is ordinarily +left out by the peephole optimizer. +.NH +Testing the table +.PP +There are special test programs available for testing back end tables. +First there is the EM test set, which tests most EM instructions, making +good use of the +.B nop +instruction. Then there are the Pascal and C test programs. The Pascal +test programs report errors, which makes it relatively easy +to find out what was wrong in the table. The C test programs just +generate some output, which then has to be compared to the expected +output. Differences are +not only caused by errors but also e.g. by the use of four +byte integers and unsigneds (which this table does), +the use of signed characters +instead of unsigned characters (the C front end I used generated signed +characters) or because the back end +does not support floating point. +These differences have to be `filtered out' to reveal +the differences caused by actual errors in the back end table. +These errors then have to be found out by examining the assembly code, for +no proper diagnostic messages are generated. +.PP +After these three basic tests there still remain a number of patterns that +haven't been tested yet. Fortunately +.I cgg +offers the possibility of generating a special +.I cg +that can print a list of patterns that haven't been used in +code generation yet. +For these patterns the table writer has to write his own test programs. 
+This may complicate things a bit because errors may now be caused by +errors in the back end table as well as errors in the test programs. +The latter happened quite often to me, because I found EM +to be an uncomfortable programming language (of course it isn't meant to +be a programming language, but an intermediary language). +.PP +There still remain a couple of patterns in this table that haven't been tested +yet. However these patterns all have very similar cases that have been +tested (an example of this is mentioned in the section on group 0 +of the patterns section of the table). Some patterns have to +do with floating point numbers. These EM instructions all generate +traps, so they didn't all have to be tested. The two instructions +.B dch +and +.B lpb +haven't been tested in this table, but since they only use EM replacement +and they have been tested in the PDP back end table, these two should +be all right. +.NH +Performance of the back end +.PP +To test the performance of the back end I gathered a couple of +C programs and compiled them on the machines I used to test the back ends on. +I compiled them using the C compiler that was available there and +I also compiled them using the back end. I then compared the sizes +of the text segments in the object files. +The final results of these comparisons are in fig. 1 and fig. 2. +.KF +.TS +center box; +cfI s s s s s +c s s s s s +c c | c s | c s +c c | c s | c s +c | c | c c | c c +l | n | n n | n n. +Differences in text segment sizes for the MC68000 +parts of the back end compiled by itself +_ +original old m68k4 new MC68000 +compiler (100%) back end back end +_ +name size size perc. size perc. 
+_ +codegen.c 13892 16224 116.7% 12860 92.5% +compute.c 4340 4502 103.7% 4530 104.3% +equiv.c 680 662 97.3% 598 87.9% +fillem.c 8016 7304 91.1% 6880 85.8% +gencode.c 1356 1194 88.0% 1130 83.3% +glosym.c 224 202 90.1% 190 84.8% +main.c 732 672 91.8% 634 86.6% +move.c 1876 1526 81.3% 1410 75.1% +nextem.c 1288 1594 123.7% 1192 92.5% +reg.c 1076 1014 94.2% 916 85.1% +regvar.c 1352 1188 87.8% 1150 85.0% +salloc.c 1240 1100 88.7% 1024 82.5% +state.c 628 600 95.5% 532 84.7% +subr.c 6948 6382 91.8% 5680 81.7% += +averages 2939 3155 95.8% 2766 86.6% +.TE +.DS C +fig 1. +.DE +.KE +.KF +.TS +center box; +cfI s s s +cfI s s s +c s s s +c s s s +c c | c s +c c | c s +c | c | c c +l | n | n n. +Differences in text segment sizes +for the MC68020 +parts of the back end +compiled by itself +_ +original MC68020 +compiler (100%) back end +_ +name size size perc. +_ +codegen.c 12608 12134 96.2% +compute.c 4624 4416 95.5% +equiv.c 572 504 88.1% +fillem.c 7780 6976 89.6% +gencode.c 1320 1086 82.2% +glosym.c 228 182 79.8% +main.c 736 596 80.9% +move.c 1392 1280 91.9% +nextem.c 1176 1066 90.6% +reg.c 1052 836 79.4% +regvar.c 1196 968 80.9% +salloc.c 1200 932 77.6% +state.c 580 528 91.0% +subr.c 6136 5268 85.8% += +averages 2900 2627 86.4% +.TE +.DS C +fig 2. +.DE +.KE +Fig. 1 also includes results of an old m68k4 back end (a back end +for the MC68000 with four byte word and pointersize). The table for +this back end was given to me as an example, but I thought it didn't make +good use of the MC68000's addressing capabilities, it hardly did any +optimalization, and it sometimes even +generated code that the assembler would not swallow. +This was sufficient reason for me to write a completely new table. +.PP +The results from the table may not be taken too seriously. The sizes measured +are the sizes of the text segments of the user programs, i.e. without the +inclusion of library routines. Of course these segments do contain calls +to these routines. 
Another thing is that the +.I rom +segment may be included in the text segment (this is why the +results for the MC68000 for +.I compute.c +look so bad). +.PP +Some other things must be said about these results. +The quality of EM code +generated by the C front end is certainly not optimal. The front end +uses temporary locals (extra locals that are used to evaluate expressions) +far too quickly: for a simple C expression like +.DS +.I +*(pointer) += constant +.R +.DE +where +.I pointer +is a register variable, the C front end generates (for obscure reasons) +a temporary local that holds the contents of \fIpointer\fR. This way +the pattern for +.DS +.B +loc lil adi sil $2==$4 && $3==4 +.R +.DE +for register variables is not used and longer, less efficient +code is generated. But even in spite of this, the back end seems to +generate rather compact code. +.NH +Some timing results +.PP +In order to measure the performance of the code generated by the back end +some timing tests were done. The reason I chose these particular tests is +that they were also done for many other back ends; the reader can compare +the results if he so wishes (of course comparing the results only +shows a global difference in speed of the various machines; it doesn't +show whether some back end generates relatively better code than another). +.PP +On the MC68000 machine the statements were executed one million times. +On the MC68020 machine the statements had to be executed four million times +because this machine was so fast that timing results would be very +unreliable if the statements were executed only one million times. +.PP +For testing I used the following C test program: +.DS +.I +main() +{ + int i, j, ... + ... + for (i=0; i<1000; i++) + for (j=0; j<1000; j++) + STATEMENT; +} +.R +.DE +where +.I STATEMENT +is any of the test statements or the empty statement. For the MC68020 +tests I used 2000 instead of 1000.
+The results of the test with the empty statement were used to calculate +the execution times of the other test statements. +.PP +Figures 3 and 4 show many results. For each machine actually two tests were +done: one with register variables, and the other without them. +I noticed that the original C compilers on both machines did not generate +the use of register variables, unless specifically requested. The +back end uses register variables when and where they are profitable, even +if the user did not ask for them. +.KF +.TS +center box; +cfI s s s s +c s s s s +c | c s | c s +cw(1.5i) | c c | c c +c | c c | c c +lp-2fI | n n | n n. +timing results for the MC68000 +times in @ mu @seconds +_ +test statement without register variables with register variables +_ + original new MC68000 original new MC68000 + C compiler back end C compiler back end +_ +int1=0; 2.8 2.7 0.5 0.5 +int1=int2-1; 4.1 4.1 1.3 1.3 +int1=int1+1; 4.1 4.1 1.3 1.3 +int1=int2*int3; 40.0 40.5 36.2 36.8 +T{ +int1=(int2<0); +\/*true*/ +T} 5.5 7.3 2.0 4.5 +T{ +int1=(int2<0); +\/*false*/ +T} 4.7 8.5 2.8 5.6 +T{ +int1=(int2<3); +\/*true*/ +T} 6.2 7.7 2.6 5.4 +T{ +int1=(int2<3); +\/*false*/ +T} 5.4 8.9 3.6 6.5 +T{ +.na +int1=((int2>3)||(int2<3)); +\/* true || false */ +T} 6.0 7.8 3.4 5.4 +T{ +.na +int1=((int2>3)||(int2<3)); +\/* false || true */ +T} 9.1 10.2 5.7 7.1 +T{ +.na +switch (int1) { +case 1: int1=0; break; +case 2: int1=1; break; +} +T} 6.3 17.8 5.3 14.0 +T{ +.na +if (int1=0) int2=3; +\/*true*/ +T} 5.1 4.7 1.3 1.3 +T{ +.na +if (int1=0) int2=3; +\/*false*/ +T} 2.2 2.1 1.9 1.1 +while (int1>0) int1=int1-1; 2.2 2.1 1.1 1.1 +int1=a[int2]; 6.8 6.7 4.0 3.1 +p3(int1); 14.3 11.1 13.4 10.0 +int1=f(int2); 17.7 14.5 14.8 11.7 +s.overhead=5400; 2.8 2.7 2.9 2.7 +.TE +.DS C +Fig. 3 +.DE +.KE +.KF +.TS +center box; +cfI s s s s +c s s s s +c | c s | c s +cw(1.5i) | c c | c c +c | c c | c c +lp-2fI | n n | n n. 
+timing results for the MC68020 +times in @ mu @seconds +_ +test statement without register variables with register variables +_ + original new MC68020 original new MC68020 + C compiler back end C compiler back end +_ +int1=0; .25 .25 .15 .15 +int1=int2-1; 1.3 1.3 .38 .38 +int1=int1+1; 1.2 .90 .38 .15 +int1=int2*int3; 4.4 4.2 3.0 3.1 +T{ +int1=(int2<0); +\/*true*/ +T} 1.6 2.7 1.1 2.3 +T{ +int1=(int2<0); +\/*false*/ +T} 1.9 2.9 .80 2.1 +T{ +int1=(int2<3); +\/*true*/ +T} 1.7 2.8 1.2 2.6 +T{ +int1=(int2<3); +\/*false*/ +T} 2.1 3.0 .85 2.3 +T{ +.na +int1=((int2>3)||(int2<3)); +\/* true || false */ +T} 2.1 3.1 1.2 2.5 +T{ +.na +int1=((int2>3)||(int2<3)); +\/* false || true */ +T} 3.4 4.2 1.8 3.2 +T{ +.na +switch (int1) { +case 1: int1=0; break; +case 2: int1=1; break; +} +T} 2.7 8.0 2.0 6.9 +T{ +.na +if (int1=0) int2=3; +\/*true*/ +T} 1.2 1.3 .63 .63 +T{ +.na +if (int1=0) int2=3; +\/*false*/ +T} 1.7 1.6 .50 .53 +while (int1>0) int1=int1-1; 1.2 1.3 .55 .53 +int1=a[int2]; 1.8 1.8 1.0 1.0 +p3(int1); 14.8 5.5 14.1 5.0 +int1=f(int2); 16.3 6.6 15.2 5.9 +s.overhead=5400; .48 .48 .50 .50 +.TE +.DS C +Fig. 4 +.DE +.KE +.PP +The reader may have noticed that on both machines the back end seems +to generate considerably slower code for tests where a `condition' is +used in the rhs of an assignment statement. This is in fact not true: it is +the front end that generates bad code. Two examples: for the C statement +.DS +.I +int1 = (int2 < 0); +.R +.DE +the front end generates the following code for the rhs (I +used arbitrary labels): +.DS +.B +lol -16 +zlt *10 +loc 0 +bra *11 +10 +loc 1 +11 +.R +.DE +while in this case (to my opinion) it should have generated +.DS +.B +lol -16 +tlt +.R +.DE +which is much shorter. 
Another example: for the C statement +.DS +.I +int1 = (int2 < 3); +.R +.DE +the front end generates for the rhs +.DS +.B +lol -16 +loc 3 +blt *10 +loc 0 +bra *11 +10 +loc 1 +11 +.R +.DE +while a much better translation would be +.DS +.B +lol -16 +loc 3 +cmi 4 +tlt +.R +.DE +.PP +Another statement that the back end seems to generate slower code for is +the C switch statement. This is true, but it is also caused by +the way these things are done in EM. EM uses the +.B csa +or +.B csb +instruction, and for these two I had to use library routines. On larger +switch statements the +.I .csa +routine will perform relatively better. +.PP +The back end generates considerably faster code for procedure and function +calls, especially in the MC68020 case, and also for the C statement +.DS +.I +int1 = int1 + 1; +.R +.DE +The original C compilers use the same method for this instruction +as for +.DS +.I +int1 = int2 - 1; +.R +.DE +they perform the addition in a scratch register, and then store the +result. For the former C statement this is not necessary, because +the MC68000 and MC68020 have an instruction that can add constants +to almost anything (in this case: to locals). The MC68000 and MC68020 +back ends do use this instruction. +.NH +Some final remarks +.PP +As mentioned a few times before, the C front end compiler does not +generate optimal code and as a consequence of this the +back end does not always generate optimal code. This is especially +the case with temporary locals, which the front end generates much +too quickly, and also with conditional expressions that are +used in the rhs of an assignment statement (fortunately this is not +needed so much). +.PP +If +.I cgg +would have been able to accept operands separated by any character +instead of just by commas (in the instruction definitions part), +I wouldn't have had the need of the +.I killreg +pseudo instruction. It would also be handy to have +.I cgg +accept all normal C operators.
At the moment +.I cgg +does not accept binary ands, ors and exors, even though in [4] +it is stated that +.I cgg +does accept all normal C operators. As it happens I did not need the +binary operators, but at some time in developing the table I thought +I did. +.PP +I would also like +.I cg +to do more with the condition codes information that is supplied with +each instruction in the instruction definitions section of the table. +Sometimes +.I cg +generates test instructions which actually were not necessary. This +of course causes the generated +programs to be slightly larger and slightly slower. +.PP +In spite of the few minor shortcomings mentioned above I found +.I cgg +a very comfortable tool to use. +.SH +References +.PP +.IP [1] +T. B. Steel Jr., +.I +UNCOL: The myth and the Fact, +.R +in Ann. Rev. Auto. Prog., +R. Goodman (ed.), Vol. 2 (1969), pp 325 - 344 +.IP [2] +A. S. Tanenbaum, H. van Staveren, E. G. Keizer, J. W. Stevenson, +.I +A practical toolkit for making portable compilers, +.R +Informatica Report 74, Vrije Universiteit, Amsterdam, 1983 +.IP [3] +A. S. Tanenbaum, H. van Staveren, E. G. Keizer, J. W. Stevenson, +.I +Description of an experimental machine architecture for use with +block structured languages, +.R +Informatica Report 81, Vrije Universiteit, Amsterdam, 1983 +.IP [4] +H. 
van Staveren +.I +The table driven code generator from the Amsterdam Compiler Kit, +Second Revised Edition, +.R +Vrije Universiteit, Amsterdam +.IP [5] +.I +MC68020 32-bit Microprocessor User's Manual, +.R +Second Edition, +Motorola Inc., 1985, 1984 +.IP [6] +.I +MC68000 16-bit Microprocessor User's Manual, +Preliminary, +.R +Motorola Inc., 1979 diff --git a/doc/ncg.doc b/doc/ncg.doc new file mode 100644 index 0000000..bd3edfe --- /dev/null +++ b/doc/ncg.doc @@ -0,0 +1,3020 @@ +.\" $Id: ncg.doc,v 1.11 1994/06/24 10:02:07 ceriel Exp $ +.RP +.ND +.TL +The table driven code generator +.br +from the +.br +Amsterdam Compiler Kit +.br +Second Revised Edition +.AU +Hans van Staveren +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.AB +The Amsterdam Compiler Kit is a collection of tools +designed to help automate the process of compiler building. +Part of it is a table driven code generator, +called +.I cg , +and a program to check and translate machine description +tables called +.I cgg . +This document provides a description of the internal workings of +.I cg , +and a description of syntax and semantics of the driving table. +This is required reading for those wishing to write a new table. +.AE +.NH 1 +Introduction +.PP +Part of the Amsterdam Compiler Kit is a code generator system consisting +of a code generator generator (\fIcgg\fP for short) and some machine +independent C code. +.I Cgg +reads a machine description table and creates two files, +tables.h and tables.c. +These are then used together with other C code to produce +a code generator for the machine at hand. +.PP +This in turn reads compact EM code and produces +assembly code. +The remainder of this document will first broadly describe +the working of the code generator, +then the machine table will be described after which +some light is shed onto +the internal workings of the code generator. 
+.PP +The reader is assumed to have at least a vague notion about the +semantics of the intermediary EM code. +Someone wishing to write a table for a new machine +should be thoroughly acquainted with EM code +and the assembly code of the machine at hand. +.NH 1 +What has changed since version 1 ? +.PP +This section can be skipped by anyone not familiar with the first version. +It is not needed to understand the current version. +.PP +This paper describes the second version of the code generator system. +Although the code generator itself is for the main part unchanged, +the table format has been drastically redesigned and the opportunities +to make faulty tables are reduced. +The format is now aesthetically more pleasing (according to \fIme\fP that is), +mainly because the previous version was designed for one line code rules, +which did not work out that way. +.PP +The `SCRATCH' property is now automatically generated by +.I cgg , +.I erase +and +.I setcc +calls and their ilk are now no longer needed +(read: can no longer be forgotten) +and all this because the table now +.I knows +what the machine instructions look like and what arguments they +destroy. +.PP +Checks are now made for register types, so it is no longer possible +to generate a `regind2' token with a floating point register as a base. +In general, if the instructions of the machine are correctly defined, +it is no longer possible to generate code that does not assemble, +which of course does not mean that it is not possible to generate +assembly code that does not do what was intended! +.PP +Checks are made now for missing moves, tests, coercions, etc. +There is a form of procedure call now to reduce table size: +it is no longer necessary to write the code for conditional +instructions six times. +.PP +The inreg() pseudo-function returns other results!! +.NH 1 +Global overview of the workings of the code generator.
+.PP +The code generator or +.I cg +tries to generate good code by simulating the stack +of the compiled program and delaying emission of code as long +as possible. +It also keeps track of register contents, which enables it to +eliminate redundant moves, and tries to eliminate redundant tests +by keeping information about condition code status, +if applicable for the machine. +.PP +.I Cg +maintains a `fake stack' containing `tokens' that are built +by executing the pseudo code contained in the code rules given +by the table writer. +One can think of the fake stack as a logical extension of the real +stack the compiled program will have when run. +Alternatively one can think of the real stack as an infinite extension +at the bottom of the fake stack. +Both ways, the concatenation of the real stack and the fake stack +will be the stack as it would have been on a real EM machine (see figure). +.TS +center; +cw(3.5c) cw(3c) cw(3.5c) +cw(3.5c) cw(3c) cw(3.5c) +|cw(3.5c)| cw(3c) |cw(3.5c)| . +EM machine target machine + + + + + + real stack + stack + grows +EM stack \s+2\(br\s0 + \s+2\(br\s0 + \s+2\(br\s0 _ + \s+2\(br\s0 + \s+2\(da\s0 + fake stack + + + +_ _ +.T& +ci s s. +Relation between EM stack, real stack and fake stack. +.TE +During code generation tokens will be kept on the fake stack as long +as possible but when they are moved to the real stack, +by generating code for the push, +all tokens above\v'-.25m'\(dg\v'.25m' +.FS +\(dg in this document the stack is assumed to grow downwards, +although the top of the stack will mean the first element that will +be popped. +.FE +the pushed tokens will be pushed also, +so the fake stack will not contain holes. +.PP +The information about the machine that +.I cg +needs has to be given in a machine description table, +with as a major part a list of code rules telling +.I cg +what to do when certain EM-instructions occur +with certain tokens on the fake stack. 
+Not all possible fake stack possibilities have to be given of course, +there is a possibility for providing rewriting rules, or +.I coercions +as they are called in this document. +.PP +The main loop of +.I cg +is: +.IP 1) +Find a pattern of EM instructions starting at the current one to +generate code for. +This pattern will usually be of length one but longer patterns can be used. +Process any pseudo-instructions found. +.IP 2) +Select one of the possibly many stack patterns that go with this +EM pattern on the basis of heuristics, look ahead or both. +The cost fields provided in the token definitions and +instruction definitions are used +to compute costs during look ahead. +.IP 3) +Force the current fake stack contents to match the pattern. +This may involve +copying tokens to registers, making dummy transformations, e.g. to +transform a `local' into an `indexed from register' or might even +cause the move of the complete fake stack contents to the real stack +and then back into registers if no suitable coercions +were provided by the table writer. +.IP 4) +Execute the pseudocode associated with the code rule just selected, +this may cause registers to be allocated, +code to be emitted etc. +.IP 5) +Put tokens onto the fake stack to reflect the result of the operation. +.IP 6) +Insert some EM instructions into the stream; +this is possible but not common. +.IP 7) +Account for the cost. +The cost is kept in a (space, time) vector and look ahead decisions +are based on a linear combination of these. +The code generator calls on itself recursively during look ahead, +and the recursive incarnations return the costs they made. +The costs the top-level code generator makes are of course irrelevant. +.PP +The table that drives +.I cg +is not read in every time, +but instead is used at compile time +of +.I cg +to set parameters and to load pseudocode tables.
+A program called +.I cgg +reads the table and produces large lists of numbers that are +compiled together with machine independent code to produce +a code generator for the machine at hand. +.PP +Part of the information needed is not easily expressed in this table +format and must be supplied in two separate files, +mach.h and mach.c. +Their contents are described later in this document. +.NH 1 +Register variables +.PP +If the machine has more than enough registers to generate code with, +it is possible to reserve some of them for use as register variables. +If it has not, this section may be skipped and any references +to register variables in the rest of this document may be ignored. +.PP +The front ends generate messages to the back ends telling them which +local variables could go into registers. +The information given is the offset of the local, its size and type +and a scoring number, roughly the number of times it occurs. +.PP +The decision which variable to put in which register is taken by the +machine independent part of +.I cg +with the help of a scoring function provided by the table writer in mach.c. +The types of variables known are +.IP reg_any 12 +Just a variable of some integer type. +Nothing special known about it. +.IP reg_float +A floating point variable. +.IP reg_loop +A loop control variable. +.IP reg_pointer +A pointer variable. +Usually they are better candidates to put in registers. +.PP +If register variables are used, +more functions must be supplied in mach.c. +These functions are explained later. 
+.NH 1 +Description of the machine table +.PP +The machine description table consists of the +concatenation of the following sections: +.IP 1) +Constant definitions +.IP 2) +Property definitions +.IP 3) +Register definitions +.IP 4) +Token definitions +.IP 5) +Set definitions +.IP 6) +Instruction definitions +.IP 7) +Move definitions +.IP 8) +Test definitions +.IP 9) +Stack definitions +.IP 10) +Coercions +.IP 11) +Code rules +.PP +This is the order in the table +but the descriptions in this document will use a slightly different +order. +All sections except the first start with an uppercase header word. +Examples may be given in early stages that use knowledge that is explained +in a later stage. +If something is not clear the first time, please read on. +All will clear up in a couple of pages. +.PP +Input is in free format, white space and newlines may be used +at will to improve legibility. +Identifiers used in the table have the same syntax as C identifiers, +upper and lower case considered different, all characters significant. +Here is a list of reserved words; all of these are unavailable as identifiers. +.TS +box; +l l l l l. +ADDR STACKINGRULES gen proc test +COERCIONS TESTS highw reg_any to +INSTRUCTIONS TIMEFACTOR inreg reg_float topeltsize +INT TOKENS is_rom reg_loop ufit +MOVES call kills reg_pointer uses +PATTERNS cost lab regvar with +PROPERTIES defined labeldef return yields +REGISTERS exact leaving reusing +SETS example loww rom +SIZEFACTOR fallthrough move samesign +STACK from pat sfit +.TE +C style comments are accepted. +.DS +/* this is a comment */ +.DE +If the standard constant facility is not enough the C-preprocessor can +be used to enhance the table format. +.PP +Integers in the table have the normal C-style syntax. +Decimal by default, octal when preceded by a 0 +and hexadecimal when preceded by 0x. 
+.NH 2 +Constant section +.PP +In the first part of the table some constants can be defined, +most with the syntax +.DS +NAME=value +.DE +value being an integer or string. +Three constants must be defined here: +.IP EM_WSIZE 14 +Number of bytes in a machine word. +This is the number of bytes +a \fBloc\fP instruction will put on the stack. +.IP EM_PSIZE +Number of bytes in a pointer. +This is the number of bytes +a \fBlal\fP instruction will put on the stack. +.IP EM_BSIZE +Number of bytes in the hole between AB and LB. +If the calling sequence just saves PC and LB this +size will be twice the pointersize. +.PP +EM_WSIZE and EM_PSIZE are checked when a program is compiled +with the resulting code generator. +EM_BSIZE is used by +.I cg +to add to the offset of instructions dealing with locals +having positive offsets, +i.e. parameters. +.PP +Other constants can be defined here to be used as mnemonics +later in the table. +.PP +Optional is the definition of a printformat for integers in the code file. +This is given as +.DS +FORMAT = string +.DE +The string must be a valid printf(III) format, +and defaults to "%ld". +For example on the PDP-11 one can use +.DS +FORMAT= "0%lo" +.DE +to satisfy the old UNIX assembler that reads octal unless followed by +a period, and the ACK assembler that follows C conventions. +.PP +Tables under control of source code control systems like +.I sccs +or +.I rcs +can put their id-string here, for example +.DS +rcsid="$\&Header$" +.DE +These strings, like all strings in the table, will eventually +end up in the binary code generator produced. +.PP +Optionally one can give the factors with which the size and time +parts of the cost vector have to be multiplied to ensure they have the +same order of magnitude. +This can be done as +.DS +SIZEFACTOR = C\d3\u/C\d4\u +.sp +TIMEFACTOR = C\d1\u/C\d2\u +.DE +Above numbers must be read as rational numbers. +Defaults are 1/1 for both of them. 
+These constants set the default size/time tradeoff in the code generator, +so if TIMEFACTOR and SIZEFACTOR are both 1 the code generator will choose +at random between two code sequences where one has +cost (10,4) and the other has cost (8,6). +See also the description of the cost field below. +.NH 2 +Property definition +.PP +This part of the table defines the list of properties that can be used +to differentiate between register classes. +It consists of a list of user-defined +identifiers optionally followed by the size +of the property in parentheses, default EM_WSIZE. +Example for the PDP-11: +.TS +l l. +PROPERTIES /* The header word for this section */ + +GENREG /* All PDP registers */ +REG /* Normal registers (allocatable) */ +ODDREG /* All odd registers (allocatable) */ +REGPAIR(4) /* Register pairs for division */ +FLTREG(4) /* Floating point registers */ +DBLREG(8) /* Same, double precision */ +GENFREG(4) /* generic floating point */ +GENDREG(8) /* Same, double precision */ +FLTREGPAIR(8) /* register pair for modf */ +DBLREGPAIR(16) /* Same, double precision */ +LOCALBASE /* Guess what */ +STACKPOINTER +PROGRAMCOUNTER +.TE +Registers are allocated by asking for a property, +so if for some reason in later parts of the table +one particular register must be allocated it +has to have a unique property. +.NH 2 +Register definition +.PP +The next part of the tables describes the various registers of the +machine and defines identifiers +to be used in later parts of the tables. +Syntax: +.DS + : REGISTERS + : ':' '.' + : ident [ '(' string ')' ] [ '=' ident [ '+' ident ] ] +.DE +Example for the PDP-11: +.TS +l l. +REGISTERS + +r0,r2,r4 : GENREG,REG. +r1,r3 : GENREG,REG,ODDREG. +r01("r0")=r0+r1 : REGPAIR. +fr0("r0"),fr1("r1"),fr2("r2"),fr3("r3") : GENFREG,FLTREG. +dr0("r0")=fr0,dr1("r1")=fr1, + dr2("r2")=fr2,dr3("r3")=fr3 : GENDREG,DBLREG. +fr01("r0")=fr0+fr1,fr23("r2")=fr2+fr3 : FLTREGPAIR. +dr01("r0")=dr0+dr1,dr23("r2")=dr2+dr3 : DBLREGPAIR. 
+lb("r5") : GENREG,LOCALBASE. +sp : GENREG,STACKPOINTER. +pc : GENREG,PROGRAMCOUNTER. +.TE +.PP +The names in the left hand lists are names of registers as used +in the table. +They can optionally be followed by a string in parentheses, +their name as far as the assembler is concerned. +The default assembler name is the same as the table name. +A name can also be followed by +.DS += othername +.DE +or +.DS += othername + othername +.DE +which says that the register is composed of the parts +after the '=' sign. +The identifiers at the right hand side of the lists are +names of properties. +The end of each register definition is a period. +.PP +It might seem wise to list every property of a register, +so one might give r0 the extra property MFPTREG named after the not +too well known MFPT instruction on newer PDP-11 types, +but this is not a good idea, +especially since no use can be made of that instruction anyway. +Every extra property means the register set is more unorthogonal +and +.I cg +execution time is influenced by that, +because it has to take into account a larger set of registers +that are not equivalent. +So try to keep the number of different register classes to a minimum. +When faced with the choice between two possible code rules +for a nonfrequent EM sequence, +one being elegant but requiring an extra property, +and the other less elegant, +elegance should probably lose. +.PP +Tables that implement register variables must mark registers to be used +for variable storage here by following the list of properties by one +of the following: +.DS +regvar \fIor\fP regvar(reg_any) +regvar(reg_loop) +regvar(reg_pointer) +regvar(reg_float) +.DE +meaning they are candidates for that type of variable. +All register variables of one type must be of the same size, +and they may have no subregisters. +Such registers are not available for normal code generation.
+.NH 2 +Stack token definition +.PP +The next part describes all possible tokens that can reside on +the fake stack during code generation. +Attributes of a token are described as a C struct declaration; +this is followed by the size of the token in bytes, +optionally followed by the cost of the token when used as an addressing mode +and the format to be used on output. +.PP +In general, when writing a table, it is not wise to try +to think of all necessary tokens in advance. +While writing the necessity or advisability for some token +will be seen and it can then be added together with the +stacking rules and coercions needed. +.PP +Tokens should usually be declared for every addressing mode +of the machine at hand and for every size directly usable in +a machine instruction. +Example for the PDP-11 (incomplete): +.TS +l l. +TOKENS + +const2 = { INT num; } 2 cost(2,300) "$" num . +addr_local = { INT ind; } 2 . +addr_external = { ADDR off; } 2 "$" off. + +regdef2 = { GENREG reg; } 2 "*" reg. +regind2 = { GENREG reg; ADDR off; } 2 off "(" reg ")" . +reginddef2 = { GENREG reg; ADDR off; } 2 "*" off "(" reg ")" . +regconst2 = { GENREG reg; ADDR off; } 2 . +relative2 = { ADDR off; } 2 off . +reldef2 = { ADDR off; } 2 "*" off. +.TE +.PP +Types allowed in the struct are ADDR, INT and all register properties. +The type ADDR means a string and an integer, +which is output as string+integer, +and arithmetic on mixed ADDR and INT is possible. +This is the right mode for anything that can be an +assembler address expression. +The type of the register in the token is strict. +At any assignment of an expression of type register to a token attribute +of type register +.I cgg +will check if the set of possible results from the expression is a subset +of the set of permissible values for the token attribute. +.PP +The cost-field is made up by the word +.I cost +followed by two numbers in parentheses, the size and timecosts +of this token when output in the code file. 
+If omitted, zero cost is assumed. +While generating code, +.I cg +keeps track of a linear combination of these costs together +with the costs of the instructions themselves, which we will see later. +The coefficients of this linear combination are influenced +by two things: +.IP 1) +The SIZEFACTOR and TIMEFACTOR constants, +as mentioned above. +.IP 2) +A run time option to +.I cg +that can adjust the time/space tradeoff to all positions +from 100% time to 100% space. +.LP +By supplying different code rules in certain situations +it is possible to get a code generator that can adjust its +code to the need of the moment. +This is probably most useful with small machines; +experience has shown that on the larger micro's and mini's +the difference between time-optimal and space-optimal code +is often small. +.PP +The printformat consists of a list of strings intermixed with +attributes from the token. +Strings are output literally, attributes are printed according +to their type and value. +Tokens without a printformat should never be output, +and +.I cgg +checks for this. +.PP +Notice that tokens need not correspond to addressing modes; +the regconst2 token listed above, +meaning the sum of the contents of the register and the constant, +has no corresponding addressing mode on the PDP-11, +but is included so that a sequence of add constant, load indirect, +can be handled efficiently. +This regconst2 token is needed as part of the path +.DS +REG -> regconst2 -> regind2 +.DE +of which the first and the last "exist" and the middle is needed +only as an intermediate step. +.PP +Tokens with name `LOCAL' or `DLOCAL' are a special case when +register variables are used, this is explained further in the +section on token descriptions. +.NH 2 +Sets +.PP +Usually machines have certain collections of addressing modes that +can be used with certain instructions.
+The stack patterns in the table are lists of these collections +and since it is cumbersome to write out these long lists +every time, there is a section here to give names to these +collections. +Please note that it is not forbidden to write out a set +in the remainder of the table, +but for clarity it is usually better not to. +.LP +Example for the PDP-11 (incomplete): +.TS +l l. +SETS + +src2 = GENREG + regdef2 + regind2 + reginddef2 + relative2 + + \h'\w'= 'u'reldef2 + addr_external + const2 + LOCAL + ILOCAL + + \h'\w'= 'u'autodec + autoinc . +dst2 = src2 - ( const2 + addr_external ) . +xsrc2 = src2 + ftoint . +src1 = regdef1 + regind1 + reginddef1 + relative1 + reldef1 . +dst1 = src1 . +src1or2 = src1 + src2 . +src4 = relative4 + regdef4 + DLOCAL + regind4 . +dst4 = src4 . +.TE +Permissible in the set construction are all the usual set operators, i.e. +.IP + +set union +.IP - +set difference +.IP * +set intersection +.PP +Normal operator priorities apply, and parentheses can be +used. +Every token identifier is also a set identifier +denoting the singleton collection of tokens containing +just itself. +Every register property as defined above is also a set +matching all registers with that property. +The standard set identifier ALL denotes the collection of +all tokens. +.NH 2 +Instruction definitions +.PP +In the next part of the table the instructions for the machine +are declared together with information about their operands. +Example for the PDP-11(very incomplete): +.DS +.ta 8 16 24 32 40 48 56 64 +INSTRUCTIONS +/* default cost */ + +cost(2,600) + +/* Normal instructions */ + +adc dst2:rw:cc . +add src2:ro,dst2:rw:cc cost(2,450). +ash src2:ro,REG:rw:cc . +ashc src2:ro,REGPAIR+ODDREG:rw . +asl dst2:rw:cc . +asr dst2:rw:cc . +bhis "bcc" label . + +/* floating point instructions */ + +movf "ldf" fsrc,freg . +movf "stf" freg,fdst . 
+.DE +As the examples show an instruction definition consists of the name +of the instruction, +optionally followed by an assembler mnemonic in +quotes-default is the name itself-and then +a list of operands, +optionally followed by the cost and then a period. +If the cost is omitted the cost just after the word +INSTRUCTIONS is assumed, +if that is also omitted the cost is zero. +The cost must be known by +.I cg +of course if it has multiple +code generation paths to choose from. +.PP +For each operand we have the set of possible token values, +followed by a qualifier that can be +.IP :ro +signifies that this operand is read only, +so it can be replaced by a register with the same contents +if available. +.IP :rw +signifies that the operand is read-write +.IP :wo +signifies that the operand is write only. +.IP :cc +says that after the instruction is finished, the condition codes +are set to this operand. +If none of the operands have the :cc qualifier set, +.I cg +will assume that condition codes were unaffected +(but see below). +.PP +The first three qualifiers are of course mutually exclusive. +The :ro qualifier does not cause any special action in the current +implementation, and the :wo and :rw qualifiers are treated equal. +It must be recommended however to be precise in the specifications, +since later enhancements to the code generator might use them. +.PP +As the last examples show it is not necessary to give one definition +for an instruction. +There are machines that have very unorthogonal instruction sets, +in fact most of them do, +and it is possible to declare each possible combination +of operands. +The +.I cgg +program will check all uses of the instruction to find out which +one was meant. +.PP +Although not in the PDP-11 example above there is a possibility +to describe instructions that have side effects to registers not +in the operand list. 
+The only thing possible is to say that the instruction is destructive +to some registers or the condition codes, by following the operand list +with the word +.I kills +and a list of the things destroyed. +Example for some hypothetic accumulator machine: +.DS +add source2:ro kills ACCU :cc . +.DE +.PP +The cost fields in the definitions for tokens and instructions +are added together when generating code. +It depends on the machine at hand whether the costs are orthogonal +enough to make use of both these costs, +in extreme cases every combination of instructions and operands +can be given in this section, +all with their own costs. +.NH 2 +Expressions +.PP +Throughout the rest of the table expressions can be used in some +places. +This section will give the syntax and semantics of expressions. +There are four types of expressions: integer, address, register and undefined. +Really the type register is nonexistent as such, +for each register expression +.I cgg +keeps a set of possible values, +and this set can be seen as the real type. +.PP +Type checking is performed by +.I cgg . +An operator with at least one undefined operand returns undefined except +for the defined() function mentioned below. +An undefined expression is interpreted as FALSE when it is needed +as a truth value. +It is the responsibility of the table writer to ensure no undefined +expressions are ever used as initialisers for token attributes. +This is unfortunately almost impossible to check for +.I cgg +so be careful. +.LP +Basic terms in an expression are +.IP number 16 +A number is a constant of type integer. +Also usable is an identifier defined to a number in the constant +definition section. +.IP """string""" +A string within double quotes is a constant of type address. +All the normal C style escapes may be used within the string. +Also usable is an identifier defined to a string in the constant +definition section. +.IP [0-9][bf] +This must be read as a grep-pattern. 
+It evaluates to a string that is the label name for the +temporary label meant. +More about this in the section on code rules. +.IP REGIDENT +The name of a register is a constant of type register. +.IP $\fIi\fP +A dollar sign followed by a number is the representation of the argument +of EM instruction \fIi\fP. +The type of the operand is dependent on the instruction, +sometimes it is integer, +sometimes it is address. +It is undefined when the instruction has no operand. +Instructions with type-letter w can occur without an operand. +This can be checked in the code rule with the defined() pseudo function. +.br +If it is unimaginable for the operand of the instruction ever to be +something different from a plain integer, the type is integer, +otherwise it is address. +.br +Those who want to know it exactly, the integer instruction types +are the instructions marked with the +type-letters c,f,l,n,o,s,r,w,z in the EM manual. +.br +.I Cg +makes all necessary conversions, +like adding EM_BSIZE to positive arguments of instructions +dealing with locals, +prepending underlines to global names, +converting code labels into a unique representation etc. +Details about this can be found in the section about +machine dependent C code. +.IP %1 +This in general means the token mentioned first in the +stack pattern. +When used inside an expression the token must be a simple register. +Type of this is register. +.IP %1.off +This means attribute "off" of the first stack pattern token. +Type is the same as that of attribute "off". +To use this expression implies a check that all tokens +in the set used have the same attribute in the same place. +.IP %off +This means attribute "off" in the `current' token. +This can only be used when no confusion is possible about which token +was meant, eg. in the optional boolean expressions following token sets +in the move and test rules, in coercions or in the kills section inside +the code rules. +Same check as above.
+.IP %1.1 +This is the first subregister of the first token. +Previous comments apply. +.IP %b +A percent sign followed by a lowercase letter +stands for an allocated register. +This is the second allocated register. +.IP %a.2 +The second subregister of the first allocated register. +.PP +All normal C operators apply to integers, +the + operator on addresses behaves as one would expect +and the only operators allowed on register expressions +are == and != . +Furthermore there are some special `functions': +.IP defined(e) 16 +Returns 1 if expression +.I e +is defined, 0 otherwise. +.IP samesign(e1,e2) +Returns 1 if integer expression +.I e1 +and +.I e2 +have the same sign. +.IP sfit(e1,e2) +Returns 1 if integer expression +.I e1 +fits as a signed integer +into a field of +.I e2 +bits, 0 otherwise. +.IP ufit(e1,e2) +Same as above but now for unsigned +.I e1 . +.IP rom($a,n) +Integer expression giving word +.I n +from the \fBrom\fP descriptor +pointed at by EM instruction +number +.I a +in the EM-pattern. +Undefined if that descriptor does not exist. +.IP is_rom($a) +Integer expression indicating whether EM instruction number +.I a +in the EM-pattern refers to ROM. This may be useful for generating +position-independent code with the ROM in read-only memory. +.I Is_rom +enables one to see the difference between ROM references and other data +references. +.IP loww($a) +Returns the lower half of the argument of EM instruction number +.I a . +This is used to split the arguments of a \fBldc\fP instruction. +.IP highw($a) +Same for upper half. +.LP +The next two `functions' are only needed in a table that +implements register variables. +.IP inreg(e) 16 +Returns the status of the local variable with offset +.I e +from the localbase. +Value is an integer, +negative if the local was not allowed as a register +variable, +zero if it was allowed but not assigned to a register, +and the type of the register if it was assigned to a register. 
+This makes it possible to write +.DS +inreg($1)==reg_pointer +.DE +and similar things. +.IP regvar(e,t) +Type of this is register. +It returns the register the local with offset +.I e +is assigned to. +The table writer guarantees the register is one of type +.I t , +with +.I t +one of reg_any, reg_loop, reg_pointer or reg_float. +If +.I t +is omitted reg_any is assumed. +Undefined if inreg(\fIe\fP)<=0 . +.LP +The next two `functions' are only needed in a table that +uses the top element size information. +.IP topeltsize($a) 16 +Returns the size of the element on top of the EM-stack at the label +identified by $a. This can be used to put the top of the stack in a +register at the moment of an unconditional jump. At a conditional jump, +the size of the top-element will always look 0. +.IP fallthrough($a) +Returns 1 if the label identified by $a can be reached via fallthrough, 0 +otherwise. +.NH 2 +Token descriptions +.PP +Throughout the rest of the table tokens must be described, +be it as operands of instructions or as stack-replacements. +In all those cases we will speak about a token description. +The possibilities for these will be described here. +.PP +All expressions of type register are token descriptions. +The construct %1 means the token matched first in the stack pattern. +All other token descriptions are those that are built on the spot. +They look like this: +.DS +{ <tokenname> , <list of token attribute initializing expressions> } +.DE +All expressions are type-checked by +.I cgg , +and the number of initializers is also checked. +.PP +A special case of the last token descriptions occurs when +the token name is `LOCAL' or `DLOCAL' and the table uses register +variables. The first token attribute then must be of type integer and +the token description is automagically replaced by the register chosen +if the LOCAL (wordsize) or DLOCAL (twice the wordsize) was assigned +to a register. +.NH 2 +Code rules +.PP +The largest section of the tables consists of the code generation rules.
+They specify EM patterns, stack patterns, code to be generated etc. +Broadly the syntax is +.DS L +code rule : EM-part code-part +EM-part : EM-pattern | procedure-heading +code-part : code-description | procedure-call +code-description : stackpattern kills allocates generates yields leaving +.DE +Ignoring the "procedure"-part for now, the description for the EM-pattern +and the code-description follows. +Almost everything here is optional, the minimum code rule +is: +.DS +pat nop +.DE +that will simply throw away +.I nop +instructions. +.NH 3 +The EM pattern +.PP +The EM pattern consists of a list of EM mnemonics +preceded by the word +.I pat +optionally followed by a boolean expression. +Examples: +.DS +pat \fBloe\fP +.DE +will match a single \fBloe\fP instruction, +.DS +pat \fBloc\fP \fBloc\fP \fBcif\fP $1==2 && $2==8 +.DE +is a pattern that will match +.DS +\fBloc\fP 2 +\fBloc\fP 8 +\fBcif\fP +.DE +and +.DS +pat \fBlol\fP \fBinc\fP \fBstl\fP $1==$3 +.DE +will match for example +.DS +.ta 10m 20m 30m 40m 50m 60m +\fBlol\fP 6 \fBlol\fP -2 \fBlol\fP 4 +\fBinc\fP \fBinc\fP but \fInot\fP \fBinc\fP +\fBstl\fP 6 \fBstl\fP -2 \fBstl\fP -4 +.DE +A missing boolean expression evaluates to TRUE. +.PP +The code generator will match the longest EM pattern on every occasion, +if two patterns of the same length match the first in the table will be chosen, +while all patterns of length greater than or equal to three are considered +to be of the same length. +This rule of three is an unfortunate implementation dependent restriction, +but patterns longer than three EM instructions are luckily not needed +too often. +.PP +The EM mnemonic may also be the pseudo-instruction \fBlab\fP, which matches +a label. Its argument can be used in testing on topeltsize and +fallthrough. When this pattern is specified, the label should be defined +explicitly with a +.I labeldef +statement. 
+.PP +Following the EM-pattern there may be more than one code +rule, +.I cg +will choose using heuristics and the cost +information provided with the instruction and token +definitions. +Owing to parsing reasons of the table, the word +.I with +(see below) +is mandatory when there are more code rules attached to one +EM-pattern. +The stack pattern may be empty however. +.NH 3 +The stack pattern +.PP +The optional stack pattern is a list of token sets preceded by the word +.I with . +The token sets are usually represented by set identifiers for clarity. +No boolean expression is allowed here. +The first expression is the one that matches the top of the stack. +.PP +If the pattern is followed by the word STACK +it only matches if there is nothing +else on the fake stack, +and the code generator will stack everything not matched at the start +of the rule. +.PP +The pattern can be preceded with the word +.I exact +following the +.I with +that tells the code generator not to try to coerce to the pattern +but only to use it when it is already present on the fake stack. +There are two reasons for this construction, +correctness and speed. +It is needed for correctness when the pattern contains a register +that is not transparent when data is moved through it. +.LP +Example: on the PDP-11 the shortest code for +.DS +\fBlae\fP a +\fBloi\fP 8 +\fBlae\fP b +\fBsti\fP 8 +.DE +is +.DS +movf _a,fr0 +movf fr0,_b +.DE +if the floating point processor is in double +precision mode and fr0 is free. +Unfortunately this is not correct since a trap can occur on certain +kinds of data. +This could happen if there was a stack pattern for \fBsti\fP\ 8 +like this: +.DS +with DBLREG +.DE +The code generator would then find that coercing the 8-byte global _a +to a floating point register and then storing it to _b was the cheapest, +if the space/time knob was turned far enough to space. 
+This can be prevented by changing the stack pattern to +.DS +with exact DBLREG +.DE +It is unfortunate that the type information is no longer present, +since if _a really is a floating point number the move could be +made without error. +.PP +The second reason for the +.I exact +construct is speed. +When the code generator has a long list of possible stack patterns +for one EM pattern it can waste much time trying to find coercions +to all of them, while the mere presence of such a long list +indicates that the table writer has given many special cases. +Prepending all the special cases by +.I exact +will stop the code generator from trying to find things +that either cannot be done, +or are too expensive anyway. +.PP +So in general it is wise to prepend all stack patterns that +cannot be made by coercions with +.I exact . +.PP +Using both +.I exact +and STACK in the stack pattern has the effect that the rule will +only be taken if there is nothing else on the fake stack. +.NH 3 +The kills part +.PP +The optional kills part describes certain tokens +that should neither remain on +the fake stack, nor be remembered as contents of registers. +This is usually only required with store operations. +The entire fake stack, except for the part matched in the stack pattern, +is searched for tokens matching the expression and they are copied +to the real stack. +Every register that contains the token is marked as empty. +.PP +Syntax is +.DS +kills <list of things to kill> +thing to kill : token set optionally followed by boolean expression +.DE +Example: +.DS +kills regind2 %reg != lb || %off == $1 +.DE +is a kills part used for example in the \fBinl\fP or \fBstl\fP code rule. +It removes all register offsetted tokens where the register is not the +localbase plus the local in which the store is done.
+The necessity for this can be seen from the following example: +.DS +\fBlol\fP 4 +\fBinl\fP 4 +\fBstl\fP 6 +.DE +Without a proper kills part in the rule for \fBinl\fP code would +be generated as here +.DS +inc 4(r5) +mov 4(r5),6(r5) +.DE +so local 6 would be given the new value of local 4 instead of the old +as the EM code prescribed. +.PP +When generating code for an EM-instruction like +.B sti +it is necessary to write a line in the table like +.DS +kills all_except_constant_or_register +.DE +where the long identifier is a set containing all tokens +that can be the destination of some random indirect store. +These indirect stores are the main reason why this +.I kills +line cannot be deduced automatically by +.I cgg . +.PP +When generating something like a branch instruction it +might be needed to empty the fake stack completely. +This can of course be done with +.DS +kills ALL +.DE +or by ending the stack pattern with the word STACK, +if the stack pattern does not start with +.I exact . +The latter does not erase the contents of registers. +.PP +It is unfortunate that this part is still present in the table +but it is too much for now to let the +.I cgg +program discover what rules ruin what kind of tokens. +Maybe some day ..... +.NH 3 +The allocates part +.PP +The optional register allocation part describes the registers needed. +Syntax is +.DS +uses <itemlist> +.DE +where itemlist is a list of three kinds of things: +.IP 1) +.I reusing +< a token description >, for example %1. +.br +This will instruct the code generator that all registers +contained in this token can be reused if they are not used +in another token on the fakestack, +so that they are available for allocation in this +.I uses +line +if they were only used in that token. +See example below. +.IP 2) +a register property. +.br +This will allocate a register with that property, +that is marked as empty at this point. +Look ahead can be performed if there is more than one register available.
+.IP 3) +a register property with initialization. +.br +This will allocate the register as in 2) but will also +initialize it. +This eases the task of the code generator because it can +find a register already filled with the right value +if it exists. +.LP +Examples: +.DS +uses ODDREG +.DE +will allocate an odd register, while +.DS +uses REG={regind2,lb,$1} +.DE +will allocate a register while simultaneously filling it with +the requested value. +.br +Inside the coercion from xsrc2 to REG in the PDP-11 table +the following line can be found. +.DS +uses reusing %1, REG=%1 +.DE +This tells the code generator that registers contained in %1 can be used +again and asks to fill the register allocated with %1. +So if %1={regind2,r3,"4"} and r3 is not in use elsewhere on the fake stack +the following code might be generated. +.DS +mov 4(r3),r3 +.DE +In the rest of the line the registers allocated can be named by +%a and %b.1,%b.2, i.e. with lower case letters +in order of allocation. +.NH 3 +The generates part +.PP +Code to be generated, also optionally, is specified as +the word +.I gen +followed by a list of items of the following kind: +.IP 1) +An instruction name followed by a comma-separated +list of token descriptions. +.I Cgg +will search the instruction definitions for the machine to find a suitable +instruction. +At code generation time the assembler name of the +instruction will be output followed by a space, +followed by a comma separated list of tokens. +.br +In the table an instruction without operands must be +followed by a period. +The author of +.I cgg +could not get +.I yacc +to accept his syntax without it. +Sorry about this. +.IP 2) +a +.I move +call. +This has the following syntax: +.DS +move <source token description>,<destination token description> +.DE +Moves are handled specially since that enables the code generator +to keep track of register contents. +Example: +.DS +move r3,{regind2,lb,$1} +.DE +will generate code to move r3 to $1(r5) except when +r3 already was a copy of $1(r5).
+Then the code will be omitted. +The rules describing how to move things to each other +can be found in the move definitions section described below. +.IP 3) +For machines that have condition codes, +which alas most of them do, +there are provisions to remember condition code settings +and prevent needless testing. +To set the condition code to a token put in the code the following call: +.DS +test <token description> +.DE +This will generate a test if the condition codes +were not already set to that token. +The rules describing how to test things +can be found in the test definitions section described below. +See also the :cc qualifier that can be used at instruction +definition time. +.IP 4) +The +.I return +statement. +Only used when register variables are in use. +This statement causes a call to the machine dependent +C-routine +.I regreturn . +Explanation of this must wait for the description of the +file mach.c below. +.IP 5) +The +.I labeldef +statement. Its only argument should be that of the +.I lab +pseudo-instruction. This is needed to generate local labels when the +top element size information is used. It takes the form +.DS + labeldef $i +.DE +.IP 6) +A temporary label of the form <digit>: may be placed here. +Expressions of the form [0-9][bf] in this code rule +generate the same string as is used for this label. +The code generator system could probably easily be changed +to make this work for assemblers that do not support this +type of label by generating unique labels itself. +Implementation of this is not contemplated at the moment. +.NH 3 +Stack replacement +.PP +The optional stack replacement is a possibly empty list +of tokens to be pushed onto the fake stack. +It starts with the word +.I yields , +and is followed by a list of token descriptions. +.PP +All tokens matched by the stack pattern at the beginning of the code rule +are first removed and their registers deallocated. +Items are pushed in the order of appearance.
+This means that the last item will be on the top of the +stack after the push. +So if the stack pattern contained two sets +and they must be pushed back unchanged, +they have to be specified as stack replacement +.DS +yields %2 %1 +.DE +and not the other way around. +This is known to cause errors in tables so watch out for +this! +.NH 3 +EM replacement +.PP +In exceptional cases it might be useful to leave part of an EM-pattern +undone. +For example, a \fBsdl\fP instruction might +be split into two \fBstl\fP instructions +when there is no 4-byte quantity on the stack. +The EM replacement part allows +one to express this. +It is activated by the word +.I leaving . +.LP +Example: +.DS +leaving \fBstl\fP $1 \fBstl\fP $1+2 +.DE +The instructions are inserted in the stream so that they can match +the first part of a pattern in the next step. +Note that since the code generator traverses the EM instructions in a strict +linear fashion, +it is impossible to let the EM replacement match later parts of a pattern. +So if there is a pattern +.DS +\fBloc\fP \fBstl\fP $1==0 +.DE +and the input is +.DS +\fBloc\fP 0 \fBsdl\fP 4 +.DE +the \fBloc\fP\ 0 will be processed first, +then the \fBsdl\fP might be split into two \fBstl\fP's but the pattern +cannot match now. +.NH 3 +Examples +.PP +A list of examples for the PDP-11 is given here. +Far from being complete it gives examples of most kinds +of instructions. +.DS +.ta 7.5c +pat loc yields {const2, $1} + +pat ldc yields {const2, loww($1)} {const2, highw($1)} +.DE +These simple patterns just push one or more tokens onto the fake stack. 
+.DS +.ta 7.5c +pat lof +with REG yields {regind2,%1,$1} +with exact regconst2 yields {regind2,%1.reg,$1+%1.off} +with exact addr_external yields {relative2,$1+%1.off} +with exact addr_local yields {LOCAL, %1.ind + $1,2} +.DE +This pattern shows the possibility to do different things +depending on the fake stack contents, +there are some rules for some specific cases plus a general rule, +not preceded by +.I exact +that can always be taken after a coercion, +if necessary. +.DS +.ta 7.5c +pat lxl $1>3 +uses REG={LOCAL, SL, 2}, REG={const2,$1-1} +gen 1: + move {regind2,%a, SL},%a + sob %b,{label,1b} yields %a +.DE +This rule shows register allocation with initialisation, +and the use of a temporary label. +The constant SL used here is defined to be the offset from lb +of the static link, +that is pushed by the Pascal compiler as the last argument of +a function. +.DS +.ta 7.5c +pat stf +with regconst2 xsrc2 + kills allexeptcon + gen move %2,{regind2,%1.reg,$1+%1.off} +with addr_external xsrc2 + kills allexeptcon + gen move %2,{relative2,$1+%1.off} +.DE +This rule shows the use of a +.I kills +part in a store instruction. +The set allexeptcon contains all tokens that can be the destination +of an indirect store. +.DS +.ta 7.5c +pat sde +with exact FLTREG + kills posextern + gen move %1,{relative4,$1} +with exact ftolong + kills posextern + gen setl. + movfi %1.reg,{relative4,$1} + seti. +with src2 src2 + kills posextern + gen move %1, {relative2, $1 } + move %2, {relative2, $1+2} +.DE +The rule for +.B sde +shows the use of the +.I exact +clause in both qualities, +the first is for correctness, +the second for efficiency. +The third rule is taken by default, +resulting in two separate stores, +nothing better exists on the PDP-11. +.DS +.ta 7.5c +pat sbi $1==2 +with src2 REG + gen sub %1,%2 yields %2 +with exact REG src2-REG + gen sub %2,%1 + neg %1 yields %1 +.DE +This rule for +.I sbi +has a normal first part, +and a hand optimized special case as its second part. 
+.DS +.ta 7.5c +pat mli $1==2 +with ODDREG src2 + gen mul %2,%1 yields %1 +with src2 ODDREG + gen mul %1,%2 yields %2 +.DE +This shows the general property for rules with commutative +operators, +heuristics or look ahead will have to decide which rule is the best. +.DS +.ta 7.5c +pat loc sli $1==1 && $2==2 +with REG +gen asl %1 yields %1 +.DE +A simple rule involving a longer EM-pattern, +to make use of a specialized instruction available. +.DS +.ta 7.5c +pat loc loc cii $1==1 && $2==2 +with src1or2 +uses reusing %1,REG +gen movb %1,%a yields %a +.DE +A somewhat more complicated example of the same. +Note the +.I reusing +clause. +.DS +.ta 7.5c +pat loc loc loc cii $1>=0 && $2==2 && $3==4 + leaving loc $1 loc 0 +.DE +Shows a trivial example of EM-replacement. +This is a rule that could be done by the +peephole optimizer, +if word order in longs was defined in EM. +On a `big-endian' machine the two replacement +instructions would be the other way around. +.DS +.ta 7.5c +pat and $1==2 +with const2 REG + gen bic {const2,~%1.num},%2 yields %2 +with REG const2 + gen bic {const2,~%2.num},%1 yields %1 +with REG REG + gen com %1 + bic %1,%2 yields %2 +.DE +Shows the way to handle the absence +of an +.I and -instruction. +.DS +.ta 7.5c +pat set $1==2 +with REG +uses REG={const2,1} +gen ash %1,%a yields %a +.DE +Shows the building of a word-size set. +.DS +.ta 7.5c +pat lae aar $2==2 && rom($1,3)==1 && rom($1,1)==0 + leaving adi 2 + +pat lae aar $2==2 && rom($1,3)==1 && rom($1,1)!=0 + leaving adi 2 adp 0-rom($1,1) +.DE +Two rules showing the use of the rom pseudo function, +and some array optimalisation. +.DS +.ta 7.5c +pat bra +with STACK +gen jbr {label, $1} +.DE +A simple jump. +The stack pattern guarantees that everything will be stacked +before the jump is taken. 
+.DS +pat lab topeltsize($1)==2 && !fallthrough($1) +gen labeldef $1 yields r0 + +pat lab topeltsize($1)==2 && fallthrough($1) +with src2 +gen move %1,r0 + labeldef $1 yields r0 + +pat lab topeltsize($1)!=2 +with STACK +kills all +gen labeldef $1 + +pat bra topeltsize($1)==2 +with src2 STACK + gen move %1,d0 + jbr {label, $1} + +pat bra topeltsize($1)!=2 +with STACK + gen jbr {label, $1} +.DE +The combination of these patterns make sure that the top of the EM-stack will +be in register r0 whenever necessary. The top element size mechanism will +also show a size of 0 whenever a conditional branch to a label +occurs. This saves a lot of patterns and hardly decreases performance. +When the same register is used to return function results, this can save +many moves to and from the stack. +.DS +.ta 7.5c +pat cal +with STACK +gen jsr pc,{label, $1} +.DE +A simple call. +Same comments as previous rule. +.DS +.ta 7.5c +pat lfr $1==2 yields r0 +pat lfr $1==4 yields r1 r0 +.DE +Shows the return area conventions of the PDP-11 table. +At this point a reminder: +the +.B asp +instruction, and some other instructions must leave +the function return area intact. +See the defining document for EM for exact information. +.DS +.ta 7.5c +pat ret $1==0 +with STACK +gen mov lb,sp + rts pc +.DE +This shows a rule for +.B ret +in a table not using register variables. +In a table with register variables the +.I gen +part would just contain +.I return . +.DS +.ta 7.5c +pat blm +with REG REG +uses REG={const2,$1/2} +gen 1: + mov {autoinc,%2},{autoinc,%1} + sob %a,{label,1b} +.DE +This rule for +.B blm +already uses three registers of the same type. +.I Cgg +contains code to check all rules +to see if they can be applied from an empty fakestack. +It uses the marriage thesis from Hall, +a thesis from combinatorial mathematics, +to accomplish this. +.DS +.ta 7.5c +pat exg $1==2 +with src2 src2 yields %1 %2 +.DE +This rule shows the exchanging of two elements on the fake stack. 
+.NH 2 +Code rules using procedures +.PP +To start this section it must be admitted at once that the +word procedure is chosen here mainly for its advertising +value. +It more resembles a glorified goto but this of course can +not be admitted in the glossy brochures. +This document will continue to use the word +procedure. +.PP +The need for procedures was felt after the first version of +the code generator system was made, +mainly because of conditional instructions. +Often the code sequences for +.B tlt , +.B tle , +.B teq , +.B tne , +.B tge +and +.B tgt +were identical apart from one opcode in the code rule. +The code sequence had to be written out six times however. +Not only did this increase the table size and bore the +table writer, it also led to errors when changing the table +since it happened now and then that five out of six +rules were changed. +.PP +In general the procedures in this table format are used to +keep one copy instead of six of the code rules for all +sorts of conditionals and one out of two for things like +increment/decrement. +.PP +And now the syntax, first the procedure definition, +which must indeed be defined before the call because +.I cgg +is one-pass. +The procedure heading replaces the EM-pattern in a code rule +and looks like this: +.DS +proc <identifier> <optional example> +.DE +The identifier is used in later calls and the example must +be used if expressions like $1 are used in the code rule. +.DS +<example> : example <list of EM-instructions> +.DE +so an example looks just like an EM-pattern, but without +the optional boolean expression. +The example is needed to know the types of $1 expressions. +The current version of +.I cgg +does not check correctness of the example, so be careful. +.PP +A procedure is called with string-parameters, +that are assembler opcodes. +They can be accessed by appending the string `[<number>]' +to a table opcode, where <number> is the parameter number. +The string `*' can be used as an equivalent for `[1]'.
+Just in case this is not clear, here is an example for +a procedure to increment/decrement a register. +.DS +.ta 7.5c +incop REG:rw:cc . /* in the INSTRUCTIONS part of course */ + +proc incdec +with REG +gen incop* %1 yields %1 +.DE +The procedure is called with parameter "inc" or "dec". +.PP +The procedure call is given instead of the code-part of the +code rule and looks like this +.DS +call <identifier> '(' <comma-separated list of strings> ')' +.DE +which leads to the following large example: +.DS +.ta 7.5c +proc bxx example beq +with src2 src2 STACK +gen cmp %2,%1 + jxx* {label, $1} + +pat blt call bxx("jlt") +pat ble call bxx("jle") +pat beq call bxx("jeq") +pat bne call bxx("jne") +pat bgt call bxx("jgt") +pat bge call bxx("jge") +.DE +.NH 2 +Move definitions +.PP +We now jump back to near the beginning of the table +where the move definitions are found. +The move definitions directly follow the instruction +definitions. +.PP +In certain cases a move is called for, +either explicitly when a +.I move +instruction is used in a code rule, +or implicitly in a register initialization. +The different code rules possible to move data from one +spot to another are described here. +Example for the PDP-11: +.DS +.ta 8 16 24 32 40 48 56 64 +MOVES + +from const2 %num==0 to dst2 +gen clr %2 + +from src2 to dst2 +gen mov %1,%2 + +from FLTREG to longf4-FLTREG +gen movfo %1,%2 + +from longf4-FLTREG to FLTREG +gen movof %1,%2 +.DE +The example shows that the syntax is just +.DS +from <source> to <destination> gen <list of instructions> +.DE +Source and destination are a token set, optionally followed by +a boolean expression. +The code generator will take the first move that matches, +whenever a move is necessary. +.I Cgg +checks whether all moves called for in the table are present. +.NH 2 +Test definitions +.PP +This part describes the instructions necessary to set the condition codes +to a certain token. +These rules are needed when the +.I test +instruction is used in code rules.
+Example for the PDP-11: +.DS +.ta 8 16 24 32 40 48 56 64 +TESTS + +to test src2 +gen tst %1 +.DE +So syntax is just +.DS +to test <source> gen <code> +.DE +Source is the same thing as in the move definition. +.I Cgg +checks whether all tests called for in the table are present. +.NH 2 +Some explanation about the rules behind coercions +.PP +A central part in code generation is taken by the +.I coercions . +It is the responsibility of the table writer to provide +all necessary coercions so that code generation can continue. +The minimal set of coercions consists of +the coercions to unstack every token expression, +in combination with the rules to stack every token. +It should not be possible to smuggle a table through +.I cgg +without this basic set available. +.PP +If these are present the code generator can always make the necessary +transformations by stacking and unstacking. +Of course for code quality it is usually best to provide extra coercions +to prevent this stacking from taking place. +.I Cg +discriminates three types of coercions: +.IP 1) +Unstacking coercions. +This category can use the +.I uses +clause in its code. +.IP 2) +Splitting coercions, these are the coercions that split +larger tokens into smaller ones. +.IP 3) +Transforming coercions, these are the coercions that transform +a token into another of the same size. +This category can use the +.I uses +clause in its code. +.PP +When a stack configuration does not match the stack pattern +.I coercions +are searched for in the following order: +.IP 1) +First tokens are split if necessary to get their sizes right. +.IP 2) +Then transforming coercions are found that will make the pattern match. +.IP 3) +Finally if the stack pattern is longer than the fake stack contents +unstacking coercions will be used to fill up the pattern. +.PP +At any point, when coercions are missing so that code generation cannot +continue, the offending tokens are stacked.
+.NH 2 +Stack definitions +.PP +The next part of the table defines the stacking rules for the machine. +Each token that may reside on the fake stack must have a rule attached +to put it on the real stack. +Example for the PDP-11: +.DS +.ta 8 16 24 32 40 48 56 64 +STACKINGRULES + +from const2 %num==0 to STACK +gen clr {autodec,sp} + +from src2 to STACK +gen mov %1,{autodec,sp} + +from regconst2 to STACK +gen mov %1.reg,{autodec,sp} + add {addr_external, %1.off},{regdef2,sp} + +from DBLREG to STACK +gen movf %1,{autodec,sp} + +from FLTREG to STACK +gen movfo %1,{autodec,sp} + +from regind8 to STACK +uses REG +gen move %1.reg,%a + add {addr_external, 8+%1.off},%a + mov {autodec, %a},{autodec,sp} + mov {autodec, %a},{autodec,sp} + mov {autodec, %a},{autodec,sp} + mov {autodec, %a},{autodec,sp} +.DE +.PP +These examples should be self-explanatory, except maybe for the last one. +It is possible inside a stacking-rule to use a register. +Since however the stacking might also take place at a moment +when no registers are free, it is mandatory that for each token +there is one stackingrule that does not use a register. +The code generator uses the first rule possible. +.NH 2 +Coercions +.PP +The next part of the table defines the coercions that are possible +on the defined tokens. +Example for the PDP-11: +.DS +.ta 7.5c +COERCIONS + +from STACK +uses REG +gen mov {autoinc,sp},%a yields %a + +from STACK +uses DBLREG +gen movf {autoinc,sp},%a yields %a + +from STACK +uses REGPAIR +gen mov {autoinc,sp},%a.1 + mov {autoinc,sp},%a.2 yields %a +.DE +These three coercions just deliver a certain type +of register by popping it from the real stack. +.DS +.ta 7.5c +from LOCAL yields {regind2,lb,%1.ind} + +from DLOCAL yields {regind4,lb,%1.ind} + +from REG yields {regconst2, %1, 0} +.DE +These three are zero-cost rewriting rules. 
+.DS +.ta 7.5c +from regconst2 %1.off==1 +uses reusing %1,REG=%1.reg +gen inc %a yields %a + +from regconst2 +uses reusing %1,REG=%1.reg +gen add {addr_external, %1.off},%a yields %a + +from addr_local +uses REG +gen mov lb,%a + add {const2, %1.ind},%a yields %a +.DE +The last three are three different cases of the coercion +register+constant to register. +Only in the last case is it always necessary to allocate +an extra register, +since arithmetic on the localbase is unthinkable. +.DS +.ta 7.5c +from xsrc2 +uses reusing %1, REG=%1 yields %a + +from longf4 +uses FLTREG=%1 yields %a + +from double8 +uses DBLREG=%1 yields %a + +from src1 +uses REG={const2,0} +gen bisb %1,%a yields %a +.DE +These examples show the coercion of different +tokens to a register of the needed type. +The last one shows the trouble needed on a PDP-11 to +ensure bytes are not sign-extended. +In EM it is defined that the result of a \fBloi\fP\ 1 +instruction is an integer in the range 0..255. +.DS +.ta 7.5c +from REGPAIR yields %1.2 %1.1 + +from regind4 yields {regind2,%1.reg,2+%1.off} + {regind2,%1.reg,%1.off} + +from relative4 yields {relative2,2+%1.off} + {relative2,%1.off} +.DE +The last examples are splitting rules. +.PP +The examples show that +all coercions change one token on the fake stack into one or more others, +possibly generating code. +The STACK token is supposed to be on the fake stack when it is +really empty, and can only be changed into one other token. +.NH 1 +The files mach.h and mach.c +.PP +The table writer must also supply two files containing +machine dependent declarations and C code. +These files are mach.h and mach.c. +.NH 2 +Types in the code generator +.PP +Three different types of integer coexist in the code generator +and their range depends on the machine at hand. +They are defined depending on the Target EM_WSIZE, or TEM_WSIZE, +and TEM_PSIZE. +The type 'int' is used for things like counters that won't require +more than 16 bits precision. 
+The type 'word' is used among others to assemble datawords and +is of type 'long'. +The type 'full' is used for addresses and is of type 'long' if +TEM_WSIZE>2 or TEM_PSIZE>2. +.PP +In macro and function definitions in later paragraphs implicit typing +will be used for parameters, that is parameters starting with an 's' +will be of type string, and the letters 'i','w','f' will stand for +int, word and full respectively. +.NH 2 +Global variables to work with +.PP +Some global variables are present in the code generator +that can be manipulated by the routines in mach.h and mach.c. +.LP +The declarations are: +.DS L +.ta 20 +FILE *codefile; /* code is emitted on this stream */ +word part_word; /* words to be output are put together here */ +int part_size; /* number of bytes already put in part_word */ +char str[]; /* Last string read in */ +long argval; /* Last int read and kept */ +.DE +.NH 2 +Macros in mach.h +.PP +In the file mach.h a collection of macros is defined that have +to do with formatting of assembly code for the machine at hand. +Some of these macros can of course be left undefined in which case the +macro calls are left in the source and will be treated as +function calls. +These functions can then be defined in \fImach.c\fR. +.PP +The macros to be defined are: +.IP ex_ap(s) 16 +Must print the magic incantations that will mark the symbol \fI\fR +to be exported to other modules. +This is the translation of the EM \fBexa\fP and \fBexp\fP instructions. +.IP in_ap(s) +Same to import the symbol. +Translation of \fBina\fP and \fBinp\fP. +.IP newplb(s) +Must print the definition of procedure label \fIs\fR. +If left undefined the newilb() macro is used instead. +.IP newilb(s) +Must print the definition of instruction label \fIs\fR. +.IP newdlb(s) +Must print the definition of data label \fIs\fR. +.IP dlbdlb(s1,s2) +Must define data label +.I s1 +to be equal to +.I s2 . 
+.IP newlbss(s,f) +Must declare a piece of memory initialized to BSS_INIT(see below) +of length +.I f +and with label +.I s . +.IP cst_fmt +Format to be used when converting constant arguments of +EM instructions to string. +Argument to be formatted will be 'full'. +.IP off_fmt +Format to be used for integer part of label+constant, +argument will be 'full'. +.IP fmt_ilb(ip,il,s) +Must use the numbers +.I ip +and +.I il +that are a procedure number +and a label number respectively and copy a string to +.I s +that must be unique for that combination. +This procedure is optional, if it is not given ilb_fmt +must be defined as below. +.IP ilb_fmt +Format to be used for creation of unique instruction labels. +Arguments will be a unique procedure number (int) and the label +number (int). +.IP dlb_fmt +Format to be used for printing numeric data labels. +Argument will be 'int'. +.IP hol_fmt +Format to be used for generation of labels for +space generated by a +.B hol +pseudo. +Argument will be 'int'. +.IP hol_off +Format to be used for printing of the address of an element in +.B hol +space. +Arguments will be the offset in the +.B hol +block (word) and the number of the +.B hol +(int). +.IP con_cst(w) +Must generate output that will assemble into one machine word. +.IP con_ilb(s) +Must generate output that will put the address of the instruction label +into the datastream. +.IP con_dlb(s) +Must generate output that will put the address of the data label +into the datastream. +.IP fmt_id(sf,st) +Must take the string in +.I sf +that is a nonnumeric global label, and transform it into a copy made to +.I st +that will not collide with reserved assembler words and system labels. +This procedure is optional, if it is not given the id_first macro is used +as defined below. +.IP id_first +Must be a character. +This is prepended to all nonnumeric global labels if their length +is shorter than the maximum allowed(currently 8) or if they already +start with that character. 
+This is to avoid conflicts of user labels with system labels. +.IP BSS_INIT +Must be a constant. +This is the value filled in all the words not initialized explicitly. +This is loader and system dependent. +If omitted no initialization is assumed. +.NH 3 +Example mach.h for the PDP-11 +.DS L +.ta 4c +#define ex_ap(y) fprintf(codefile,"\et.globl %s\en",y) +#define in_ap(y) /* nothing */ + +#define newplb(x) fprintf(codefile,"%s:\en",x) +#define newilb(x) fprintf(codefile,"%s:\en",x) +#define newdlb(x) fprintf(codefile,"%s:\en",x) +#define dlbdlb(x,y) fprintf(codefile,"%s=%s\en",x,y) +#define newlbss(l,x) fprintf(codefile,"%s:.=.+%d.\en",l,x); + +#define cst_fmt "$%d." +#define off_fmt "%d." +#define ilb_fmt "I%x_%x" +#define dlb_fmt "_%d" +#define hol_fmt "hol%d" + +#define hol_off "%ld.+hol%d" + +#define con_cst(x) fprintf(codefile,"%ld.\en",x) +#define con_ilb(x) fprintf(codefile,"%s\en",x) +#define con_dlb(x) fprintf(codefile,"%s\en",x) + +#define id_first '_' +#define BSS_INIT 0 +.DE +.NH 2 +Functions in mach.c +.PP +In mach.c some functions must be supplied, +mostly manipulating data resulting from pseudoinstructions. +The specifications are given here, +implicit typing of parameters as above. +.IP - +con_part(isz,word) +.br +This function must manipulate the globals +part_word and part_size to append the isz bytes +contained in word to the output stream. +If part_word is full, i.e. part_size==TEM_WSIZE +the function part_flush() may be called to empty the buffer. +This is the function that must go through the trouble of +doing byte order in words correct. +.IP - +con_mult(w_size) +.br +This function must take the string str[] and create an integer +from the string of size w_size and generate code to assemble global +data for that integer. +Only the sizes for which arithmetic is implemented need be +handled, +so if 200-byte integer division is not implemented, +200-byte integer global data don't have to be implemented. 
+Here one must take care of word order in long integers. +.IP - +con_float() +.br +This function must generate code to assemble a floating +point number of which the size is contained in argval +and the ASCII representation in str[]. +.IP - +prolog(f_nlocals) +.br +This function is called at the start of every procedure. +Function prolog code must be generated, +and room made for local variables for a total of f_nlocals bytes. +.IP - +mes(w_mesno) +.br +This function is called when a +.B mes +pseudo is seen that is not handled by the machine independent part. +The example below shows all one probably have to know about that. +.IP - +segname[] +.br +This is not a function, +but an array of four strings. +These strings are put out whenever the code generator +switches segments. +Segments are SEGTXT, SEGCON, SEGROM and SEGBSS in that order. +.PP +If register variables are used in a table, the program +.I cgg +will define the word REGVARS during compilation of the sources. +So the following functions described here should be bracketed +by #ifdef REGVARS and #endif. +.IP - +regscore(off,size,typ,freq,totyp) long off; +.br +This function should assign a score to a register variable, +the score should preferably be the estimated number of bytes +gained when it is put in a register. +Off and size are the offset and size of the variable, +typ is the type, that is reg_any, reg_pointer, reg_loop or reg_float. +Freq is the count of static occurrences, and totyp +is the type of the register it is planned to go into. +.br +Keep in mind that the gain should be net, that is the cost for +register save/restore sequences and the cost of initialisation +in the case of parameters should already be included. +.IP - +i_regsave() +.br +This function is called at the start of a procedure, just before +register saves are done. +It can be used to initialise some variables if needed. +.IP - +f_regsave() +.br +This function is called at end of the register save sequence. 
+It can be used to do the real saving if multiple register move +instructions are available. +.IP - +regsave(regstr,off,size) char *regstr; long off; +.br +Should either do the real saving or set up a table to have +it done by f_regsave. +Note that initialisation of parameters should also be done, +or planned here. +.IP - +regreturn() +.br +Should restore saved registers and return. +The function result is already in the function return area by now. +.NH 3 +Example mach.c for the PDP-11 +.PP +As an example of the sort of code expected, +the mach.c for the PDP-11 is presented here. +.DS L +.ta 0.5i 1i 1.5i 2i 2.5i 3i 3.5i 4i 4.5i +/* + * machine dependent back end routines for the PDP-11 + */ + +con_part(sz,w) register sz; word w; { + + while (part_size % sz) + part_size++; + if (part_size == 2) + part_flush(); + if (sz == 1) { + w &= 0xFF; + if (part_size) + w <<= 8; + part_word |= w; + } else { + assert(sz == 2); + part_word = w; + } + part_size += sz; +} + +con_mult(sz) word sz; { + long l; + + if (sz != 4) + fatal("bad icon/ucon size"); + l = atol(str); + fprintf(codefile,"\et%o;%o\en",(int)(l>>16),(int)l); +} + +con_float() { + double f; + register short *p,i; + + /* + * This code is correct only when the code generator is + * run on a PDP-11 or VAX-11 since it assumes native + * floating point format is PDP-11 format. + */ + + if (argval != 4 && argval != 8) + fatal("bad fcon size"); + f = atof(str); + p = (short *) &f; + i = *p++; + if (argval == 8) { + fprintf(codefile,"\et%o;%o;",i,*p++); + i = *p++; + } + fprintf(codefile,"\et%o;%o\en",i,*p++); +} + +#ifdef REGVARS + +char Rstring[10]; +full lbytes; +struct regadm { + char *ra_str; + long ra_off; +} regadm[2]; +int n_regvars; + +regscore(off,size,typ,score,totyp) long off; { + + /* + * This function is full of magic constants. + * They are a result of experimentation. 
+ */ + + if (size != 2) + return(-1); + score -= 1; /* allow for save/restore */ + if (off>=0) + score -= 2; + if (typ==reg_pointer) + score *= 17; + else if (typ==reg_loop) + score = 10*score+50; /* Guestimate */ + else + score *= 10; + return(score); /* 10 * estimated # of words of profit */ +} + +i_regsave() { + + Rstring[0] = 0; + n_regvars=0; +} + +f_regsave() { + register i; + + if (n_regvars==0 || lbytes==0) { + fprintf(codefile,"mov r5,-(sp)\enmov sp,r5\en"); + if (lbytes == 2) + fprintf(codefile,"tst -(sp)\en"); + else if (lbytes!=0) + fprintf(codefile,"sub $0%o,sp\en",lbytes); + for (i=0;i6) { + fprintf(codefile,"mov $0%o,r0\en",lbytes); + fprintf(codefile,"jsr r5,PR%s\en",Rstring); + } else { + fprintf(codefile,"jsr r5,PR%d%s\en",lbytes,Rstring); + } + } + for (i=0;i=0) + fprintf(codefile,"mov 0%lo(r5),%s\en",regadm[i].ra_off, + regadm[i].ra_str); +} + +regsave(regstr,off,size) char *regstr; long off; { + + fprintf(codefile,"/ Local %ld into %s\en",off,regstr); + strcat(Rstring,regstr); + regadm[n_regvars].ra_str = regstr; + regadm[n_regvars].ra_off = off; + n_regvars++; +} + +regreturn() { + + fprintf(codefile,"jmp RT%s\en",Rstring); +} + +#endif + +prolog(nlocals) full nlocals; { + +#ifndef REGVARS + fprintf(codefile,"mov r5,-(sp)\enmov sp,r5\en"); + if (nlocals == 0) + return; + if (nlocals == 2) + fprintf(codefile,"tst -(sp)\en"); + else + fprintf(codefile,"sub $0%o,sp\en",nlocals); +#else + lbytes = nlocals; +#endif +} + +mes(type) word type; { + int argt ; + + switch ( (int)type ) { + case ms_ext : + for (;;) { + switch ( argt=getarg( + ptyp(sp_cend)|ptyp(sp_pnam)|sym_ptyp) ) { + case sp_cend : + return ; + default: + strarg(argt) ; + fprintf(codefile,".globl %s\en",argstr) ; + break ; + } + } + default : + while ( getarg(any_ptyp) != sp_cend ) ; + break ; + } +} + +char *segname[] = { + ".text", /* SEGTXT */ + ".data", /* SEGCON */ + ".data", /* SEGROM */ + ".bss" /* SEGBSS */ +}; +.DE +.NH 1 +Internal workings of the code generator. 
+.NH 2 +Description of tables.c and tables.h contents +.PP +In this section the intermediate files will be described +that are produced by +.I cgg +and compiled with machine independent code to produce a code generator. +.NH 3 +Tables.c +.PP +Tables.c contains a large number of initialized arrays of all sorts. +Description of each follows: +.br +.in 1i +.ti -0.5i +byte coderules[] +.br +Pseudo code interpreted by the code generator. +Always starts with some opcode followed by operands depending +on the opcode. +Some of the opcodes have an argument encoded in the upper three +bits of the opcode byte. +Integers in this table are between 0 and 32767 and have a one byte +encoding if between 0 and 127. +.ti -0.5i +char wrd_fmt[] +.br +The format used for output of words. +.ti -0.5i +char stregclass[] +.br +Number of computed static register class per register. +Two registers are in the same class if they have the same properties +and don't share a common subregister. +.ti -0.5i +struct reginfo machregs[] +.br +Info per register. +Initialized with representation string, size, +members of the register and set of registers affected when this +one is changed. +Also contains room for run time information, +like contents and reference count. +.ti -0.5i +tkdef_t tokens[] +.br +Information per tokentype. +Initialized with size, cost, type of operands and formatstring. +.ti -0.5i +node_t enodes[] +.br +List of triples representing expressions for the code generator. +.ti -0.5i +string codestrings[] +.br +List of strings. +All strings are put in a list and checked for duplication, +so only one copy per string will reside here. +.ti -0.5i +set_t machsets[] +.br +List of token expression sets. +Bit 0 of the set is used for the SCRATCH property of registers, +bits 1 up to NREG are for the corresponding registers +and bits NREG+1 up to the end are for corresponding tokens. +.ti -0.5i +inst_t tokeninstances[] +.br +List of descriptions for building tokens.
+Contains type of rule for building one, +plus operands depending on the type. +.ti -0.5i +move_t moves[] +.br +List of move rules. +Contains token expressions for source and destination +plus index for code rule. +.ti -0.5i +test_t tests[] +.br +List of test rules. +Contains token expressions for source +plus index for code rule. +.ti -0.5i +byte pattern[] +.br +EM patterns. +This is structured internally as chains of patterns, +each chain pointed at by pathash[]. +After each pattern the list of possible code rules is given. +.ti -0.5i +int pathash[256] +.br +Indices into pattern[] for all patterns with a certain low order +byte of the hashing function. +.ti -0.5i +c1_t c1coercs[] +.br +List of rules to stack tokens. +Contains token expressions, +register needed, +cost +and code rule. +.ti -0.5i +c2_t c2coercs[] +.br +List of splitting coercions. +Token expressions, +split factor, +replacements +and code rule. +.ti -0.5i +c3_t c3coercs[] +.br +List of one to one coercions. +Token expressions, +register needed, +replacement +and code rule. +.ti -0.5i +struct reginfo **reglist[] +.br +List of lists of pointers to register information. +For every property the list is here +to find the registers corresponding to it. +.in 0 +.NH 3 +tables.h +.PP +In tables.h various derived constants for the tables are +given. +They are then used to determine array sizes in the actual code generator, +plus loop termination in some cases. +.NH 2 +Other important data structures +.PP +During code generation some other data structures are used +and here is a short description of some of the important ones. +.PP +Tokens are kept in the code generator as a struct consisting of +one integer +.I t_token +which is -1 if the token is a register, +and the number of the token otherwise, +plus an array of +.I TOKENSIZE +unions +.I t_att +of which the first is the register number in case of a register. +.PP +The fakestack is an array of these tokens, +there is a global variable +.I stackheight . 
+.PP +The results of expressions are kept in a struct +.I result +with elements +.I e_typ , +giving the type of the expression: +.I EV_INT , +.I EV_REG +or +.I EV_ADDR , +and a union +.I e_v +which contains the real result. +.NH 2 +A tour through the sources +.NH 3 +codegen.c +.PP +The file codegen.c contains one large function consisting +of one giant switch statement. +It is the interpreter for the code generator pseudo code +as contained in coderules[]. +This function can call itself recursively when doing look ahead. +Arguments are: +.IP codep 10 +Pointer into code rules, pseudo program counter. +.IP ply +Number of EM pattern look ahead allowed. +.IP toplevel +Boolean telling whether this is the toplevel codegen() or +a deeper incarnation. +.IP costlimit +A cutoff value to limit searches. +If the cost crosses costlimit the incarnation can terminate. +.IP forced +A register number if nonzero. +This is used inside coercions to force the allocate() call to allocate +a register determined by earlier look ahead. +.PP +The instructions implemented in the switch: +.NH 4 +DO_DLINE +.PP +Prints debugging information if the code generator runs in debug mode. +This information is only generated if +.I cgg +was called with the -d flag. +.NH 4 +DO_NEXTEM +.PP +Matches the next EM pattern and does look ahead if necessary to find the best +code rule associated with this pattern. +Heuristics are used to determine best code rule when possible. +This is done by calling the distance() function. +It can also handle the procedure mechanism. +.NH 4 +DO_COERC +.PP +This sets the code generator in the state to do a from stack coercion. +.NH 4 +DO_XMATCH +.PP +This is done when a match no longer has to be checked. +Used when the nocoercions: trick is used in the table. +.NH 4 +DO_MATCH +.PP +This is the big one inside this function. +It has the task to transform the contents of the current +fake stack to match the pattern given after it.
+.PP +Since the code generator does not know combining coercions, +i.e. there is no way to make a big token out of two smaller ones, +the first thing done is to stack every token that is too small. +After that all tokens too big are split if possible to the right size. +.PP +Next the coercions are sought that would transform tokens in place to +the right one, plus the coercions that would pop tokens off the stack. +Each of those might need a register, so a list of registers is generated +and at the end of looking for coercions the function +.I tuples() +is called to generate the list of all possible \fIn\fP-tuples, +where +.I n +equals the number of registers needed. +.PP +Look ahead is now performed if the number of tuples is greater than one. +If no possibility is found within the costlimit, +the fake stack is made smaller by pushing the bottom token, +and this process is repeated until either a way is found or +the fake stack is completely empty and there is still no way +to make the match. +.PP +If there is a way the corresponding coercions are executed +and the code is finished. +.NH 4 +DO_REMOVE +.PP +Here the kills clause is executed, all tokens matched by the +token expression plus boolean expression are pushed. +In the current implementation there is no attempt to move those +tokens to registers, but that is a possible future extension. +.NH 4 +DO_DEALLOCATE +.PP +This one temporarily decrements by one the reference count of all registers +contained in the token given as argument. +.NH 4 +DO_REALLOCATE +.PP +Here all temporary deallocations are undone. +.NH 4 +DO_ALLOCATE +.PP +This is the part that allocates a register and decides which one to use. +If the +.I forced +argument was given its task is simple, +otherwise some work must be done. +First the list of possible registers is scanned, +all free registers noted and it is noted whether any of those +registers already +contains the initialization.
+If no registers are available some fakestack token is stacked and the +process is repeated. +.PP +After that if an exact match was found, +the list of registers is reduced to one register matching exactly +out of every register class. +Now look ahead is performed if necessary and the register chosen. +If an initialization was given the corresponding move is performed, +otherwise the register is marked empty. +.NH 4 +DO_INSTR +.PP +This prints an instruction and its operands. +Only done on toplevel. +.NH 4 +DO_MOVE +.PP +Calls the move() function in the code generator to implement the move +instruction in the table. +.NH 4 +DO_TEST +.PP +Calls the test() function in the code generator to implement the test +instruction in the table. +.NH 4 +DO_ERASE +.PP +Marks the register that is its argument as empty. +.NH 4 +DO_TOKREPLACE +.PP +This is the token replacement part. +It is also called if there is no token replacement because it has +some other functions as well. +.PP +First the tokens that will be pushed on the fake stack are computed +and stored in a temporary array. +Then the tokens that were matched in this rule are popped +and their embedded registers have their reference count +decremented. +After that the replacement tokens are pushed. +.PP +Finally all registers allocated in this rule have their reference count +decremented. +If they were not pushed on the fake stack they will be available again +in the next code rule. +.NH 4 +DO_EMREPLACE +.PP +Places replacement EM instructions back into the instruction stream. +.NH 4 +DO_COST +.PP +Accounts for cost as given in the code rule. +.NH 4 +DO_RETURN +.PP +Returns from this level of codegen(). +Is used at the end of coercions, +move rules etc.. +.NH 4 +DO_LABDEF +.PP +This prints a label when the top element size mechanism is used. Only done on +toplevel. +.NH 3 +compute.c +.PP +This module computes the various expressions as given +in the enodes[] array. 
+Nothing very special happens here, +it is just a recursive function computing leaves +of expressions and applying the operator. +.NH 3 +equiv.c +.PP +In this module the tuples() function is implemented. +It is given the number of registers needed and +a list of register lists and it constructs a list of tuples +where the \fIn\fP'th register comes from the \fIn\fP'th list. +Before the list is constructed however +the dynamic register classes are computed. +Two registers are in the same dynamic class if they are in the +same static class and their contents are the same. +.PP +After that the permute() recursive function is called to +generate the list of tuples. +After construction a generated tuple is added to the list +if it is not already pairwise in the same class +or if the register relations are not the same, +i.e. if the first and second register share a common +subregister in one tuple and not in the other they are considered different. +.NH 3 +fillem.c +.PP +This is the routine that does the reading of EM instructions +and the handling of pseudos. +The mach.c module provided by the table writer is included +at the end of this module. +The routine fillemlines() is called by nextem() at toplevel +to make sure there are enough instructions to match. +It fills the EM instruction buffer up to 5 places from the end to +keep room for EM replacement instructions, +or up to a pseudo. +.PP +The dopseudo() function performs the function of the pseudo last +encountered. +If the pseudo is a +.B rom +the corresponding label is saved with the contents of the +.B rom +to be available to the code generator later. +The rest of the routines are small service routines for either +input or data output. +.NH 3 +gencode.c +.PP +This module contains routines called by codegen() to generate the real +code to the codefile. +The function genstr() gets a string as argument and copies it to codefile. +The prtoken() function interprets the tokenformat as given in +the tokens[] array.
+.NH 3 +glosym.c +.PP +This module maintains a list of global symbols that have a +.B rom +pseudo associated. +There are functions to enter a symbol and to find a symbol. +.NH 3 +label.c +.PP +This module contains routines to handle the top element size messages. +.NH 3 +main.c +.PP +Main routine of the code generator. +Processes arguments and flags. +Flags available are: +.IP -d +Sets debug mode if the code generator was not compiled with +the NDEBUG macro defined. +The flag can be followed by a digit specifying the amount of debugging +wanted, +and by @labelname giving the start of debugging. +Debug mode gives very long output on stderr indicating +all steps of the code generation process including nesting +of the codegen() function. +.IP -p\fIn\fP +Sets the look ahead depth to +.I n , +the +.I p +stands for ply, +a well known word in chess playing programs. +.IP -w\fIn\fP +Sets the weight percentage for size in the cost function to +.I n +percent. +Uses Euclid's algorithm to simplify rationals. +.NH 3 +move.c +.PP +Function to implement the move instruction in the tables, +register initialization and the test instruction and associated bookkeeping. +First tests are made to try to prevent the move from really happening. +After that, if there is an after that, +the move rule is found and the code executed. +.NH 3 +nextem.c +.PP +The entry point of this module is nextem(). +It hashes the next three EM instructions, +and uses the low order byte of the hash +as an index into the array pathash[], +to find a chain of patterns in the array +pattern[], +that are all tried for a match. +.PP +The function trypat() does most of the work +checking patterns. +When a pattern is found to match all instructions +the operands of the instruction are placed into the dollar[] array. +Then the boolean expression is tried. +If it matches the function can return, +leaving the operands still in the dollar[] array, +so later in the code rule they can still be used.
+.NH 3 +reg.c +.PP +Collection of routines to handle registers. +Reference count routines are here, +chrefcount() and getrefcount(), +plus routines to erase a single register or all of them, +erasereg() and cleanregs(). +.PP +If NDEBUG hasn't been defined, here is also the routine that checks +if the reference count kept with the register information is in +agreement with the number of times it occurs on the fake stack. +.NH 3 +salloc.c +.PP +Module for string allocation and garbage collection. +Contains entry points myalloc(), +a routine calling malloc() and checking whether room is left, +myfree(), just free(), +popstr() a function called from state.c to free all strings +made since the last saved status. +Furthermore there is salloc() which has the size of the string as parameter +and returns a pointer to the allocated space, +while keeping a copy of the pointer for garbage collection purposes. +.PP +The function garbage_collect is called from codegen() at toplevel +every now and then, +and checks all places where strings may reside to mark strings +as being in use. +Strings not in use are returned to the pool of free space. +.NH 3 +state.c +.PP +Set of routines called to save current status and +restore a previously saved state. +.NH 3 +subr.c +.PP +Random set of leftover routines. +.NH 4 +match +.PP +Computes whether a certain token matches a certain token expression. +Just computes a bitnumber according to the algorithm explained with +machsets[], +and tests the bit and the boolean expression if it is there. +.NH 4 +instance,cinstance +.PP +These two functions compute a token from a description. +They differ very slightly: cinstance() is used to compute +the result of a coercion in a certain context +and therefore has more arguments, which it uses instead of +the global information instance() works on. +.NH 4 +eqtoken +.PP +eqtoken computes whether two tokens can be considered identical. +Used to check register contents during moves mainly.
+.NH 4 +distance +.PP +This is the heuristic function that computes a distance from +the current fake stack contents to the token pattern in the table. +It likes exact matches most, then matches where at least the sizes are correct +and if the sizes are not correct it likes too large sizes more than too +small, since splitting a token is easier than combining one. +.NH 4 +split +.PP +This function tries to find a splitting coercion +and executes it immediately when found. +The fake stack is shuffled thoroughly when this happens, +so pieces below the token that must be split are saved first. +.NH 4 +docoerc +.PP +This function executes a coercion that was found. +The same shuffling is done, so the top of the stack is again saved. +.NH 4 +stackupto +.PP +This function gets a pointer into the fake stack and must stack +every token including the one pointed at up to the bottom of the fake stack. +The first stacking rule possible is used, +so rules using registers must come first. +.NH 4 +findcoerc +.PP +Looks for a one to one coercion, if found it returns a pointer +to it and leaves a list of possible registers to use in the global +variable curreglist. +This is used by codegen(). +.NH 3 +var.c +.PP +Global variables used by more than one module. +External definitions are in extern.h. diff --git a/doc/nopt.doc b/doc/nopt.doc new file mode 100644 index 0000000..48b9d55 --- /dev/null +++ b/doc/nopt.doc @@ -0,0 +1,591 @@ +.\" $Id: nopt.doc,v 2.5 1994/06/24 10:02:13 ceriel Exp $ +.TL +A Tour of the New Peephole Optimizer +.AU +B. J. McKenzie +.NH +Introduction +.LP +The peephole optimizer consists of four major parts: +.IP a) +the table describing the optimization to be performed +.IP b) +a program to parse these tables and build input and output routines to +interface to the library and a dfa based routine to recognize patterns and +make the requested replacements. 
+.IP c) +common routines for the library that are independent of the table of a) +.IP d) +a stand alone version of the optimizer. +.LP +The library conforms to the +.I EM_CODE(3) +module interface but with routine names of the form +.BI C_ xxx +replaced by names like +.BI O_ xxx. +Furthermore there is also no routine +.I O_getid +and no variable +.I O_tmpdir +in the module. +The library module results in calls to the usual +.I EM_CODE(3) +module. It is possible to write a front end so that it can call either the +normal +.I EM_CODE(3) +module or this new module by adding +.B +#define PEEPHOLE +.R +before the line +.B +#include +.R +This will map all calls to the routine +.BI C_ xxx +into a call to the routine +.BI O_ xxx. + +.LP +We shall now describe each of these major parts in some detail. + +.NH +The optimization table +.LP +The file +.I patterns +contains the patterns of EM instructions to be recognized by the optimizer +and the EM instructions to replace them. Each pattern may have an +optional restriction that must be satisfied before the replacement is made. +The syntax of the table will be described using extended BNF notation +used by +.I LLGen +where: +.DS +.I + [...] - are used to group items + | - is used to separate alternatives + ; - terminates a rule + ? - indicates item is optional + * - indicates item is repeated zero or more times + + - indicates item is repeated one or more times +.R +.DE +The format of each rule in the table is: +.DS +.I + rule : pattern global_restriction? ':' replacement + ; +.R +.DE +Each rule must be on a single line except that it may be broken after the +colon if the next line begins with a tab character. +The pattern has the syntax: +.DS +.I + pattern : [ EM_mnem [ local_restriction ]? ]+ + ; + EM-mnem : "An EM instruction mnemonic" + | 'lab' + ; +.R +.DE +and consists of a sequence of one or more EM instructions or +.I lab +which stands for a defined instruction label. 
Each EM-mnem may optionally be +followed by a local restriction on the argument of the mnemonic, which takes +one of the following forms depending on the type of the EM instruction it +follows: +.DS +.I + local_restriction : normal_restriction + | opt_arg_restriction + | ext_arg_restriction + ; +.R +.DE +A normal restriction is used after all types of EM instruction except for +those that allow an optional argument, (such as +.I adi +) or those involving external names, (such as +.I lae +) +and takes the form: +.DS +.I + normal_restriction : [ rel_op ]? expression + ; + rel_op : '==' + | '!=' + | '<=' + | '<' + | '>=' + | '>' + ; +.R +.DE +If the rel_op is missing, the equality +.I == +operator is assumed. The general form of expression is defined later but +basically it involves simple constants, references to EM_mnem arguments +that appear earlier in the pattern and expressions similar to those used +in C expressions. + +The form of the restriction after those EM instructions like +.I adi +whose arguments are optional takes the form: +.DS +.I + opt_arg_restriction : normal_restriction + | 'defined' + | 'undefined' + ; +.R +.DE +The +.I defined +and +.I undefined +indicate that the argument is present +or absent respectively. The normal restriction form implies that the +argument is present and satisfies the restriction. + +The form of the restriction after those EM instructions like +.I lae +whose arguments refer to external objects takes the form: +.DS +.I + ext_arg_restriction : patarg offset_part? + ; + offset_part : [ '+' | '-' ] expression + ; +.R +.DE +Such an argument has one of three forms: an offset with no name, an +offset from a name or an offset from a label. With no offset part +the restriction requires the argument to be identical to a previous +external argument. With an offset part it requires an identical name +part, (either empty, same name or same label) and supplies a relationship +among the offset parts.
It is possible to test for the same +external argument, the same name or to obtain the offset part of an external +argument using the +.I sameext +, +.I samenam +and +.I offset +functions given below. +.LP +The general form of an expression is: +.DS +.I + expression : expression binop expression + | unaryop expression + | '(' expression ')' + | bin_function '(' expression ',' expression ')' + | ext_function '(' patarg ',' patarg ')' + | 'offset' '(' patarg ')' + | patarg + | 'p' + | 'w2' + | 'w' + | INTEGER + ; +.R +.DE +.DS +.I + bin_function : 'sfit' + | 'ufit' + | 'samesign' + | 'rotate' + ; +.R +.DE +.DS +.I + ext_function : 'samenam' + | 'sameext' + ; + patarg : '$' INTEGER + ; + binop : "As for C language" + unaryop : "As for C language" +.R +.DE +The INTEGER in the +.I patarg +refers to the first, second, etc. argument in the pattern and it is +required to refer to an argument that appears earlier in the pattern. +The +.I w +and +.I p +refer to the word size and pointer size (in bytes) respectively. +The +.I w2 +refers to twice the word size. +The +various functions test for: +.IP sfit 10 +the first argument fits as a signed value of +the number of bits specified by the second argument. +.IP ufit 10 +as for sfit but for unsigned values. +.IP samesign 10 +the first argument has the same sign as the second. +.IP rotate 10 +the value of the first argument rotated by the number of bits specified +by the second argument. +.IP samenam 10 +both arguments refer to externals and have either no name, the same name +or same label. +.IP sameext 10 +both arguments refer to the same external. +.IP offset 10 +the argument is an external and this yields its offset part. + +.LP +The global restriction takes the form: +.DS +.I + global_restriction : '?' expression + ; +.R +.DE +and is used to express restrictions that cannot be expressed as simple +restrictions on a single argument or can be expressed in a more +readable fashion as a global restriction.
An example of such a rule is: +.DS +.I + dup w ldl stf ? p==2*w : ldl $2 stf $3 ldl $2 lof $3 +.R +.DE +which says that this rule only applies if the pointer size is twice the +word size. + +.NH +Incompatibilities with Previous Optimizer +.LP +The current table format is not compatible with previous versions of the +peephole optimizer tables. In particular the previous table had no provision +for local restrictions and only the equivalent of the global restriction. +This meant that our +.I '?' +character that announces the presence of the optional global restriction was +not required. The previous optimizer performed a number of other tasks that +were unrelated to optimization that were possible because the old optimizer +read the EM code for a complete procedure at a time. This included tasks such +as register variable reference counting and moving the information regarding +the number of bytes of local storage required by a procedure from its +.I end +pseudo instruction to its +.I pro +pseudo instruction. These tasks are no longer done by this module but have +been moved to other modules or programs in the pipeline. The register variable +reference counting is now performed by the front end. The reordering of +code, such as the moving of mes instructions and the local storage +requirements from the end to the beginning of procedures, is now performed using +the insertpart mechanism in the +.I EM_CODE +(or +.I EM_OPT +) module. +The removal of dead code is performed by the global optimizer. +Various +.I ext_functions +available in the old tables are no longer available as they rely on +information that is not available to the current program. +These are the +.I notreg +and the +.I rom +functions. +The previous optimizer allowed the use of +.I LLP, +.I LEP, +.I SLP +and +.I SEP +in patterns. For example +.I LLP +stood for either +.I lol +if the pointer size was the same as the word size, or for +.I ldl +if the pointer size was twice the word size.
+In the current optimizer it is necessary to include two patterns for each +such single pattern in the old table. For example for a pattern containing +.I LLP +there would be one pattern with +.I lol +and with a global restriction of the form +.I p==w +and another pattern with ldl and a global restriction of the form +.I p==2*w. + +.NH +The Parser +.LP +The program to parse the tables and build the pattern table dependent dfa +routines is built from the files: +.IP parser.h 15 +header file +.IP parser.g 15 +LLGen source file defining syntax of table +.IP syntax.l 15 +Lex source file defining form of tokens in table. +.IP initlex.c 15 +Uses the data in the library +.I em_data.a +to initialize the lexical analyzer to recognize EM instruction mnemonics. +.IP outputdfa.c 15 +Routines to output the dfa when it has been constructed. It outputs the files +.I dfa.c +and +.I trans.c +.IP outcalls.c 15 +Routines to output the file +.I incalls.r +defined in the next section. +.IP findworst.c 15 +Routines to analyze patterns to find how to continue matching after a +successful replacement or failed match. + +.LP +The parser checks that the tables conform to the syntax outlined in the +previous section and also makes a number of semantic checks on their +validity. Future versions could make further checks such as looking for +cycles in the rules or checking that each replacement leaves the same +number of bytes on the stack as the pattern it replaces. The parser +builds an internal dfa representation of the rules by combining rules with +common prefixes. All local and global restrictions are combined into a single +test to be performed after a complete pattern has been detected in the input. +The idea is to build a structure so that each of the patterns can be matched +and then the corresponding tests made and the first that succeeds is replaced. +If two rules have the same pattern and both their tests also succeed the one +that appears first in the tables file will be done.
Somewhat less obvious +is that if one pattern is a proper prefix of a longer pattern and its test +succeeds then the second pattern will not be checked for. + +A major task of the parser is to decide on the action to take when a rule has +been partially matched or when a pattern has been completely matched but its +test does not succeed. This requires a search of all patterns to see if any +part of the part matched could be part of some other pattern. For example, +given the two patterns: +.DS +.I + loc adi w loc adi w : loc $1+$3 adi w + loc adi w loc sbi w : loc $1-$3 adi w +.R +.DE +If the first pattern fails after seeing the input: +.DS +.I + loc adi loc +.R +.DE +the parser will still need to check whether the second pattern matches. +This requires a decision on how to fix up any internal data structures in +the dfa matcher, such as moving some instructions from the pattern to the +output queue and moving the pattern along and then deciding what state +it should continue from. Similar decisions are required after a pattern +has been replaced. For example if the replacement is empty it is necessary +to back up +.I n-1 +instructions where +.I n +is the length of the longest pattern in the tables. + +.NH +Structure of the Resulting Library + +.LP +The major data structures maintained by the library consist of three queues; +an +.I output +queue of instructions awaiting output, a +.I pattern +queue containing instructions that match the current prefix, and a +.I backup +queue of instructions that have been backed up over and need to be reparsed +for further pattern matches. +These three queues are maintained in a single fixed size buffer as explained +in more detail in the next section. +Also, after a successful match, a replacement queue is constructed.
+ + +.LP +If no errors are detected by the parser in the tables it outputs the following +files if they have changed from the existing version of the file: +.IP dfa.c 10 +this contains the dfa encoded into a number of arrays using the technique +of row displacement for compacted sparse matrices. Given an opcode and +the current state, the value of +.I OO_base[OO_state] +is consulted to obtain a pointer into the array +.I OO_checknext. +If this pointer is zero or the +.I check +field of the addressed structure does +not correspond to the current state then it is known there is no entry for +this opcode/state pair and the +.I OO_default +array is consulted instead. +If the check field does match then the +.I next +field contains the new state. +After each transition the array +.I OO_ftrans +is consulted to see if this state corresponds to a final state +(i.e. a complete pattern) and if so the corresponding function is called. +.IP trans.c 10 +this contains external declarations of transition routines with names like +.B OO_xxxdotrans +(where +.I xxx +is a small integer). +These are called when there is a transition to state +.I xxx +that corresponds to a +complete pattern. Any tests are performed if necessary to confirm that the +pattern matches and then the replacement instructions are placed on the +output queue and the routine +.I OO_mkrepl +is called to make the replacement and to back up by the amount required. +If there are a number of patterns with the same instructions but different +tests, these will all appear in the same routine and the tests performed in +the order they appear in the original +.I patterns +file. +.IP incalls.r 10 +this contains an entry for every EM instruction (plus +.I lab +) giving information on how to build a routine with the name +.BI O_ xxx +for the library version of the module.
+If the EM instruction does not appear in the tables +patterns at all then the dfa routine is called to flush any currently queued +output and the output +.BI C_ xxx +routine is called. If the EM instruction does appear in a pattern then the +instruction data structure fields are +initialized and it is added onto the end of the pattern queue. +The dfa routines are then called to attempt to make a transition. +This file is input to the +.I awk +program +.I makefuns.awk. + +.LP +The following files contain code that is independent of the pattern tables: +.IP main.c 10 +this is used only in the stand alone version of the optimizer and consists +of code to open the input file, read the input using the +.I READ_EM(3) +module and call the dfa routines. This version does not require the routines +constructed from the incalls.r file described above. +.IP nopt.c 10 +general routines to initialize and maintain the data structures. The file +handling routines +.I O_open +etc are defined here. Also defined are routines for flushing the output queue +by calling the +.I EM_mkcalls +routine from the +.I READ_EM(3) +module and moving instructions from the output to the backup queue. +Routines to free the strings stored in instructions +with types of +.I sof_ptyp, +.I pro_ptyp, +.I str_ptyp, +.I ico_ptyp, +.I uco_ptyp, +and +.I fco_ptyp are also defined. These strings are copied to a large array that +is extended by +.I Realloc +if it overflows. The strings can be thrown away on any flush that occurs when +the backup queue is empty. +.IP mkstrct.c 10 +contains routines to build the data structure from the input +.BI C_ xxx +routines and place the structure on the pattern queue. These routines are also +used to build the data structures when a replacement is constructed. +.IP aux.c 10 +routines to implement the external functions used in the pattern table.
+ +.LP +The following files are also used in building the module library: +.IP makefuns.awk 10 +this +.I awk +program is used to produce individual C files with names like +.BI O_ xxx.c +each containing a single function definition and then call the +.I cc +compiler to produce a single output file. +This enables the loader to only load those routines that are actually +needed when the library is loaded. +.IP pseudo.r 10 +this file is like the +.I incalls.r +file produced by the parser but is built by hand and handles the pseudo +EM instructions. It is also processed by +.I makefuns.awk. + +.NH +Miscellaneous Issues +.LP +The output, pattern and backup queues are maintained in a fixed length array, +.I OO_buffer +allocated with size +.I MAXBUFFER +(a constant declared in nopt.h) at run time. +It consists of an array of the +.I e_instr +data structure used by the +.I READ_EM(3) +module. +At any time the pointers +.I OO_patternqueue +and +.I OO_nxtpatt +point to the beginning and end of the current pattern prefix that corresponds +to the current state. Any instructions on the backup queue are between +.I OO_nxtpatt +and +.I OO_endbackup. +If there are no instructions on the backup queue then +.I OO_endbackup +will be 0 (zero). +The size of the replacement queue is set to the maximum +replacement length by the tables output by the parser. + +.LP +The fixed size of the buffer causes no difficulty in +practice and can only result in some potential optimizations being missed. +When space for a new instruction is required and the buffer is full the +routine +.I OO_halfflush +is called to flush half the buffer and move all the data structures left. +It should be noted that it is not possible to statically determine the +maximum possible size for these queues as they need to be unbounded in +the worst case.
+A study of the rule +.DS +.I + inc dec : +.R +.DE +with the input consisting of +.I N +.I inc +and then +.I N +.I dec +instructions requires an output queue length of +.I N-1 +to find all possible replacements. diff --git a/doc/occam/ctot b/doc/occam/ctot new file mode 100755 index 0000000..f4fe648 --- /dev/null +++ b/doc/occam/ctot @@ -0,0 +1,8 @@ +sed 's/^$/.sp 0.5/ +s/\\/\\e/g +s/^ $/.ft\ +.DE\ +.bp\ +.DS\ +.ft CW\ +.ta 0.65i 1.3i 1.95i 2.6i 3.25i 3.9i 4.55i 5.2i 5.85i 6.5i/' diff --git a/doc/occam/p0 b/doc/occam/p0 new file mode 100644 index 0000000..1055ec1 --- /dev/null +++ b/doc/occam/p0 @@ -0,0 +1,21 @@ +.pl 11.7i +.ND +.de PT +.if \\n%>0 .if e .tl '\fB%\fP''' +.if \\n%>1 .if o .tl '''\fB%\fP' +.. +.TL +An Occam Compiler +.AU +Kees Bot +Edwin Scheffer +.AI +Vrije Universiteit +Amsterdam, The Netherlands +.AB +This document describes the implementation of an \fBOccam\fP to \fBEM\fP +compiler. The lexical analysis is done using \fBLex\fP. +For the semantic analysis the extended LL(1) parser generator \fBLLgen\fP is +used. To handle the Occam-specific features as channels and parallelism some +library routines are required. +.AE diff --git a/doc/occam/p1 b/doc/occam/p1 new file mode 100644 index 0000000..1d45e80 --- /dev/null +++ b/doc/occam/p1 @@ -0,0 +1,87 @@ +.NH +Introduction +.PP +Occam [1] is a programming language which is based on the concepts of +concurrency and communication. These concepts enable today's applications of +microprocessors and computers to be implemented more effectively. +.PP +An Occam program consists of a (dynamically determined) number +of processes communicating through channels. +To communicate with the outside world some predefined channels are needed. +A channel has only one writer and one reader; it carries machine words and +bytes, at the reader/writer's discretion. The process with its communication +in Occam replaces the procedure with parameters in other languages (there are +no procedures in Occam). 
+.PP +In addition to the normal assignment statement, Occam has two more +information-transfer statements, the input and the output: +.DS +.ft CW + chan1 ? x -- reads a value from chan1 into x + chan2 ! x -- writes the value of x onto chan2 +.ft +.DE +Both the outputting and the inputting processes wait until the other is there. +Channels are declared and given names. Arrays of channels are possible. +.PP +Processes come in 5 varieties: sequential, parallel, alternative, +conditional and repetitive. A process starts with a reserved word telling +its nature, followed by an indented list of other processes. (Indentation +is used to indicate block structure.) It may be preceded by declarations. +The processes in a sequential/parallel process are executed sequentially/in +parallel. The processes in an alternative process have guards based on the +availability of input; the first to be ready is executed (this is waiting +for multiple input). The conditional and repetitive processes are normal +\fBIF\fPs and \fBWHILE\fPs. +.PP +\fIProducer-consumer example:\fP +.DS +.ft CW +.nf +CHAN buffer: -- declares the channel buffer +PAR + WHILE TRUE -- the producer + VAR x: -- a local variable + SEQ + produce(x) -- in some way + buffer ! x -- and send it + WHILE TRUE -- the consumer + VAR x: + SEQ + buffer ? x -- get a value + consume(x) -- in some way +.ft +.fi +.DE +.bp +.PP +Processes can be replicated from a given template; this combines +with arrays of variables and/or channels. +.PP +\fIExample: 20 window-sorters in series:\fP +.DS +.ft CW +.nf +CHAN s[20]: -- 20 channels +PAR i = [ 0 FOR 19 ] -- 19 processes + WHILE TRUE + VAR v1, v2: + SEQ + s[i] ? v1; v2 -- wait for 2 variables from s[i] + IF + v1 <= v2 -- ok + s[i+1] ! v1; v2 + v1 > v2 -- reorder + s[i+1] ! v2; v1 +.fi +.ft +.DE +.PP +A process may wait for a condition, which must include a comparison +with \fBNOW\fP, the present clock value. 
+.PP +Processes may be distributed over several processors; all processes +under a \fBVAR\fP declaration must run on the same processor. Concurrency can be +improved by avoiding \fBVAR\fP declarations, and replacing them by \fBCHAN\fP +declarations. Processes can be allocated explicitly on named processors and +channels can be connected to physical ports. diff --git a/doc/occam/p2 b/doc/occam/p2 new file mode 100644 index 0000000..b7003a6 --- /dev/null +++ b/doc/occam/p2 @@ -0,0 +1,151 @@ +.NH +The Compiler +.PP +The compiler is written in \fBC\fP using LLgen and Lex and compiles +Occam programs to EM code, using the procedural interface as defined for EM. +In the following sub-sections we describe the LLgen parser generator and +the aspect of indentation. +.NH 2 +The LLgen Parser Generator +.PP +LLgen accepts a Context Free syntax extended with the operators `\f(CW*\fP', `\f(CW?\fP' and `\f(CW+\fP' +that have effects similar to those in regular expressions. +The `\f(CW*\fP' is the closure set operator without an upperbound; `\f(CW+\fP' is the positive +closure operator without an upperbound; `\f(CW?\fP' is the optional operator; +`\f(CW[\fP' and `\f(CW]\fP' can be used for grouping. +For example, a comma-separated list of expressions can be described as: +.DS +.ft CW + expression_list: + expression [ ',' expression ]* + ; +.ft +.DE +.LP +Alternatives must be separated by `\f(CW|\fP'. +C code (``actions'') can be inserted at all points between the colon and the +semicolon. +Variables global to the complete rule can be declared just in front of the +colon enclosed in the brackets `\f(CW{\fP' and `\f(CW}\fP'. All other declarations are local to +their actions. +Nonterminals can have parameters to pass information. 
+A more mature version of the above example would be: +.DS +.ft CW + expression_list(expr *e;) { expr e1, e2; } : + expression(&e1) + [ ',' expression(&e2) + { e1=append(e1, e2); } + ]* + { *e=e1; } + ; +.ft +.DE +As LLgen generates a recursive-descent parser with no backtracking, it must at all +times be able to determine what to do, based on the current input symbol. +Unfortunately, this cannot be done for all grammars. Two kinds of conflicts +are possible, viz. the \fBalternation\fP and \fBrepetition\fP conflict. +An alternation conflict arises if two sides of an alternation can start with the +same symbol. E.g. +.DS +.ft CW + plus: '+' | '+' ; +.ft +.DE +The parser doesn't know which `\f(CW+\fP' to choose (neither do we). +Such a conflict can be resolved by putting an \fBif-condition\fP in front of +the first conflicting production. It consists of a \fB``%if''\fP followed by a +C-expression between parentheses. +If a conflict occurs (and only if it does) the C-expression is evaluated and +parsing continues along this path if non-zero. Example: +.DS +.ft CW + plus: + %if (some_plusses_are_more_equal_than_others()) + '+' + | + '+' + ; +.ft +.DE +A repetition conflict arises when the parser cannot decide whether +``\f(CWproductionrule\fP'' in e.g. ``\f(CW[ productionrule ]*\fP'' must be chosen +once more, or whether it should continue. +This kind of conflict can be resolved by putting a \fBwhile-condition\fP right +after the opening bracket. It consists of a \fB``%while''\fP +followed by a C-expression between parentheses. As an example, we can look at +the \fBcomma-expression\fP in C. The comma may only be used for the +comma-expression if the total expression is not part of another comma-separated +list: +.DS +.nf +.ft CW + comma_expression: + sub_expression + [ %while (not_part_of_comma_separated_list()) + ',' sub_expression + ]* + ; +.ft +.fi +.DE +Again, the \fB``%while''\fP is only used in case of a conflict.
+.LP +Error recovery is done almost completely automatically. All the LLgen-user has to do +is write a routine called \fILLmessage\fP to give the necessary error +messages and supply information about terminals found missing. +.NH 2 +Indentation +.PP +The way conflicts can be resolved is of great use to Occam. The use of +indentation, to group statements, leads to many conflicts because the spaces +used for indentation are just token separators to the lexical analyzer, i.e. +``white space''. The lexical analyzer can be instructed to generate `BEGIN' and +`END' tokens at each indentation change, but that leads to great difficulties +as expressions may occupy several lines, thus leading to indentation changes +at the strangest moments. So we decided to resolve the conflicts by looking +at the indentation ourselves. The lexical analyzer puts the current indentation +level in the global variable \fIind\fP for use by the parser. The best example +is the \fBSEQ\fP construct, which exists in two flavors, one with a replicator +and one process: +.DS +.nf +.ft CW + seq i = [ 1 for str[byte 0] ] + out ! str[byte i] +.ft +.fi +.DE +and one without a replicator but with several processes: +.DS +.nf +.ft CW + seq + in ? c + out ! c +.ft +.fi +.DE +The LLgen skeleton grammar to handle these two is: +.DS +.nf +.ft CW + SEQ { line=yylineno; oind=ind; } + [ %if (line==yylineno) + replicator + process + | + [ %while (ind>oind) process ]* + ] +.ft +.fi +.DE +This shows clearly that a replicator must be on the same line as the \fBSEQ\fP, +and new processes are collected as long as the indentation level of each process +is greater than the indentation level of \fBSEQ\fP (with appropriate checks on this +indentation). +.PP +Different indentation styles are accepted, as long as the same number of spaces +is used for each indentation shift. The ascii tab character sets the indentation +level to an eight space boundary.
The first indentation level found in a file +is used to compare all other indentation levels to. diff --git a/doc/occam/p3 b/doc/occam/p3 new file mode 100644 index 0000000..fca586d --- /dev/null +++ b/doc/occam/p3 @@ -0,0 +1,337 @@ +.NH +Implementation +.PP +It is now time to describe the implementation of some of the occam-specific +features such as channels and \fBNOW\fP. Also the way communication with +UNIX\(dg is performed must be described. +.FS +\(dg UNIX is a trademark of Bell Laboratories +.FE +For a thorough description of the library routines to simulate parallelism, +which are e.g. used by the channel routines and by the \fBPAR\fP construct +in Appendix B, see [6]. +.NH 2 +Channels +.PP +There are currently two types of channels (see Figure 1.) indicated by the type +field of a channel variable: +.IP - +An interprocess communication channel with two additional fields: +.RS +.IP - +A synchronization field to hold the state of an interprocess communication +channel. +.IP - +An integer variable to hold the value to be sent. +.RE +.IP - +An outside world communication channel. This is a member of an array of +channels connected to UNIX files. Its additional fields are: +.RS +.IP - +A flags field holding a readahead flag and a flag that tells if this channel +variable is currently connected to a file. +.IP - +A preread character, if readahead is done. +.IP - +An index field to find the corresponding UNIX file.
+.RE +.LP +.PS +box ht 3.0 wid 3.0 +box ht 0.75 wid 0.75 with .nw at 1st box.nw + (0.5, -0.5) "Process 1" +box ht 0.75 wid 0.75 with .ne at 1st box.ne + (-0.5, -0.5) "Process 2" +box ht 0.75 wid 0.75 with .sw at 1st box.sw + (0.5, 0.5) "Process 3" +box ht 0.75 wid 0.75 with .se at 1st box.se + (-0.5, 0.5) "Process 4" +line right from 5/12 <2nd box.ne, 2nd box.se> to 5/12 <3nd box.nw, 3nd box.sw> +line right from 7/12 <2nd box.ne, 2nd box.se> to 7/12 <3nd box.nw, 3nd box.sw> +line right from 5/12 <4th box.ne, 4th box.se> to 5/12 <5nd box.nw, 5nd box.sw> +line right from 7/12 <4th box.ne, 4th box.se> to 7/12 <5nd box.nw, 5nd box.sw> +line down from 5/12 <2nd box.sw, 2nd box.se> to 5/12 <4nd box.nw, 4nd box.ne> +line down from 7/12 <2nd box.sw, 2nd box.se> to 7/12 <4nd box.nw, 4nd box.ne> +line down from 5/12 <3rd box.sw, 3rd box.se> to 5/12 <5nd box.nw, 5nd box.ne> +line down from 7/12 <3rd box.sw, 3rd box.se> to 7/12 <5nd box.nw, 5nd box.ne> +line right 1.0 from 5/12 <5th box.ne, 5th box.se> +line right 1.0 from 7/12 <5th box.ne, 5th box.se> +line left 1.0 from 5/12 <2nd box.nw, 2nd box.sw> +line left 1.0 from 7/12 <2nd box.nw, 2nd box.sw> +.PE +.DS C +\fIFigure 1. Interprocess and outside world communication channels\fP +.DE +The basic channel handling is done by \f(CWchan_in\fP and \f(CWchan_out\fP. All +other routines are based on them. The routine \f(CWchan_any\fP only checks if +there's a value available on a given channel. (It does not read this value!) +\f(CWC_init\fP initializes an array of interprocess communication channels. +.LP +The following table shows Occam statements paired with the routines used to +execute them. +.TS H +center, box; +c | c | c +lf5 | lf5 | lf5. +Occam statement Channel handling routine Called as += +.sp 0.5 +.TH +T{ +.nf +CHAN c: +CHAN c[z]: +.fi +T} T{ +.nf +c_init(c, z) +chan *c; unsigned z; +.fi +T} T{ +.nf +c_init(&c, 1); +c_init(&c, z); +.fi +T} +.sp 0.5 +_ +.sp 0.5 +T{ +.nf +c ? 
v +.fi +T} T{ +.nf +chan_in(v, c) +long *v; chan *c; +.fi +T} T{ +.nf +chan_in(&v, &c); +.fi +T} +.sp 0.5 +T{ +.nf +c ? b[byte i] +.fi +T} T{ +.nf +cbyte_in(b, c) +char *b; chan *c; +.fi +T} T{ +.nf +cbyte_in(&b[i], &c); +.fi +T} +.sp 0.5 +T{ +.nf +c ? a[i for z] +.fi +T} T{ +.nf +c_wa_in(a, z, c) +long *a; unsigned z; chan *c; +.fi +T} T{ +.nf +c_wa_in(&a[i], z, &c); +.fi +T} +.sp 0.5 +T{ +.nf +c ? a[byte i for z] +.fi +T} T{ +.nf +c_ba_in(a, z, c) +long *a; unsigned z; chan *c; +.fi +T} T{ +.nf +c_ba_in(&a[i], z, &c); +.fi +T} +.sp 0.5 +_ +.sp 0.5 +T{ +.nf +c ! v +.fi +T} T{ +.nf +chan_out(v, c) +long *v; chan *c; +.fi +T} T{ +.nf +chan_out(&v, &c); +.fi +T} +.sp 0.5 +T{ +.nf +c ! a[i for z] +.fi +T} T{ +.nf +c_wa_out(a, z, c) +long *a; unsigned z; chan *c; +.fi +T} T{ +.nf +c_wa_out(&a[i], z, &c); +.fi +T} +.sp 0.5 +T{ +.nf +c ! a[byte i for z] +.fi +T} T{ +.nf +c_ba_out(a, z, c) +long *a; unsigned z; chan *c; +.fi +T} T{ +.nf +c_ba_out(&a[i], z, &c); +.fi +T} +.sp 0.5 +_ +.sp 0.5 +T{ +.nf +alt + c ? .... + .... +.fi +T} T{ +.nf +int chan_any(c) +chan *c; +.fi +T} T{ +.nf +deadlock=0; +for(;;) { + if (chan_any(&c)) { + .... + .... +.fi +T} +.sp 0.5 +.TE +The code of \f(CWc_init\fP, \f(CWchan_in\fP, \f(CWchan_out\fP and \f(CWchan_any\fP +can be found in Appendix A. +.NH 3 +Synchronization on interprocess communication channels +.PP +The synchronization field can hold three different values indicating the +state the channel is in: +.IP "- \fBC\(ulS\(ulFREE\fP:" 15 +Ground state, channel not in use. +.IP "- \fBC\(ulS\(ulANY\fP:" 15 +Channel holds a value, the sending process is waiting for an acknowledgement +about its receipt. +.IP "- \fBC\(ulS\(ulACK\fP:" 15 +Channel data has been removed by a receiving process, the sending process can +set the channel free now. +.LP +A sending process cannot simply wait until the channel changes state C\(ulS\(ulANY +to state C\(ulS\(ulFREE before it continues. 
There is a third state needed to prevent +a third process from using the channel before our sending process is +acknowledged. Note, however that it is not allowed to use a channel for input +or output in more than one parallel process. This is too difficult to check +in practice, so we tried to smooth it a little. +.NH 2 +NOW +.PP +\fBNOW\fP evaluates to the current time returned by the time(2) system call. +The code is simply: +.DS +.ft CW +.nf + long now() + { + deadlock=0; + return time((long *) 0); + } +.fi +.ft +.DE +The ``deadlock=0'' prevents deadlocks while using the clock. +.NH 2 +UNIX interface +.PP +To handle the communication with the outside world the following channels are +defined: +.IP - +\fBinput\fP, that corresponds with the standard input file, +.IP - +\fBoutput\fP, that corresponds with the standard output file, +.IP - +\fBerror\fP, that corresponds with the standard error file. +.IP - +\fBfile\fP, an array of channels that can be subscripted with an index +obtained by the builtin named process ``\f(CWopen\fP''. Note that +\fBinput\fP=\fBfile\fP[0], \fBoutput\fP=\fBfile\fP[1] and +\fBerror\fP=\fBfile\fP[2]. +.LP +Builtin named processes to open and close files are defined as +.DS +.nf +.ft CW +proc open(var index, value name[], mode[]) = ..... : +proc close(value index) = ..... : +.fi +.ft +.DE +To open a file `junk', write nonsense onto it, and close it, goes as follows: +.DS +.ft CW +.nf + var i: + seq + open(i, "junk", "w") + file[i] ! nonsense + close(i) +.fi +.ft +.DE +Errors opening a file are reported by a negative index, which is the +negative value of the error number (called \fIerrno\fP in UNIX). +.LP +Bytes read from or written onto these channels are taken from occam variables. +As these variables can hold more than 256 values, some negative values are used +to control channels. These values are: +.IP "- \fBEOF\fP" 9 +(-1): Eof from file channel is read as -1. 
+.IP "- \fBTEXT\fP" 9 +(-2): A -2 written onto any channel connected to a terminal puts this +terminal in the normal line oriented mode (i.e. characters typed are echoed +and lines are buffered before they are read). +.IP "- \fBRAW\fP" 9 +(-3): A -3 written onto any channel connected to a terminal puts it in raw mode +(i.e. no echoing of typed characters and no line buffering). +.LP +To exit an Occam program, e.g. after an error, a builtin named process +\f(CWexit\fP is available that takes an exit code as its argument. +.NH 2 +Replicators and slices +.PP +Both the base and the count of replicators like in +.DS +.ft CW + par i = [ base for count ] +.ft +.DE +may be arbitrary expressions. The count in array slices like in +.DS +.ft CW + c ? A[ base for count ] +.ft +.DE +must be a constant expression however, the base is again free. diff --git a/doc/occam/p4 b/doc/occam/p4 new file mode 100644 index 0000000..afa9ec1 --- /dev/null +++ b/doc/occam/p4 @@ -0,0 +1,42 @@ +.NH +Particular details +.NH 2 +Lower case/Upper case +.PP +Keywords must be either fully written in lower case or in upper case, thus +\fBPAR\fP is equivalent to \fBpar\fP but \fBPar\fP is not a keyword. Identifiers +may be of mixed case. Different styles are used in our examples just to indicate +what's accepted by the compiler. +.NH 2 +File inclusion +.PP +The C preprocessor is applied to the input file before +compilation, so that files containing useful \fBPROC\fP and \fBDEF\fP +declarations can be used in the program by using the \fB#include\fP-directive +of the preprocessor. +.NH 2 +Substitution +.PP +Named processes are not textually substituted. A procedure call is used instead. +The semantics of occam substitution imply this by letting a global variable +(i.e. not declared inside the named process' body) be found where the named +process is defined and not where it is substituted. 
+.NH 2 +ANY +.PP +According to the occam syntax the \fBANY\fP keyword may be the only argument of +an input or output process. Thus, +.DS +.ft CW + c ? ANY; x +.ft +.DE +is not allowed. Because it was easy to add, and it was used by some programs, +our compiler allows it. (If portability is an issue, usage of this feature +is not advisable). +.NH 2 +Configuration +.PP +The special configuration keywords like \fBPLACED\fP, \fBALLOCATE\fP, \fBPORT\fP +and \fBLOAD\fP are not implemented. Only \fBPRI\fP works because \fBPAR\fP and +\fBALT\fP work the same without it. diff --git a/doc/occam/p5 b/doc/occam/p5 new file mode 100644 index 0000000..1dc98e0 --- /dev/null +++ b/doc/occam/p5 @@ -0,0 +1,18 @@ +.NH +Conclusions +.PP +Writing the compiler was very straightforward using the LLgen parser generator. +Its extended grammar and its way of conflict resolving were of great use to us, +especially +the indentation handling could be implemented quite easily. The automatic +error recovery given by LLgen took a great weight off our shoulders. +.PP +A set of parallelism simulation routines makes implementing \fBPAR\fP constructs +very simple. And we consider it a necessity to have such a layer to shield the +compiler writer from these details. +.PP +The translation to EM code was fairly direct, no great tricks were needed to +make things work. Only the different sizes of words and pointers that are given +as parameters to the compiler must be carefully watched. Variables or pointers +must sometimes be handled with double word instructions for different word or +pointer sizes. diff --git a/doc/occam/p6 b/doc/occam/p6 new file mode 100644 index 0000000..2ce3d9d --- /dev/null +++ b/doc/occam/p6 @@ -0,0 +1,5 @@ +.NH +Acknowledgement +.PP +We want to thank Dick Grune for his description of Occam which is used +in the introduction. 
diff --git a/doc/occam/p7 b/doc/occam/p7 new file mode 100644 index 0000000..c9397d1 --- /dev/null +++ b/doc/occam/p7 @@ -0,0 +1,23 @@ +.bp +.NH +References +.LP +.IP [1] +INMOS limited, \fIOCCAM Programming manual\fP, Prentice-Hall, 1984. +.IP [2] +C. J. H. Jacobs, \fISome Topics in Parser Generation\fP, +Informatica Rapport IR-105, Vrije Universiteit, Amsterdam, October 1985. +.IP [3] +B. W. Kernighan and D. M. Ritchie, \fIThe C Programming Language\fP, +Prentice-Hall, 1978. +.IP [4] +M. E. Lesk, \fILex - A Lexical Analyser Generator\fP, Comp. Sci. Tech. Rep. +No. 39, Bell Laboratories, Murray Hill, New Jersey, October 1975. +.IP [5] +A. S. Tanenbaum, H. van Staveren, E. G. Keizer, J. W. Stevenson, +\fIDescription of a Machine Architecture for use with Block Structured +Languages\fP, Informatica Rapport IR-81, Vrije Universiteit, Amsterdam, 1983. +.IP [6] +K. Bot and E. Scheffer, \fIA set of multi-process primitives for stack based +machines\fP, Vrije Universiteit, Amsterdam, 1986. +.LP diff --git a/doc/occam/p8 b/doc/occam/p8 new file mode 100644 index 0000000..71e622b --- /dev/null +++ b/doc/occam/p8 @@ -0,0 +1,16 @@ +.bp +.NH +Appendix A: Implementation of the channel routines +.DS L +.ft CW +.ta 0.65i 1.3i 1.95i 2.6i 3.25i 3.9i 4.55i 5.2i 5.85i 6.5i +.so channel.h.t +.ft +.DE +.bp +.DS L +.ft CW +.ta 0.65i 1.3i 1.95i 2.6i 3.25i 3.9i 4.55i 5.2i 5.85i 6.5i +.so channel.c.t +.ft +.DE diff --git a/doc/occam/p9 b/doc/occam/p9 new file mode 100644 index 0000000..3096fe1 --- /dev/null +++ b/doc/occam/p9 @@ -0,0 +1,60 @@ +.bp +.NH +Appendix B: Translation of a \fBPAR\fP construct to EM code using the library +routines to simulate parallelism +.PP +Translation of the parallel construct: +.DS +.ft CW + par + P0 + par i = [ 1 for n ] + P(i) +.DE +is +.TS +center; +lf5 lf5. 
+ lal -20 ; Assume 20 bytes of local variables at this moment + cal $parbegin ; Set up a process group + asp 4 ; Assume pointersize = 4 + cal $parfork ; Split stack in two from local -20 + lfr 4 ; Assume wordsize = 4 + zne *23 ; One end jumps to second process, other continues here + lor 0 ; Static link + cal $P0 + asp 4 + bra *24 ; Jump to the outer parend +23 + cal $parfork ; Fork off `par i = ...' process + lfr 4 + zne *25 ; One end jumps to end of outer par + lal -20 ; Place break just above i + cal $parbegin ; Set up another process group for the P(i) + loc 1 + stl -24 ; i:=1 + lol n ; Assume n can be addressed this simply + stl -28 ; A nameless counter + bra *26 ; Branch to counter test +27 + cal $parfork ; Fork off one P(i) + lfr 4 + zne *28 ; One jumps away to increment i, the other calls P(i) + lol -24 + lor 0 + cal $P + asp 8 + bra *29 +28 + inl -24 ; i:=i+1 + del -28 ; counter:=counter-1 +26 + lol -28 + zgt *27 ; while counter>0 repeat loop +29 + cal $parend ; Wait for the P(i) to finish, then delete group + bra *24 ; Jump to the higher up meeting place with P0 +25 ; Note that the bra will be optimized away +24 + cal $parend ; Wait for both processes to end, then delete group +.TE diff --git a/doc/occam/proto.make b/doc/occam/proto.make new file mode 100644 index 0000000..03adb64 --- /dev/null +++ b/doc/occam/proto.make @@ -0,0 +1,32 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:04:41 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/doc/occam + +FILES= $(SRC_DIR)/p0 \ + $(SRC_DIR)/p1 \ + $(SRC_DIR)/p2 \ + $(SRC_DIR)/p3 \ + $(SRC_DIR)/p4 \ + $(SRC_DIR)/p5 \ + $(SRC_DIR)/p6 \ + $(SRC_DIR)/p7 \ + $(SRC_DIR)/p8 \ + $(SRC_DIR)/p9 + +PIC=pic +EQN=eqn +TBL=tbl + +$(TARGET_HOME)/doc/occam.doc: $(FILES) channel.h.t channel.c.t + soelim $(FILES) | $(PIC) | $(TBL) | $(EQN) > $@ + +channel.h.t: $(SRC_HOME)/h/ocm_chan.h + $(SRC_DIR)/ctot <$(SRC_HOME)/h/ocm_chan.h >channel.h.t + +channel.c.t: channel.c + $(SRC_DIR)/ctot <channel.c >channel.c.t + +channel.c: $(SRC_HOME)/lang/occam/lib/tail_ocm.a + $(UTIL_HOME)/bin/arch x $(SRC_HOME)/lang/occam/lib/tail_ocm.a channel.c diff --git a/doc/pascal/ab+intro.doc b/doc/pascal/ab+intro.doc new file mode 100644 index 0000000..bd99d00 --- /dev/null +++ b/doc/pascal/ab+intro.doc @@ -0,0 +1,50 @@ +.TL +The ACK Pascal Compiler +.AU +Aad Geudeke +Frans Hofmeester +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.AB +This document describes the implementation of a Pascal to EM compiler. The +compiler is written in C. The lexical analysis is done using a hand-written +lexical analyzer. Semantic analysis makes use of the extended LL(1) parser +generator LLgen. Several EM utility modules are used in the compiler. +.AE +.sp 2 +.NH +Introduction + +.PP +.nh +The Pascal front end of the Amsterdam Compiler Kit (ACK) complies with the +requirements of the international standard published by the International +Organization for Standardization (ISO) [ISO]. An informal description, which +unfortunately is not conforming to the standard, of the programming language +Pascal is given in [JEN]. + +.PP +The main reason for rewriting the Pascal compiler was that the old Pascal +compiler was written in Pascal itself, and a disadvantage of it was its +lack of flexibility. The compiler did not meet the needs of the current +ACK-framework, which makes use of modern parsing techniques and utility +modules. 
In this framework it is, for example, possible to use a fast back +end. Such a back end translates directly to object code [ACK]. Our compiler is +written in C and it is designed similar to the current C and Modula-2 compiler +of ACK. + +.PP +Chapter 2 describes the basic structure of the compiler. Chapter 3 discusses +the code generation of the main Pascal constructs. Chapter 4 covers one of +the major components of Pascal, viz. the conformant array. In Chapter 5 the +various compiler options that can be used are enumerated. The extensions +to the standard and the deviations from the standard are listed in Chapter +6 and 7. Chapter 8 presents some ideas to improve the standard. Chapter 9 +gives a short overview of testing the compiler. The major differences +between the old and new compiler can be found in Chapter 10. Suggestions +to improve the compiler are described in Chapter 11. The appendices +contain the grammar of Pascal and the changes made to the ACK Pascal run time +library. A translation of a Pascal program to EM code as example is presented. +.bp diff --git a/doc/pascal/compar.doc b/doc/pascal/compar.doc new file mode 100644 index 0000000..e712435 --- /dev/null +++ b/doc/pascal/compar.doc @@ -0,0 +1,89 @@ +.sp 2 +.NH +Comparison with the Pascal-VU compiler +.nh + +.LP +In this chapter, the differences with the Pascal-VU compiler [IM2] are listed. +The points enumerated below can be used as improvements to the compiler (see +also Chapter 11). +.sp +.NH 2 +Deviations +.LP +.sp +- large labels +.in +3m +only labels in the closed interval 0..9999 are allowed, as opposed to the +Pascal-VU compiler. The Pascal-VU compiler allows every unsigned integer +as label. +.in -3m + +- goto +.in +3m +the new compiler conforms to the standard as opposed to the old one. The +following program, which contains an illegal jump to label 1, is accepted +by the Pascal-VU compiler. 
+ +.nf +\fBprogram\fR illegal_goto(output); +\fBlabel\fR 1; +\fBvar\fR i : integer; +\fBbegin\fR + \fBgoto\fR 1; + \fBfor\fR i := 1 \fBto\fR 10 \fBdo\fR + \fBbegin\fR + 1 : writeln(i); + \fBend\fR; +\fBend\fR. +.fi + +This program is rejected by the new compiler. +.in -3m + +.NH 2 +Extensions +.LP +.sp +The extensions implemented by the Pascal-VU compiler are listed in +Chapter 5 of [IM2]. +.sp +- separate compilation +.ti +3m +the new compiler only accepts programs, not modules. + +- assertions +.ti +3m +not implemented. + +- additional procedures +.ti +3m +the procedures \fIhalt, mark\fR and \fIrelease\fR are not available. +.bp +- UNIX\(tm interfacing +.ti +3m +the \-c option is not implemented. +.FS +\(tm UNIX is a Trademark of Bell Laboratories. +.FE + +- double length integers +.ti +3m +integer size can be set with the \-V option, so the additional type \fIlong\fR +is not implemented. + + +.NH 2 +Compiler options +.LP +.sp +The options implemented by the Pascal-VU compiler are listed in +Chapter 7 of [IM2]. +.sp +The construction "{$....}" is not recognized. + +The options: \fIa, c, d, s\fR and \fIt\fR are not available. + +The \-l option has been changed into the \-L option. + +The size of reals can be set with the \-V option. diff --git a/doc/pascal/conf.doc b/doc/pascal/conf.doc new file mode 100644 index 0000000..ff85003 --- /dev/null +++ b/doc/pascal/conf.doc @@ -0,0 +1,88 @@ +.sp 1.5i +.nr H1 3 +.NH +Conformant Arrays +.nh +.LP +.sp +A fifth kind of parameter, besides the value, variable, procedure, and function +parameter, is the conformant array parameter (\fBISO 6.6.3.7\fR). This +parameter, undoubtedly the major addition to Pascal from the compiler writer's +point of view, has been implemented. With this kind of parameter, the required +bounds of the index-type of an actual parameter are not fixed, but are +restricted to a specified range of values. 
Two types of conformant array +parameters can be distinguished: variable conformant array parameters and +value conformant array parameters. +.sp +.NH 2 +Variable conformant array parameters +.LP +.sp +The treatment of variable conformant array parameters is comparable with the +normal variable parameter. +Both have in common that the parameter mechanism used is \fIcall by +reference\fR. +.br +An example is: +.br +.in +5m +to sort variable length arrays of integers, the following Pascal procedure could be used: + +.nf +\fBprocedure\fR bubblesort(\fBvar\fR A : \fBarray\fR[low..high : integer] \fBof\fR integer); +\fBvar\fR i, j : integer; +\fBbegin + for\fR j := high - 1 \fBdownto\fR low \fBdo + for\fR i := low \fBto\fR j \fBdo + if\fR A[i+1] < A[i] \fBthen\fI interchange A[i] and A[i+1] +\fBend\fR; +.fi +.in -5m + +For every actual parameter, the base address of the array is pushed on the +stack and for every index-type-specification, exactly one array descriptor +is pushed. +.sp +.NH 2 +Value conformant array parameters +.LP +.sp +The treatment of value conformant array parameters is more complex than its +variable counterpart. +.br +An example is: +.br +.in +5m +an unpacked array of characters could be printed as a string with the following program part: + +.nf +\fBprocedure\fR WriteAsString( A : \fBarray\fR[low..high : integer] \fBof\fR char); +\fBvar\fR i : integer; +\fBbegin + for\fR i := low \fBto\fR high \fBdo\fR write(A[i]); +\fBend\fR; +.fi +.in -5m + +The calling procedure pushes the base address of the actual parameter and +the array descriptors belonging to it on the stack. Subsequently the procedure +using the conformant array parameter is called. Because it is a \fIcall by +value\fR, the called procedure has to create a copy of the actual parameter. +This implies that the calling procedure knows how much space on the stack +must be reserved for the parameters. 
If the actual-parameter is a conformant +array, the called procedure keeps track of the size of the activation record. +Hence the restrictions on the use of value conformant array parameters, as +specified in \fBISO 6.6.3.7.2\fR, are dropped. + +A description of the EM code generated by the compiler is: + +.nf +.ft I +load the stack adjustment sofar +load base address of array parameter +compute the size in bytes of the array +add this size to the stack adjustment +copy the array +remember the new address of the array +.ft R +.fi diff --git a/doc/pascal/contents.doc b/doc/pascal/contents.doc new file mode 100644 index 0000000..f744c9e --- /dev/null +++ b/doc/pascal/contents.doc @@ -0,0 +1,41 @@ +.sp 1.5i +.ps 12 +.vs 14 +.ft B +Contents\fR\h'+108u'\h'+5i'Page + + +\h'+34u'1. Introduction \h'+34u'\h'+1.5i'1 + +\h'+34u'2. The compiler \h'+34u'\h'+1.5i'2 + +\h'+34u'3. Translation of Pascal to EM \h'+34u'\h'+1.5i'5 + +\h'+34u'4. Conformant arrays \h'+1.5i'10 + +\h'+34u'5. Compiler options \h'+1.5i'11 + +\h'+34u'6. Extensions to the standard \h'+1.5i'13 + +\h'+34u'7. Deviations from the standard \h'+1.5i'13 + +\h'+34u'8. Hints to change the standard \h'+1.5i'15 + +\h'+34u'9. Testing the compiler \h'+1.5i'16 + +10. Comparison with the old compiler \h'+1.5i'16 + +11. Improvements to the compiler \h'+1.5i'17 + +12. History & Acknowledgements \h'+1.5i'18 + +13. References \h'+1.5i'19 + + +\fBAppendices\fR + +\h'+16u'A. ISO-PASCAL Grammar \h'+1.5i'20 + +\h'+24u'B. Changes to run time library \h'+1.5i'26 + +\h'+20u'C. 
An example \h'+1.5i'28 diff --git a/doc/pascal/deviations.doc b/doc/pascal/deviations.doc new file mode 100644 index 0000000..53ee571 --- /dev/null +++ b/doc/pascal/deviations.doc @@ -0,0 +1,118 @@ +.sp 2 +.NH +Deviations from the standard +.nh + +.PP +The compiler deviates from the ISO 7185 standard with respect to the +following clauses: + +.IP "\fBISO 6.1.3:\fR" 14 +\h'-5u'Identifiers may be of any length and all characters of an identifier +shall be significant in distinguishing between them. +.sp +.in +3m +The constant IDFSIZE, defined in the file \fIidfsize.h\fR, determines +the (maximum) significant length of an identifier. It can be set at run +time with the \-M option (see also section on compiler options). +.in -3m +.sp +.IP "\fBISO 6.1.8:\fR" +\h'-5u'There shall be at least one separator between any pair of consecutive tokens +made up of identifiers, word-symbols, labels or unsigned-numbers. +.sp +.in +3m +A token separator is not needed when a number is followed by an identifier +or a word-symbol. For example the input sequence, 2\fBthen\fR, is recognized +as the integer 2 followed by the keyword \fBthen\fR. +.in -3m +.sp +.IP "\fBISO 6.2.1:\fR" +\h'-29u'The label-declaration-part shall specify all labels that prefix a statement +in the corresponding statement-part. +.sp +.ti +3m +The compiler generates a warning if a label is declared but never defined. +.bp +.IP "\fBISO 6.2.2:\fR" +\h'-9u'The scope of identifiers and labels should start at the beginning of the +block in which these identifiers or labels are declared. +.sp +.in +3m +The compiler, as most other one pass compilers deviates in this respect, +because the scope of variables and labels start at their defining-point. +.nf +.in +4m +\fBprogram\fR deviates\fB; +const\fR + x \fB=\fR 3\fB; +procedure\fR p\fB; +const\fR + y \fB=\fR x\fB;\fR + x \fB=\fR true\fB; +begin end; +begin +end.\fR +.in -4m +.fi + +In procedure p, the constant y has the integer value 3. 
This program does not +conform to the standard. In [SAL] a simple algorithm is described for +enforcing the scope rules, it involves numbering all scopes encountered in the +program in order of their opening, and recording in each identifier table +entry the number of the latest scope in which it is used. + +Note: The compiler does not deviate from the standard in the following program: +.nf +.in +4m +\fBprogram\fR conforms\fB; +type\fR + x \fB=\fR real\fB; +procedure\fR p\fB; +type\fR + y \fB= ^\fRx\fB;\fR + x \fB=\fR boolean\fB; +var\fR + p \fB:\fR y\fB; +begin end; +begin +end.\fR +.in -4m +.fi + +In procedure p, the variable p is a pointer to boolean. +.fi +.in -3m +.sp +.IP "\fBISO 6.4.3.2:\fR" +The standard specifies that any ordinal type is allowed as index-type. +.sp +.in +3m +The required type \fIinteger\fR is not allowed as index-type, i.e. +.ti +2m +\fBARRAY [ \fIinteger\fB ] OF\fR +is not permitted. +.br +This could be implemented, but this might cause problems on machines with +a small memory. +.in -3m +.sp +.IP "\fBISO 6.4.3.3:\fR" +\h'-1u'The type possessed by the variant-selector, called the tag-type, must +be an ordinal type, so the integer type is permitted. The values denoted by +all case-constants shall be distinct and the set thereof shall be equal +to the set of values specified by the tag-type. +.sp +.in +3m +Because it is impracticable to enumerate all integers as case-constants, +the integer type is not permitted as tag-type. It would not make a great +difference to allow it as tag-type. +.in -3m +.sp +.IP "\fBISO 6.8.3.9:\fR" +The standard specifies that the control-variable of a for-statement is not +allowed to be modified while executing the loop. +.sp +.in +3m +Violation of this rule is not detected. An algorithm to implement this rule +can be found in [PCV]. 
diff --git a/doc/pascal/example.doc b/doc/pascal/example.doc new file mode 100644 index 0000000..f8350f0 --- /dev/null +++ b/doc/pascal/example.doc @@ -0,0 +1,92 @@ +.sp 1.5i +.ft B +Appendix C: An example +.ft R +.nh +.nf + + +\h'+10u' 1 \fBprogram\fR factorials(input, output); +\h'+10u' 2 { This program prints factorials } +\h'+10u' 3 +\h'+10u' 4 \fBconst\fR +\h'+10u' 5 FAC1 = 1; +\h'+10u' 6 \fBvar\fR +\h'+10u' 7 i : integer; +\h'+10u' 8 +\h'+10u' 9 \fBfunction\fR factorial(n : integer) : integer; +10 \fBbegin\fR +11 \fBif\fR n = FAC1 \fBthen\fR +12 factorial := FAC1 +13 \fBelse\fR +14 factorial := n * factorial(n-1); +15 \fBend\fR; +16 +17 \fBbegin\fR +18 write('Give a number : '); +19 readln(i); +20 \fBif\fR i < 1 \fBthen\fR +21 writeln('No factorial') +22 \fBelse\fR +23 writeln(factorial(i):1); +24 \fBend\fR. +.bp +.po +.DS + mes 2,4,4 loc 16 +\&.1 cal $_wrs + rom 'factorials.p\(rs000' asp 12 +i lin 19 + bss 4,0,0 lae input +output cal $_rdi + bss 540,0,0 asp 4 +input lfr 4 + bss 540,0,0 ste i + exp $factorial lae input + pro $factorial, ? cal $_rln + mes 9,4 asp 4 + lin 11 lin 20 + lol 0 loe i + loc 1 loc 1 + cmi 4 cmi 4 + teq tlt + zeq *1 zeq *1 + lin 12 lin 21 + loc 1 .4 + stl -4 rom 'No factorial' + bra *2 lae output +1 lae .4 + lin 14 loc 12 + lol 0 cal $_wrs + lol 0 asp 12 + loc 1 lae output + sbi 4 cal $_wln + cal $factorial asp 4 + asp 4 bra *2 + lfr 4 1 + mli 4 lin 23 + stl -4 lae output +2 loe i + lin 15 cal $factorial + mes 3,0,4,0,0 asp 4 + lol -4 lfr 4 + ret 4 loc 1 + end 4 cal $_wsi + exp $m_a_i_n asp 12 + pro $m_a_i_n, ? 
lae output + mes 9,0 cal $_wln + fil .1 asp 4 +\&.2 2 + con input, output lin 24 + lxl 0 loc 0 + lae .2 cal $_hlt + loc 2 end 0 + lxa 0 mes 4,24,'factorials.p\(rs000' + cal $_ini + asp 16 + lin 18 +\&.3 + rom 'Give a number : ' + lae output + lae .3 +.DE diff --git a/doc/pascal/extensions.doc b/doc/pascal/extensions.doc new file mode 100644 index 0000000..44febcc --- /dev/null +++ b/doc/pascal/extensions.doc @@ -0,0 +1,60 @@ +.pl 12i +.sp 1.5i +.NH +Extensions to Pascal as specified by ISO 7185 +.nh + +.IP "\fBISO 6.1.3:\fR" 14 +\h'-11u'The underscore is treated as a letter when the \-u option is turned +on (see also section 5.2). This is implemented to be compatible with +Pascal-VU and can be used in identifiers to increase readability. +.sp +.IP "\fBISO 6.1.4:\fR" +\h'-12u'The directive \fIextern\fR can be used in a procedure-declaration or +function-declaration to specify that the procedure-block or function-block +corresponding to that declaration is external to the program-block. This can +be used in conjunction with library routines. +.sp +.IP "\fBISO 6.1.9:\fR" +\h'-22u'An alternative representation for the following tokens and delimiting +characters is recognized: +.in +5m +.ft 5 +\fBtoken +.ft 5 +\& \fBalternative token +.ft 5 +.sp +^ +\& @ +.br +[ +\& (. +.br +] +\& .) + +.ft 5 +\fBdelimiting character +.ft 5 +\& \fBalternative delimiting pair of characters +.ft 5 +.sp +{ +\& (* +.br +} +\& *) +.in -5m +.sp +.IP "\fBISO 6.6.3.7.2:\fR" +\h'-1u'A conformant array parameter can be passed as value conformant array +parameter without the restrictions imposed by the standard. The compiler +gives a warning. This is implemented to keep the parameter mechanism orthogonal (see also Chapter 4). +.sp +.IP "\fBISO 6.9.3.1:\fR" +\h'-16u'If the value of the argument \fITotalWidth\fR of the required +procedure \fIwrite\fR is zero or negative, no characters are written for +character, string or boolean type arguments. 
If the value of the argument +\fIFracDigits\fR of the required procedure \fIwrite\fR is zero or negative, +the fraction and '.' character are suppressed for fixed-point arguments. diff --git a/doc/pascal/hints.doc b/doc/pascal/hints.doc new file mode 100644 index 0000000..a1c7fc1 --- /dev/null +++ b/doc/pascal/hints.doc @@ -0,0 +1,76 @@ +.sp 1.5i +.nr H1 7 +.NH +Hints to change the standard +.nh +.sp +.LP +We encountered some difficulties when the compiler was developed. In this +chapter some hints are presented to change the standard, which would make +the implementation of the compiler less difficult. The semantics of Pascal +would not be altered by these adaptions. +.sp 2 +.LP +\- Some minor changes in the grammar of Pascal from the user's point of view, +but which make the writing of an LL(1) parser considerably easier, could be: +.in +3m +.nf +field-list : [ ( fixed-part [ variant-part ] | variant-part ) ] . +fixed-part : record-section \fB;\fR { record-section \fB;\fR } . +variant-part : \fBcase\fR variant-selector \fBof\fR variant \fB;\fR { variant \fB;\fR } . + +case-statement : \fBcase\fR case-index \fBof\fR case-list-element \fB;\fR { case-list-element \fB;\fR } \fBend\fR . +.fi +.in -3m + + +.LP +\- To ease the semantic checking on sets, the principle of qualified sets could +be used, every set-constructor must be preceded by its type-identifier: +.nf +.ti +3m +set-constructor : type-identifier \fB[\fR [ member-designator { \fB,\fR member-designator } ] \fB]\fR . + +Example: + t1 = set of 1..5; + t2 = set of integer; + +The type of [3, 5] would be ambiguous, but the type of t1[3, 5] not. +.fi + + +.LP +\- Another problem arises from the fact that a function name can appear in +three distinct 'use' contexts: function call, assignment of function +result and as function parameter. +.br +Example: +.in +5m +.nf +\fBprogram\fR function_name; + +\fBfunction\fR p(x : integer; function y : integer) : integer; +\fBbegin\fR .. 
\fBend\fR; + +\fBfunction\fR f : integer; +\fBbegin\fR + f := p(f, f); (*) +\fBend\fR; + +\fBbegin\fR .. \fBend\fR. +.fi +.in -5m + +A possible solution in case of a call (also a procedure call) would be to +make the (possibly empty) actual-parameter-list mandatory. The assignment +of the function result could be changed in a \fIreturn\fR statement. +Though this would change the semantics of the program slightly. +.br +The above statement (*) would look like this: return p(f(), f); + + +.LP +\- Another extension to the standard could be the implementation of an +\fIotherwise\fR clause in a case-statement. This would behave exactly like +the \fIdefault\fR clause in a switch-statement in C. +.bp diff --git a/doc/pascal/his.doc b/doc/pascal/his.doc new file mode 100644 index 0000000..d4c64a2 --- /dev/null +++ b/doc/pascal/his.doc @@ -0,0 +1,36 @@ +.sp 2 +.NH +History & Acknowledgements +.nh +.sp 2 +.ft B +History +.ft R +.sp +.LP +The purpose of this project was to make a Pascal compiler which should satisfy +the conditions of the ISO standard. The task was considerably simplified, +because parts of the Modula-2 compiler were used. This gave the advantage of +increasing the uniformity of the compilers in ACK. +.br +While developing the compiler, a number of errors were detected in the Modula-2 +compiler, EM utility modules and the old Pascal compiler. + +.sp 2 +.ft B +Acknowledgements +.ft R +.sp +.LP +During the development of the compiler, valuable support was received from +a number of persons. In this regard we owe a debt of gratitude to +Fred van Beek, Casper Capel, Rob Dekker, Frank Engel, Jos\('e Gouweleeuw +and Sonja Keijzer (Jut and Jul !!), Herold Kroon, Martin van Nieuwkerk, +Sjaak Schouten, Eric Valk, and Didan Westra. +.br +Special thanks are reserved for Dick Grune, who introduced us to the field of +Compiler Design and who helped testing the compiler. Ceriel Jacobs, who +developed LLgen and the Modula-2 compiler of ACK. 
Finally we would like to +thank Erik Baalbergen, who had the supervision on this entire project and +gave us many valuable suggestions. +.bp diff --git a/doc/pascal/improv.doc b/doc/pascal/improv.doc new file mode 100644 index 0000000..3c15ee8 --- /dev/null +++ b/doc/pascal/improv.doc @@ -0,0 +1,87 @@ +.sp 2 +.NH +Improvements to the compiler +.nh +.sp +.LP +In consideration of portability, a restricted option could be implemented. +Under this option, the extensions and warnings should be considered as errors. + + +.LP +The restrictions imposed by the standard on the control variable of a +for-statement should be implemented (\fBISO 6.8.3.9\fR). + +.LP +To check whether a function returns a valid result, the following algorithm +could be used. When a function is entered a hidden temporary variable of +type boolean is created. This variable is initialized with the value false. +The variable is set to true, when an assignment to the function name occurs. +On exit of the function a test is performed on the variable. If the value +of the variable is false, a run-time error occurs. +.br +Note: The check has to be done at run-time. + + +.LP +The \fIundefined value\fR should be implemented. A problem arises with +local variables, for which space on the stack is allocated. A possible +solution would be to generate code for the initialization of the local +variables with the undefined value at the beginning of a procedure or +function. +.br +The implementation for the global variables is easy, because \fBbss\fR +blocks are used. + + +.LP +Closely related to the last point is the generation of warnings when +variables are never used or assigned. This is not yet implemented. + + +.LP +The error messages could specify more details about the errors occurred, +if some additional testing is done. + +.bp +.LP +Every time the compiler detects sets with different base-types, a warning +is given. Sometimes this is superfluous. 
+ +.nf +\fBprogram\fR sets(output); +\fBtype\fR + week = (sunday, monday, tuesday, wednesday, thursday, friday, saturday); + workweek = monday..friday; +\fBvar\fR + s : \fBset of\fR workweek; + day : week; +\fBbegin\fR + day := monday; + s := [day]; (* warning *) + day := saturday; + s := [day]; (* warning *) +\fBend\fR. +.fi +The new compiler gives two warnings, the first one is redundant. + + +.LP +A nasty point in the compiler is the way the procedures \fIread, readln, +write\fR and \fIwriteln\fR are handled (see also section 2.2). They have +been added to the grammar. This implies that they can not be redefined as +opposed to the other required procedures and functions. They should be +removed from the grammar altogether. This could imply that more semantic +checks have to be performed. + + +.LP +No effort is made to detect possible run-time errors during compilation. +.br +E.g. a : \fBarray\fR[1..10] \fBof\fI something\fR, and the array selection +a[11] would occur. + + +.LP +Some assistance to implement the improvements mentioned above, can be +obtained from [PCV]. diff --git a/doc/pascal/internal.doc b/doc/pascal/internal.doc new file mode 100644 index 0000000..d1a94e7 --- /dev/null +++ b/doc/pascal/internal.doc @@ -0,0 +1,342 @@ +.pl 12.5i +.sp 1.5i +.NH +The compiler + +.nh +.LP +The compiler can be divided roughly into four modules: + +\(bu lexical analysis +.br +\(bu syntax analysis +.br +\(bu semantic analysis +.br +\(bu code generation +.br + +The four modules are grouped into one pass. The activity of these modules +is interleaved during the pass. +.br +The lexical analyzer, some expression handling routines and various +datastructures from the Modula-2 compiler contributed to the project. +.sp 2 +.NH 2 +Lexical Analysis + +.LP +The first module of the compiler is the lexical analyzer. In this module, the +stream of input characters making up the source program is grouped into +\fItokens\fR, as defined in \fBISO 6.1\fR. 
The analyzer is hand-written, +because the lexical analyzer generator, which was at our disposal, +\fILex\fR [LEX], produces much slower analyzers. A character table, in the file +\fIchar.c\fR, is created using the program \fItab\fR which takes as input +the file \fIchar.tab\fR. In this table each character is placed into a +particular class. The classes, as defined in the file \fIclass.h\fR, +represent a set of tokens. The strategy of the analyzer is as follows: the +first character of a new token is used in a multiway branch to eliminate as +many candidate tokens as possible. Then the remaining characters of the token +are read. The constant INP_NPUSHBACK, defined in the file \fIinput.h\fR, +specifies the maximum number of characters the analyzer looks ahead. The +value has to be at least 3, to handle input sequences such as: +.br + 1e+4 (which is a real number) +.br + 1e+a (which is the integer 1, followed by the identifier "e", a plus, and the identifier "a") + +Another aspect of this module is the insertion and deletion of tokens +required by the parser for the recovery of syntactic errors (see also section +2.2). A generic input module [ACK] is used to avoid the burden of I/O. +.sp 2 +.NH 2 +Syntax Analysis + +.LP +The second module of the compiler is the parser, which is the central part of +the compiler. It invokes the routines of the other modules. The tokens obtained +from the lexical analyzer are grouped into grammatical phrases. These phrases +are stored as parse trees and handed over to the next part. The parser is +generated using \fILLgen\fR[LL], a tool for generating an efficient recursive +descent parser with no backtrack from an Extended Context Free Syntax. +.br +An error recovery mechanism is generated almost completely automatically. A +routine called \fILLmessage\fR had to be written, which gives the necessary +error messages and deals with the insertion and deletion of tokens. 
+The routine \fILLmessage\fR must accept one parameter, whose value is +a token number, zero or -1. A zero parameter indicates that the current token +(the one in the external variable \fILLsymb\fR) is deleted. +A -1 parameter indicates that the parser expected end of file, but did +not get it. The parser will then skip tokens until end of file is detected. +A parameter that is a token number (a positive parameter) indicates that +this token is to be inserted in front of the token currently in \fILLsymb\fR. +Also, care must be taken, that the token currently in \fILLsymb\fR is again +returned by the \fBnext\fR call to the lexical analyzer, with the proper +attributes. So, the lexical analyzer must have a facility to push back one +token. +.br +Calls to the two standard procedures \fIwrite\fR and \fIwriteln\fR can be +different from calls to other procedures. The syntax of a write-parameter +is different from the syntax of an actual-parameter. We decided to include +them, together with \fIread\fR and \fIreadln\fR, in the grammar. An alternate +solution would be to make the syntax of an actual-parameter identical to the +syntax of a write-parameter. Afterwards the parameter has to be checked to +see whether it is used properly or not. +.bp +As the parser is LL(1), it must always be able to determine what to do, +based on the last token read (\fILLsymb\fR). Unfortunately, this was not the +case with the grammar as specified in [ISO]. Two kinds of problems +appeared, viz. the \fBalternation\fR and \fBrepetition\fR conflict. +The examples given in the following paragraphs are taken from the grammar. + +.NH 3 +Alternation conflict + +.LP +An alternation conflict arises when the parser can not decide which +production to choose. 
+.br +\fBExample:\fR +.in +2m +.ft 5 +.nf +procedure-declaration : procedure-heading \fB';'\f5 directive | +.br +\h'\w'procedure-declaration : 'u'procedure-identification \fB';'\f5 procedure-block | +.br +\h'\w'procedure-declaration : 'u'procedure-heading \fB';'\f5 procedure-block ; +.br +procedure-heading : \fBprocedure\f5 identifier [ formal-parameter-list ]? ; +.br +procedure-identification : \fBprocedure\f5 procedure-identifier ; +.fi +.ft R +.in -2m + +A sentence that starts with the terminal \fBprocedure\fR is derived from the +three alternative productions. This conflict can be resolved in two ways: +adjusting the grammar, usually some rules are replaced by one rule and more +work has to be done in the semantic analysis; using the LLgen conflict +resolver, "\fB%if\fR (C-expression)", if the C-expression evaluates to +non-zero, the production in question is chosen, otherwise one of the +remaining rules is chosen. The grammar rules were rewritten to solve this +conflict. The new rules are given below. For more details see the file +\fIdeclar.g\fR. + +.in +2m +.ft 5 +.nf +procedure-declaration : procedure-heading \fB';'\f5 ( directive | procedure-block ) ; +.br +procedure-heading : \fBprocedure\f5 identifier [ formal-parameter-list ]? ; +.fi +.ft R +.in -2m + +A special case of an alternation conflict, which is common to many block +structured languages, is the \fI"dangling-else"\fR ambiguity. + +.in +2m +.ft 5 +.nf +if-statement : \fBif\f5 boolean-expression \fBthen\f5 statement [ else-part ]? 
; +.br +else-part : \fBelse\f5 statement ; +.fi +.ft R +.in -2m + +The following statement that can be derived from the rules above is ambiguous: + +.ti +2m +\fBif\f5 boolean-expr-1 \fBthen\f5 \fBif\f5 boolean-expr-2 \fBthen\f5 statement-1 \fBelse\f5 statement-2 +.ft R + + +.ps 8 +.vs 7 +.PS +move right 1.1i +S: line down 0.5i +"if-statement" at S.start above +.ft B +"then" at S.end below +.ft R +move to S.start then down 0.25i +L: line left 0.5i then down 0.25i +box ht 0.33i wid 0.6i "boolean" "expression-1" +move to L.start then left 0.5i +L: line left 0.5i then down 0.25i +.ft B +"if" at L.end below +.ft R +move to L.start then right 0.5i +L: line right 0.5i then down 0.25i +"statement" at L.end below +move to L.end then down 0.10i +L: line down 0.25i dashed +"if-statement" at L.end below +move to L.end then down 0.10i +L: line down 0.5i +.ft B +"then" at L.end below +.ft R +move to L.start then down 0.25i +L: line left 0.5i then down 0.25i +box ht 0.33i wid 0.6i "boolean" "expression-2" +move to L.start then left 0.5i +L: line left 0.5i then down 0.25i +.ft B +"if" at L.end below +.ft R +move to L.start then right 0.5i +L: line right 0.5i then down 0.25i +box ht 0.33i wid 0.6i "statement-1" +move to L.start then right 0.5i +L: line right 0.5i then down 0.25i +.ft B +"else" at L.end below +.ft R +move to L.start then right 0.5i +L: line right 0.5i then down 0.25i +box ht 0.33i wid 0.6i "statement-2" +move to S.start +move right 3.5i +L: line down 0.5i +"if-statement" at L.start above +.ft B +"then" at L.end below +.ft R +move to L.start then down 0.25i +L: line left 0.5i then down 0.25i +box ht 0.33i wid 0.6i "boolean" "expression-1" +move to L.start then left 0.5i +L: line left 0.5i then down 0.25i +.ft B +"if" at L.end below +.ft R +move to L.start then right 0.5i +S: line right 0.5i then down 0.25i +"statement" at S.end below +move to S.start then right 0.5i +L: line right 0.5i then down 0.25i +.ft B +"else" at L.end below +.ft R +move to L.start then right 
0.5i +L: line right 0.5i then down 0.25i +box ht 0.33i wid 0.6i "statement-2" +move to S.end then down 0.10i +L: line down 0.25i dashed +"if-statement" at L.end below +move to L.end then down 0.10i +L: line down 0.5i +.ft B +"then" at L.end below +.ft R +move to L.start then down 0.25i +L: line left 0.5i then down 0.25i +box ht 0.33i wid 0.6i "boolean" "expression-2" +move to L.start then left 0.5i +L: line left 0.5i then down 0.25i +.ft B +"if" at L.end below +.ft R +move to L.start then right 0.5i +L: line right 0.5i then down 0.25i +box ht 0.33i wid 0.6i "statement-1" +.PE +.ps +.vs +\h'615u'(a)\h'1339u'(b) +.sp +.ce +Two parse trees showing the \fIdangling-else\fR ambiguity +.sp 2 +According to the standard, \fBelse\fR is matched with the nearest preceding +unmatched \fBthen\fR, i.e. parse tree (a) is valid (\fBISO 6.8.3.4\fR). +This conflict is statically resolved in LLgen by using "\fB%prefer\fR", +which is equivalent in behaviour to "\fB%if\fR(1)". +.bp +.NH 3 +Repetition conflict + +.LP +A repetition conflict arises when the parser can not decide whether to choose +a production once more, or not. +.br +\fBExample:\fR +.in +2m +.ft 5 +.nf +field-list : [ ( fixed-part [ \fB';'\f5 variant-part ]? | variant-part ) [;]? ]? ; +.br +fixed-part : record-section [ \fB';'\f5 record-section ]* ; +.fi +.in -2m +.ft R + +When the parser sees the semicolon, it can not decide whether another +record-section or a variant-part follows. This conflict can be resolved in +two ways: adjusting the grammar or using the conflict resolver, +"\fB%while\fR (C-expression)". The grammar rules that deal with this conflict +were completely rewritten. For more details, the reader is referred to the +file \fIdeclar.g\fR. +.sp 2 +.NH 2 +Semantic Analysis + +.LP +The third module of the compiler is the checking of semantic conventions of +ISO-Pascal. To check the program being parsed, actions have been used in +LLgen.
An action consists of several C-statements, enclosed in brackets +"{" and "}". In order to facilitate communication between the actions and +\fILLparse\fR, the parsing routines can be given C-like parameters and +local variables. An important part of the semantic analyzer is the symbol +table. This table stores all information concerning identifiers and their +definitions. Symbol-table lookup and hashing is done by a generic namelist +module [ACK]. The parser turns each program construction into a parse tree, +which is the major datastructure in the compiler. This parse tree is used +to exchange information between various routines. +.sp 2 +.NH 2 +Code Generation + +.LP +The final module in the compiler is that of code generation. The information +stored in the parse trees is used to generate the EM code [EM]. EM code is +generated with the help of a procedural EM-code interface [ACK]. The use of +static exchanges is not desired, since the fast back end can not cope with +static code exchanges, hence the EM pseudoinstruction \fBexc\fR is never +generated. +.br +Chapter 3 discusses the code generation in more detail. +.sp 2 +.NH 2 +Error Handling + +.LP +The first three modules have in common that they can detect errors in the +Pascal program being compiled. If this is the case, a proper message is given +and some action is performed. If code generation has to be aborted, an error +message is given, otherwise a warning is given. The constant MAXERR_LINE, +defined in the file \fIerrout.h\fR, specifies the maximum number of messages +given per line. This can be used to avoid long lists of error messages caused +by, for example, the omission of a ';'. Three kinds of errors can be +distinguished: the lexical error, the syntactic error, and the semantic error. +Examples of these errors are respectively, nested comments, an expression with +unbalanced parentheses, and the addition of two characters. 
+.sp 2 +.NH 2 +Memory Allocation and Garbage Collection + +.LP +The routines \fIst_alloc\fR and \fIst_free\fR provide a mechanism for +maintaining free lists of structures, whose first field is a pointer called +\fBnext\fR. This field is used to chain free structures together. Each +structure, suppose the tag of the structure is ST, has a free list pointed to +by h_ST. Associated with this list are the operations: \fInew_ST()\fR, an +allocating mechanism which supplies the space for a new ST struct; and +\fIfree_ST()\fR, a garbage collecting mechanism which links the specified +structure into the free list. +.bp diff --git a/doc/pascal/options.doc b/doc/pascal/options.doc new file mode 100644 index 0000000..a278b5e --- /dev/null +++ b/doc/pascal/options.doc @@ -0,0 +1,166 @@ +.sp 1.5i +.NH +Compiler options +.nh +.PP +There are some options available to control the behaviour of the compiler. +Two types of options can be distinguished: compile-time options and +run-time options. +.sp +.NH 2 +Compile time options +.LP +.sp +There are some options that can be set when the compiler is installed. +Those options can be found in the file \fIParameters\fR. To set a parameter +just modify its definition in the file \fIParameters\fR. The shell script +in the file \fImake.hfiles\fR creates for each parameter a separate .h file. +This mechanism is derived from the C compiler in ACK. +.sp +\fBIDFSIZE\fR +.in +3m +The maximum number of characters that are significant in an identifier. This +value has to be at least the value of \fBMINIDFSIZE\fR, defined in the file +\fIoptions.c\fR. A compile-time check is included to see if the value of +\fBMINIDFSIZE\fR is legal. The compiler will not recognize some keywords +if \fBIDFSIZE\fR is too small. +.in -3m +.sp +\fBISTRSIZE\fR, \fBRSTRSIZE\fR +.in +3m +The lexical analyzer uses these two values for the allocation of memory needed +to store a string. \fBISTRSIZE\fR is the initial number of bytes allocated.
+\fBRSTRSIZE\fR is the step size used for enlarging the memory needed. +.in -3m +.sp +\fBNUMSIZE\fR +.in +3m +The maximum length of a numeric constant recognized by the lexical analyzer. +It is an error if this length is exceeded. +.in -3m +.sp +\fBERROUT\fR, \fBMAXERR_LINE\fR +.in +3m +Used for error messages. \fBERROUT\fR defines the file on which the +messages are written. \fBMAXERR_LINE\fR is the maximum number of error +messages given per line. +.in -3m +.sp +\fBSZ_CHAR\fR, \fBAL_CHAR\fR, etc +.in +3m +The default values of the target machine sizes and alignments. The values +can be overruled with the \-V option. +.in -3m +.sp +\fBMAXSIZE\fR +.in +3m +This value must be set to the maximum of the values of the target machine +sizes. This parameter is used in overflow detection (see also section 3.2). +.in -3m +.sp +\fBDENSITY\fR +.in +3m +This parameter is used to decide what EM instruction has to be generated +for a case-statement. If the range of the index value is sparse, i.e. +.br +.ti +5m +(upperbound - lowerbound) / number_of_cases +.br +is more than some threshold (\fBDENSITY\fR) the \fBcsb\fR instruction is +chosen. If the range is dense a jump table is generated (\fBcsa\fR). This +uses more space. Reasonable values are 2, 3 or 4. +.br +Higher values might also be reasonable on machines, which have lots of +address space and memory (see also section 3.3.3). +.in -3m +.sp +\fBINP_READ_IN_ONE\fR +.in +3m +Used by the generic input module. It can either be defined or not defined. +Defining it has the effect that files will be read completely into memory +using only one read-system call. This should be used only on machines with +lots of memory. +.in -3m +.sp +.bp +\fBDEBUG\fR +.in +3m +.nf +If this parameter is defined some built-in compiler-debugging tools can be used: +.in +2m +\(bu only lexical analyzing is done, if the \-l option is given. +\(bu if the \-I option is turned on, the allocated number of structures is printed. 
+\(bu the routine debug can be used to print miscellaneous information. +\(bu the routine PrNode prints a tree of nodes. +\(bu the routine DumpType prints information about a type structure. +\(bu the macro DO_DEBUG(x,y) defined as ((x) && (y)) can be used to perform + several actions. +.in -2m +.in -3m +.sp +.NH 2 +Run time options +.LP +.sp +The run time options can be given in the command line when the compiler is +called. +.br +They all have the form: \-<option> +.br +Depending on the option, a character string has to be specified. The following +options are currently available: +.sp +.IP \-\fBC\fR 18 +The lower case and upper case letters are treated differently (\fBISO 6.1.1\fR). +.sp +.IP \-\fBu\fR +The character '_' is treated like a letter, so it is allowed to use the +underscore in identifiers. +.br +Note: identifiers starting with an underscore may cause problems, because +.br +\h'\w'Note: 'u'most identifiers in library routines start with an underscore. +.sp +.IP \-\fBn\fR +This option suppresses the generation of register messages. +.sp +.IP \-\fBr\fR +With this option rangechecks are generated where necessary. +.sp +.IP \-\fBL\fR +Do not generate EM \fBlin\fR and \fBfil\fR instructions. These instructions +are used only for profiling. +.sp +.IP \-\fBM\fR<number> +Set the number of characters that are significant in an identifier to <number>. +The maximum significant identifier length depends on the constant IDFSIZE, +defined in \fIidfsize.h\fR. +.sp +.IP \-\fBi\fR<number> +With this flag the setsize for a set of integers can be changed. The number must +be the number of bits per set. Default value : (#bits in a word) \- 1 +.sp +.IP \-\fBw\fR +Suppress warning messages (see also section 2.5). +.sp +.IP \-\fBV\fR[[\fBw\fR|\fBi\fR|\fBf\fR|\fBp\fR|\fBS\fR][\fIsize\fR]?[\fI.alignment\fR]?]* +.br +Option to set the object sizes and alignments on the target machine +dynamically.
The objects that can be manipulated are: +.br +\fBw\fR\h'\w'ifpS'u' word +.br +\fBi\fR\h'\w'wfpS'u' integer +.br +\fBf\fR\h'\w'wipS'u' float +.br +\fBp\fR\h'\w'wifS'u' pointer +.br +\fBS\fR\h'\w'wifp'u' structure +.br +In case of a structure, \fIsize\fR is discarded and the \fIalignment\fR is +the initial alignment of the structure. The effective alignment is the least +common multiple of \fIalignment\fR and the alignment of its members. This +option has been implemented so that the compiler can be used as cross +compiler. +.bp diff --git a/doc/pascal/proto.make b/doc/pascal/proto.make new file mode 100644 index 0000000..86bf8f1 --- /dev/null +++ b/doc/pascal/proto.make @@ -0,0 +1,28 @@ +# $Id: proto.make,v 1.2 1997/07/10 07:58:19 ceriel Exp $ + +#PARAMS do not remove this line! + +SRC_DIR = $(SRC_HOME)/doc/pascal + +PIC = pic + +SRC = \ + $(SRC_DIR)/ab+intro.doc \ + $(SRC_DIR)/internal.doc \ + $(SRC_DIR)/transpem.doc \ + $(SRC_DIR)/conf.doc \ + $(SRC_DIR)/options.doc \ + $(SRC_DIR)/extensions.doc \ + $(SRC_DIR)/deviations.doc \ + $(SRC_DIR)/hints.doc \ + $(SRC_DIR)/test.doc \ + $(SRC_DIR)/compar.doc \ + $(SRC_DIR)/improv.doc \ + $(SRC_DIR)/his.doc \ + $(SRC_DIR)/reference.doc \ + $(SRC_DIR)/syntax.doc \ + $(SRC_DIR)/rtl.doc \ + $(SRC_DIR)/example.doc + +$(TARGET_HOME)/doc/pascal.doc: $(SRC) + $(PIC) $(SRC) > $@ diff --git a/doc/pascal/reference.doc b/doc/pascal/reference.doc new file mode 100644 index 0000000..e99f16d --- /dev/null +++ b/doc/pascal/reference.doc @@ -0,0 +1,50 @@ +.ps 12 +.vs 14 +.NH +References +.sp +.nh +.IP [ISO] 8 +ISO 7185 Specification for Computer Programming Language Pascal, 1982, +Acornsoft ISO-PASCAL, 1984 +.sp +.IP [EM] +A.S. Tanenbaum, H. van Staveren, E.G. Keizer and J.W. Stevenson, +\fIDescription Of A Machine Architecture for use with Block Structured +Languages\fR, Informatica Rapport IR-81, Vrije Universiteit, Amsterdam, 1983 +.sp +.IP [C] +B.W. Kernighan and D.M. 
Ritchie, \fIThe C Programming Language\fR, +Prentice-Hall, 1978 +.sp +.IP [LL] +C.J.H. Jacobs, \fISome Topics in Parser Generation\fR, Informatica Rapport +IR-105, Vrije Universiteit, Amsterdam, October 1985 +.sp +.IP [IM2] +J.W. Stevenson, \fIPascal-VU Reference Manual and Unix Manual Pages\fR, +Informatica Manual IM-2, Vrije Universiteit, Amsterdam, 1980 +.sp +.IP [JEN] +K. Jensen and N.Wirth, \fIPascal User Manual and Report\fR, +Springer-Verlag, 1978 +.sp +.IP [ACK] +\fIACK Manual Pages\fR: ALLOC, ASSERT, EM_CODE, EM_MES, IDF, INPUT, PRINT, +STRING, SYSTEM +.sp +.IP [AHO] +A.V. Aho, R. Sethi and J.D. Ullman, \fICompiler Principles, Techniques, and +Tools\fR, Addison Wesley, 1985 +.sp +.IP [LEX] +M.E. Lesk, \fILex - A Lexical Analyser Generator\fR, Comp. Sci. Tech. Rep. +No. 39, Bell Laboratories, Murray Hill, New Jersey, October 1975 +.sp +.IP [PCV] +B.A. Wichmann and Z.J. Ciechanowicz, \fIPascal Compiler Validation\fR, John +Wiley & Sons, 1983 +.sp +.IP [SAL] +A.H.J. Sale, \fIA Note on Scope, One-Pass Compilers and Pascal\fR, Australian +Communications, 1, 1, 80-82, 1979 diff --git a/doc/pascal/rtl.doc b/doc/pascal/rtl.doc new file mode 100644 index 0000000..011375b --- /dev/null +++ b/doc/pascal/rtl.doc @@ -0,0 +1,85 @@ +.sp 1.5i +.ft B +Appendix B: Changes to the run time library +.ft R +.nh +.sp +Some minor changes in the run time library have been made concerning the +external files (i.e. program arguments). The old compiler reserved +space for the file structures of the external files in one \fBhol\fR block. +In the new compiler, every file structure is placed in a separate \fBbss\fR +block. This implies that the arguments with which \fI_ini\fR is called are +slightly different. The second argument was the base of the \fBhol\fR block +to relocate the buffer addresses, it is changed into an integer denoting the +size of the array passed as third argument. 
The third argument was a pointer +to an array of integers containing the description of external files, this +argument is changed into a pointer to an array of pointers to file structures. + +The differences in the generated EM code for an arbitrary Pascal program are +listed below (only the relevant parts are shown): +.in +5m +.nf +\fBprogram\fR external_files(output,f); +\fBvar\fR + f : \fBfile of \fIsome-type\fR; + . + . +\fBend\fR. +.in -5m + +EM code generated by Pascal-VU: +.in +5m + . + . + hol 1088,-2147483648,0 ; space belonging to file structures of the program arguments + . + . + . +\&.2 + con 3, -1, 544, 0 \h'80u'; description of external files + lxl 0 + lae .2 + lae 0 \h'146u'; base of hol block, to relocate buffer addresses + lxa 0 + cal $_ini + asp 16 + . + . +.in -5m + +EM code generated by our compiler: +.in +5m + . + . +f + bss 540,0,0 \h'100u'; space belonging to file structure of program argument f +output + bss 540,0,0 \h'100u'; space belonging to file structure of standard output + . + . + . +\&.2 + con 0U4, output, f \h'50u'; the absence of standard input is denoted by a null pointer + lxl 0 + lae .2 + loc 3 \h'144u'; denotes the size of the array of pointers to file structures + lxa 0 + cal $_ini + asp 16 + . + . +.in -5m + +.po +The following files in the run time library have been changed: +.in +1m +pc_file.h +hlt.c +ini.c +opn.c +pentry.c +pexit.c +.in -1m +.fi +.bp +.po diff --git a/doc/pascal/syntax.doc b/doc/pascal/syntax.doc new file mode 100644 index 0000000..ba6cfbe --- /dev/null +++ b/doc/pascal/syntax.doc @@ -0,0 +1,269 @@ +.sp 1.5i +.LP +.vs 14 +.nh +.ft B +Appendix A: ISO-PASCAL grammar +.ft R + + +\fBA.1 Lexical tokens\fR + +The syntax describes the formation of lexical tokens from characters and the +separation of these tokens, and therefore does not adhere to the same rules +as the syntax in A.2. 
+ +The lexical tokens used to construct Pascal programs shall be classified into +special-symbols, identifiers, directives, unsigned-numbers, labels and +character-strings. The representation of any letter (upper-case or lower-case, +differences of font, etc) occurring anywhere outside of a character-string +shall be insignificant in that occurrence to the meaning of the program. + +letter = \fBa\fR | \fBb\fR | \fBc\fR | \fBd\fR | \fBe\fR | \fBf\fR | \fBg\fR | \fBh\fR | \fBi\fR | \fBj\fR | \fBk\fR | \fBl\fR | \fBm\fR | \fBn\fR | \fBo\fR | \fBp\fR | \fBq\fR | \fBr\fR | \fBs\fR | \fBt\fR | \fBu\fR | \fBv\fR | \fBw\fR | \fBx\fR | \fBy\fR | \fBz\fR . + +digit = \fB0\fR | \fB1\fR | \fB2\fR | \fB3\fR | \fB4\fR | \fB5\fR | \fB6\fR | \fB7\fR | \fB8\fR | \fB9\fR . + + +The special symbols are tokens having special meanings and shall be used to +delimit the syntactic units of the language. + +special-symbol = \fB+\fR | \fB\-\fR | \fB*\fR | \fB/\fR | \fB=\fR | \fB<\fR | \fB>\fR | \fB[\fR | \fB]\fR | \fB.\fR | \fB,\fR | \fB:\fR | \fB;\fR | \fB^\fR | \fB(\fR | \fB)\fR | \fB<>\fR | \fB<=\fR | \fB>=\fR | \fB:=\fR | \fB..\fR | +\h'\w'special-symbol = 'u'word-symbol . + +word-symbol = \fBand\fR | \fBarray\fR | \fBbegin\fR | \fBcase\fR | \fBconst\fR | \fBdiv\fR | \fBdo\fR | \fBdownto\fR | \fBelse\fR | \fBend\fR | \fBfile\fR | \fBfor\fR | \fBfunction\fR | +\h'\w'word-symbol = 'u'\fBgoto\fR | \fBif\fR | \fBin\fR | \fBlabel\fR | \fBmod\fR | \fBnil\fR | \fBnot\fR | \fBof\fR | \fBor\fR | \fBpacked\fR | \fBprocedure\fR | \fBprogram\fR | \fBrecord\fR | +\h'\w'word-symbol = 'u'\fBrepeat\fR | \fBset\fR | \fBthen\fR | \fBto\fR | \fBtype\fR | \fBuntil\fR | \fBvar\fR | \fBwhile\fR | \fBwith\fR . + + +Identifiers may be of any length. All characters of an identifier shall be +significant. No identifier shall have the same spelling as any word-symbol. + +identifier = letter { letter | digit } . + + +A directive shall only occur in a procedure-declaration or function-declaration. 
+No directive shall have the same spelling as any word-symbol. + +directive = letter {letter | digit} . + + +Numbers are given in decimal notation. + +.nf +unsigned-integer = digit-sequence . +unsigned-real = unsigned-integer \fB.\fR fractional-part [ \fBe\fR scale-factor ] | unsigned-integer \fBe\fR scale-factor . +digit-sequence = digit {digit} . +fractional-part = digit-sequence . +scale-factor = signed-integer . +signed-integer = [sign] unsigned-integer . +sign = \fB+\fR | \fB\-\fR . +.fi + +.bp +Labels shall be digit-sequences and shall be distinguished by their apparent +integral values and shall be in the closed interval 0 to 9999. + +label = digit-sequence . + + +A character-string containing a single string-element shall denote a value of +the required char-type. Each string-character shall denote an implementation- +defined value of the required char-type. + +.nf +character-string = \fB'\fR string-element { string-element } \fB'\fR . +string-element = apostrophe-image | string-character . +apostrophe-image = \fB''\fR . +string-character = All 7-bits ASCII characters except linefeed (10), vertical tab (11), and new page (12). +.fi + + +The construct: + + \fB{\fR any-sequence-of-characters-and-separations-of-lines- not-containing-right-brace \fB}\fR + +shall be a comment if the "{" does not occur within a character-string or +within a comment. The substitution of a space for a comment shall not alter +the meaning of a program. + +Comments, spaces (except in character-strings), and the separation of +consecutive lines shall be considered to be token separators. Zero or more +token separators may occur between any two consecutive tokens, or before +the first token of a program text. No separators shall occur within tokens. +.bp +.po +\fBA.2 Grammar\fR + +The non-terminal symbol \fIprogram\fR is the start symbol of the grammar. + +.nf +actual-parameter : expression | variable-access | procedure-identifier | function-identifier . 
+actual-parameter-list : \fB(\fR actual-parameter { \fB,\fR actual-parameter } \fB)\fR . +adding-operator : \fB+\fR | \fB\-\fR | \fBor\fR . +array-type : \fBarray\fR \fB[\fR index-type { \fB,\fR index-type } \fB]\fR \fBof\fR component-type . +array-variable : variable-access . +assignment-statement : ( variable-access | function-identifier ) \fB:=\fR expression . + +base-type : ordinal-type . +block : label-declaration-part constant-definition-part type-definition-part variable-declaration-part +\h'\w'block : 'u'procedure-and-function-declaration-part statement-part . +Boolean-expression : expression . +bound-identifier : identifier . +buffer-variable : file-variable \fB^\fR . + +case-constant : constant . +case-constant-list : case-constant { \fB,\fR case-constant } . +case-index : expression . +case-list-element : case-constant-list \fB:\fR statement . +case-statement : \fBcase\fR case-index \fBof\fR case-list-element { \fB;\fR case-list-element } [ \fB;\fR ] \fBend\fR . +component-type : type-denoter . +component-variable : indexed-variable | field-designator . +compound-statement : \fBbegin\fR statement-sequence \fBend\fR . +conditional-statement : if-statement | case-statement . +conformant-array-parameter-specification : value-conformant-array-specification | +\h'+18.5m'variable-conformant-array-specification . +conformant-array-schema : packed-conformant-array-schema | unpacked-conformant-array-schema . +constant : [ sign ] ( unsigned-number | constant-identifier ) | character-string . +constant-definition : identifier \fB=\fR constant . +constant-definition-part : [ \fBconst\fR constant-definition \fB;\fR { constant-definition \fB;\fR } ] . +constant-identifier : identifier . +control-variable : entire-variable . + +domain-type : type-identifier . + +else-part : \fBelse\fR statement . +empty-statement : . +entire-variable : variable-identifier . +enumerated-type : \fB(\fR identifier-list \fB)\fR . 
+expression : simple-expression [ relational-operator simple-expression ] . +.bp +.po +factor : variable-access | unsigned-constant | bound-identifier | function-designator | set-constructor | +\h'\w'factor : 'u'\fB(\fR expression \fB)\fR | \fBnot\fR factor . +field-designator : record-variable \fB.\fR field-specifier | field-designator-identifier . +field-designator-identifier : identifier . +field-identifier : identifier . +field-list : [ ( fixed-part [ \fB;\fR variant-part ] | variant-part ) [ \fB;\fR ] ] . +field-specifier : field-identifier . +file-type : \fBfile\fR \fBof\fR component-type . +file-variable : variable-access . +final-value : expression . +fixed-part : record-section { \fB;\fR record-section } . +for-statement : \fBfor\fR control-variable \fB:=\fR initial-value ( \fBto\fR | \fBdownto\fR ) final-value \fBdo\fR statement . +formal-parameter-list : \fB(\fR formal-parameter-section { \fB;\fR formal-parameter-section } \fB)\fR . +formal-parameter-section : value-parameter-specification | variable-parameter-specification | +\h'\w'formal-parameter-section : 'u'procedural-parameter-specification | functional-parameter-specification | +\h'\w'formal-parameter-section : 'u'conformant-array-parameter-specification . +function-block : block . +function-declaration : function-heading \fB;\fR directive | function-identification \fB;\fR function-block | +\h'\w'function-declaration : 'u'function-heading \fB;\fR function-block . +function-designator : function-identifier [ actual-parameter-list ] . +function-heading : \fBfunction\fR identifier [ formal-parameter-list ] \fB:\fR result-type . +function-identification : \fBfunction\fR function-identifier . +function-identifier : identifier . +functional-parameter-specification : function-heading . + +goto-statement : \fBgoto\fR label . + +identified-variable : pointer-variable \fB^\fR . +identifier-list : identifier { \fB,\fR identifier } . 
+if-statement : \fBif\fR Boolean-expression \fBthen\fR statement [ else-part ] . +index-expression : expression . +index-type : ordinal-type . +index-type-specification : identifier \fB..\fR identifier \fB:\fR ordinal-type-identifier . +indexed-variable : array-variable \fB[\fR index-expression { \fB,\fR index-expression } \fB]\fR . +initial-value : expression . + +label : digit-sequence . +label-declaration-part : [ \fBlabel\fR label { \fB,\fR label } \fB;\fR ] . + +member-designator : expression [ \fB..\fR expression ] . +multiplying-operator : \fB*\fR | \fB/\fR | \fBdiv\fR | \fBmod\fR | \fBand\fR . +.bp +.po +new-ordinal-type : enumerated-type | subrange-type . +new-pointer-type : \fB^\fR domain-type . +new-structured-type : [ \fBpacked\fR ] unpacked-structured-type . +new-type : new-ordinal-type | new-structured-type | new-pointer-type . + +ordinal-type : new-ordinal-type | ordinal-type-identifier . +ordinal-type-identifier : type-identifier . + +packed-conformant-array-schema : \fBpacked\fR \fBarray\fR \fB[\fR index-type-specification \fB]\fR \fBof\fR type-identifier . +pointer-type-identifier : type-identifier . +pointer-variable : variable-access . +procedural-parameter-specification : procedure-heading . +procedure-and-function-declaration-part : { ( procedure-declaration | function-declaration ) \fB;\fR } . +procedure-block : block . +procedure-declaration : procedure-heading \fB;\fR directive | procedure-identification \fB;\fR procedure-block | +\h'\w'procedure-declaration : 'u'procedure-heading \fB;\fR procedure-block . +procedure-heading : \fBprocedure\fR identifier [ formal-parameter-list ] . +procedure-identification : \fBprocedure \fR procedure-identifier . +procedure-identifier : identifier . +procedure-statement : procedure-identifier ( [ actual-parameter-list ] | read-parameter-list | readln-parameter-list | +\h'\w'procedure-statement : procedure-identifier ( ['u'write-parameter-list | writeln-parameter-list ) . 
+program : program-heading \fB;\fR program-block \fB.\fR . +program-block : block . +program-heading : \fBprogram\fR identifier [ \fB(\fR program-parameters \fB)\fR ] . +program-parameters : identifier-list . + +read-parameter-list : \fB(\fR [ file-variable \fB,\fR ] variable-access { \fB,\fR variable-access } \fB)\fR . +readln-parameter-list : [ \fB(\fR ( file-variable | variable-access ) { \fB,\fR variable-access } \fB)\fR ] . +record-section : identifier-list \fB:\fR type-denoter . +record-type : \fBrecord\fR field-list \fBend\fR . +record-variable : variable-access . +record-variable-list : record-variable { \fB,\fR record-variable } . +relational-operator : \fB=\fR | \fB<>\fR | \fB<\fR | \fB>\fR | \fB<=\fR | \fB>=\fR | \fBin\fR . +repeat-statement : \fBrepeat\fR statement-sequence \fBuntil\fR Boolean-expression . +repetitive-statement : repeat-statement | while-statement | for-statement . +result-type : simple-type-identifier | pointer-type-identifier . + +set-constructor : \fB[\fR [ member-designator { \fB,\fR member-designator } ] \fB]\fR . +set-type : \fBset\fR \fBof\fR base-type . +sign : \fB+\fR | \fB\-\fR . +simple-expression : [ sign ] term { adding-operator term } . +simple-statement : empty-statement | assignment-statement | procedure-statement | goto-statement . +simple-type-identifier : type-identifier . +.bp +.po +statement : [ label \fB:\fR ] ( simple-statement | structured-statement ) . +statement-part : compound-statement . +statement-sequence : statement { \fB;\fR statement } . +structured-statement : compound-statement | conditional-statement | repetitive-statement | with-statement . +subrange-type : constant \fB..\fR constant . + +tag-field : identifier . +tag-type : ordinal-type-identifier . +term : factor { multiplying-operator factor } . +type-definition : identifier \fB=\fR type-denoter . +type-definition-part : [ \fBtype\fR type-definition \fB;\fR { type-definition \fB;\fR } ] . +type-denoter : type-identifier | new-type . 
+type-identifier : identifier . + +unpacked-conformant-array-schema : \fBarray\fR \fB[\fR index-type-specification { \fB;\fR index-type-specification } \fB]\fR \fBof\fR +\h'\w'unpacked-conformant-array-schema : 'u'( type-identifier | conformant-array-schema ) . +unpacked-structured-type : array-type | record-type | set-type | file-type . +unsigned-constant : unsigned-number | character-string | constant-identifier | \fBnil\fR . +unsigned-number : unsigned-integer | unsigned-real . + +value-conformant-array-specification : identifier-list \fB:\fR conformant-array-schema . +value-parameter-specification : identifier-list \fB:\fR type-identifier . +variable-access : entire-variable | component-variable | identified-variable | buffer-variable . +variable-conformant-array-specification : \fBvar\fR identifier-list \fB:\fR conformant-array-schema . +variable-declaration : identifier-list \fB:\fR type-denoter . +variable-declaration-part : [ \fBvar\fR variable-declaration \fB;\fR { variable-declaration \fB;\fR } ] . +variable-identifier : identifier . +variable-parameter-specification : \fBvar\fR identifier-list \fB:\fR type-identifier . +variant : case-constant-list \fB:\fR \fB(\fR field-list \fB)\fR . +variant-part : \fBcase\fR variant-selector \fBof\fR variant { \fB;\fR variant } . +variant-selector : [ tag-field \fB:\fR ] tag-type . + +while-statement : \fBwhile\fR Boolean-expression \fBdo\fR statement . +with-statement : \fBwith\fR record-variable-list \fBdo\fR statement . +write-parameter : expression [ \fB:\fR expression [ \fB:\fR expression ] ] . +write-parameter-list : \fB(\fR [ file-variable \fB,\fR ] write-parameter { \fB,\fR write-parameter } \fB)\fR . +writeln-parameter-list : [ \fB(\fR ( file-variable | write-parameter ) { \fB,\fR write-parameter } \fB)\fR ] . 
+.fi +.vs +.bp +.po diff --git a/doc/pascal/test.doc b/doc/pascal/test.doc new file mode 100644 index 0000000..60220a0 --- /dev/null +++ b/doc/pascal/test.doc @@ -0,0 +1,19 @@ +.sp 2 +.NH +Testing the compiler +.nh +.sp +.LP +Although it is practically impossible to prove the correctness of a compiler, +a systematic method of testing the compiler is used to increase the confidence +that it will work satisfactorily in practice. The first step was to see if +the lexical analysis was performed correctly. For this purpose, the routine +LexScan() was used (see also the \-l option). Next we tested the parser +generated by LLgen, to see whether correct Pascal programs were accepted and +garbage was dealt with gracefully. The biggest test involved was the +validation of the semantic analysis. Simultaneously we tested the code +generation. First some small Pascal test programs were translated and +executed. When these programs worked correctly, the Pascal validation suite +and a large set of Pascal test programs were compiled to see whether they +behaved in the manner the standard specifies. For more details about the +Pascal validation suite, the reader is referred to [PCV]. diff --git a/doc/pascal/titlepg.doc b/doc/pascal/titlepg.doc new file mode 100644 index 0000000..af074c0 --- /dev/null +++ b/doc/pascal/titlepg.doc @@ -0,0 +1,13 @@ +\v'3i' +.ps 36 +The ACK Pascal Compiler +.ps 12 +.sp 30 +.ce 5 +.ft I +There is always something like something that there should not be. +.sp 2 +.ps 10 +For Whom The Bell Tolls +.ft R +Ernest Hemingway diff --git a/doc/pascal/transpem.doc b/doc/pascal/transpem.doc new file mode 100644 index 0000000..ede7936 --- /dev/null +++ b/doc/pascal/transpem.doc @@ -0,0 +1,407 @@ +.sp 1.5i +.de CL +.ft R +c\\$1 +.ft 5 + \fIcode statement-\\$1 +.ft 5 + \fBbra *\fRexit_label +.ft 5 +.. +.NH +Translation of Pascal to EM code +.nh +.LP +.sp +A short description of the translation of Pascal constructs to EM code is +given in the following paragraphs.
The EM instructions and Pascal terminal +symbols are printed in \fBboldface\fR. A sentence in \fIitalics\fR is a +description of a group of EM (pseudo)instructions. +.sp +.NH 2 +Global Variables +.LP +.sp +For every global variable, a \fBbss\fR block is reserved. To enhance the +readability of the EM-code generated, the variable-identifier is used as +a data label to address the block. +.sp +.NH 2 +Expressions +.LP +.sp +Operands are always evaluated, so the execution of +.br +.ti +3m +\fBif\fR ( p <> nil ) \fBand\fR ( p^.value <> 0 ) \fBthen\fR ..... +.br +might cause a run-time error, if p is equal to nil. +.LP +The left-hand operand of a dyadic operator is almost always evaluated before +the right-hand side. Peculiar evaluations exist for the following cases: +.sp +the expression: set1 <= set2, is evaluated as follows : +.nf +- evaluate set2 +- evaluate set1 +- compute set2+set1 +- test set2 and set2+set1 for equality +.fi +.sp +the expression: set1 >= set2, is evaluated as follows : +.nf +- evaluate set1 +- evaluate set2 +- compute set1+set2 +- test set1 and set1+set2 for equality +.fi +.sp +Where allowed, according to the standard, constant integral expressions are +compile-time evaluated while an effort is made to report overflow on target +machine basis. The integral expressions are evaluated in the type \fIarith\fR. +The size of an arith is assumed to be at least the size of the integer type +on the target machine. If the target machine's integer size is less than the +size of an arith, overflow can be detected at compile-time. However, the +following call to the standard procedure new, \fInew(p, 3+5)\fR, is illegal, +because the second parameter is not a constant according to the grammar. +.sp +Constant floating expressions are not compile-time evaluated, because the +precision on the target machine and the precision on the machine on which the +compiler runs could be different. The boolean expression \fI(1.0 + 1.0) = 2.0\fR +could evaluate to false. 
+.sp +.NH 2 +Statements +.NH 3 +Assignment Statement + +\fRPASCAL : +.ti +3m +\f5(variable-access | function-identifier) \fB:=\f5 expression + +\fREM : +.nf +.in +3m +.ft I +evaluate expression +store in variable-access or function-identifier +.ft R +.in -3m +.fi + +In case of a function-identifier, a hidden temporary variable is used to +keep the function result. +.bp +.NH 3 +Goto Statement + +\fRPASCAL : +.ti +3m +\fBGOTO\f5 label + +\fREM : +.in +3m +Two cases can be distinguished : +.br +- local goto, +.ti +2m +in which a \fBbra\fR is generated. + +- non-local goto, +.in +2m +.ll -1i +a goto_descriptor is built, containing the ProgramCounter of the instruction +jumped to and an offset in the target procedure frame which contains the +value of the StackPointer after the jump. The code for the jump itself is to +load the address of the goto_descriptor, followed by a push of the LocalBase +of the target procedure and a \fBcal\fR $_gto. A message is generated to +indicate that a procedure or function contains a statement which is the +target of a non-local goto.
+.ll +1i +.in -2m +.in -3m +.sp 2 +.NH 3 +If Statement + +\fRPASCAL : +.in +3m +.ft 5 +\fBIF\f5 boolean-expression \fBTHEN\f5 statement + +.in -3m +\fREM : +.nf +.in +3m + \fIevaluation boolean-expression + \fBzeq \fR*exit_label + \fIcode statement +\fRexit_label +.in -3m +.fi +.sp 2 +\fRPASCAL : +.in +3m +.ft 5 +\fBIF\f5 boolean-expression \fBTHEN\f5 statement-1 \fBELSE\f5 statement-2 + +.in -3m +\fREM : +.nf +.in +3m + \fIevaluation boolean-expression + \fBzeq \fR*else_label + \fIcode statement-1 + \fBbra \fR*exit_label +\fRelse_label + \fIcode statement-2 +\fRexit_label +.in -3m +.fi +.sp 2 +.NH 3 +Repeat Statement + +\fRPASCAL : +.in +3m +.ft 5 +\fBREPEAT\f5 statement-sequence \fBUNTIL\f5 boolean-expression + +.in -3m +\fREM : +.nf +.in +3m +\fRrepeat_label + \fIcode statement-sequence + \fIevaluation boolean-expression + \fBzeq\fR *repeat_label +.in -3m +.fi +.bp +.NH 3 +While Statement + +\fRPASCAL : +.in +3m +.ft 5 +\fBWHILE\f5 boolean-expression \fBDO\f5 statement + +.in -3m +\fREM : +.nf +.in +3m +\fRwhile_label + \fIevaluation boolean-expression + \fBzeq\fR *exit_label + \fIcode statement + \fBbra\fR *while_label +\fRexit_label +.in -3m +.fi +.sp 2 +.NH 3 +Case Statement +.LP +.sp +The case-statement is implemented using the \fBcsa\fR and \fBcsb\fR +instructions. + +\fRPASCAL : +.in +3m +\fBCASE\f5 case-expression \fBOF\f5 +.in +5m +case-constant-list-1 \fB:\f5 statement-1 \fB;\f5 +.br +case-constant-list-2 \fB:\f5 statement-2 \fB;\f5 +.br +\&. +.br +\&. +.br +case-constant-list-n \fB:\f5 statement-n [\fB;\f5] +.in -5m +\fBEND\fR +.in -3m +.sp 2 +.LP +.ll -1i +The \fBcsa\fR instruction is used if the range of the case-expression +value is dense, i.e. +.br +.ti +3m +\f5( upperbound \- lowerbound ) / number_of_cases\fR +.br +is less than the constant DENSITY, defined in the file \fIdensity.h\fR. + +If the range is sparse, a \fBcsb\fR instruction is used. + +.ll +1i +\fREM : +.nf +.in +3m + \fIevaluation case-expression + \fBbra\fR *l1 +.CL 1 +.CL 2 + . + . 
+.CL n +.ft R +\&.case_descriptor +.ft 5 + \fIgeneration case_descriptor +\fRl1 +.ft 5 + \fBlae\fR .case_descriptor +.ft 5 + \fBcsa\fR size of (case-expression) +\fRexit_label +.in -3m +.fi +.bp +.NH 3 +For Statement + +\fRPASCAL : +.in +3m +.ft 5 +\fBFOR\f5 control-variable \fB:=\f5 initial-value (\fBTO\f5 | \fBDOWNTO\f5) final-value \fBDO\f5 statement + +.ft R +.in -3m +The initial-value and final-value are evaluated at the beginning of the loop. +If the values are not constant, they are evaluated once and stored in a +temporary. + +EM : +.nf +.in +3m + \fIload initial-value + \fIload final-value + \fBbgt\fR exit-label (* DOWNTO : \fBblt\fI exit-label\fR *) + \fIload initial-value +\fRl1 + \fIstore in control-variable + \fIcode statement + \fIload control-variable + \fBdup\fI control-variable + \fIload final-value + \fBbeq\fR exit_label + \fBinc\fI control-variable\fR (* DOWNTO : \fBdec\fI control-variable\fR *) + \fBbra *\fRl1 +\fRexit_label +.in -3m +.fi + +Note: testing must be done before incrementing(decrementing) the +control-variable, +.br +\h'\w'Note: 'u'because wraparound could occur, which could lead to an infinite +loop. +.sp 2 +.NH 3 +With Statement + +\fRPASCAL : +.ti +3m +\fBWITH\f5 record-variable-list \fBDO\f5 statement + +.ft R +The statement +.ti +3m +\fBWITH\fR r\s-3\d1\u\s0, r\s-3\d2\u\s0, ..., r\s-3\dn\u\s0 \fBDO\f5 statement + +.ft R +is equivalent to +.in +3m +\fBWITH\fR r\s-3\d1\u\s0 \fBDO\fR + \fBWITH\fR r\s-3\d2\u\s0 \fBDO\fR + ... 
+ \fBWITH\fR r\s-3\dn\u\s0 \fBDO\f5 statement + +.ft R +.in -3m +The translation of +.ti +3m +\fBWITH\fR r\s-3\d1\u\s0 \fBDO\f5 statement +.br +.ft R +is +.nf +.in +3m +\fIpush address of r\s-3\d1\u\s0 +\fIstore address in temporary +\fIcode statement +.in -3m +.fi + +.ft R +An occurrence of a field is translated into: +.in +3m +\fIload temporary +.br +\fIadd field-offset +.in -3m +.bp +.NH 2 +Procedure and Function Calls + +.ft R +In general, the call +.ti +5m +p(a\s-3\d1\u\s0, a\s-3\d2\u\s0, ...., a\s-3\dn\u\s0) +.br +is translated into the sequence: + +.in +5m +.nf +\fIevaluate a\s-3\dn\u\s0 +\&. +\&. +\fIevaluate a\s-3\d2\u\s0 +\fIevaluate a\s-3\d1\u\s0 +\fIpush localbase +\fBcal\fR $p +\fIpop parameters +.ft R +.fi +.in -5m + +i.e. the order of evaluation and binding of the actual-parameters is from +right to left. In general, a copy of the actual-parameter is made when the +formal-parameter is a value-parameter. If the formal-parameter is a +variable-parameter, a pointer to the actual-parameter is pushed. + +In case of a function call, a \fBlfr\fR is generated, which pushes the +function result on top of the stack. +.sp 2 +.NH 2 +Register Messages + +.ft R +A register message can be generated to indicate that a local variable is never +referenced indirectly. This implies that a register can be used for a variable. +We distinguish the following classes, given in decreasing priority: + +\(bu control-variable and final-value of a for-statement +.br +.ti +5m +to speed up testing, and execution of the body of the for-statement +.sp +\(bu record-variable of a with-statement +.br +.ti +5m +to improve the field selection of a record +.sp +\(bu remaining local variables and parameters +.sp 2 +.NH 2 +Compile-time optimizations + +.ft R +The only optimization that is performed is the evaluation of constant +integral expressions. 
The optimization of constructs like +.ti +5m +\fBif\f5 false \fBthen\f5 statement\fR, +.br +is left to either the peephole optimizer, or a global optimizer. diff --git a/doc/pascal/vrk.doc b/doc/pascal/vrk.doc new file mode 100644 index 0000000..c5622a5 --- /dev/null +++ b/doc/pascal/vrk.doc @@ -0,0 +1,23 @@ +.TL + + + +The ACK Pascal Compiler +.AU +Aad Geudeke +Frans Hofmeester +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.LP +.ps 12 +.sp 24 +.ce 5 +.ft I +There is always something like something that there should not be. +.sp 2 +.ps 10 +For Whom The Bell Tolls +.ft R +Ernest Hemingway diff --git a/doc/pcref.doc b/doc/pcref.doc new file mode 100644 index 0000000..c86fad5 --- /dev/null +++ b/doc/pcref.doc @@ -0,0 +1,1204 @@ +.\" $Id: pcref.doc,v 1.10 1994/06/24 10:02:16 ceriel Exp $ +.\" tbl pcref.doc | troff +.ds OF \\fBtest~off:~\\fR +.ds ON \\fBtest~on:~~\\fR +.ds AL \\fBtest~all:~\\fR +.ll 72n +.wh 0 hd +.wh 60 fo +.de hd +'sp 5 +.. +.de fo +'bp +.. +.tr ~ +. TITLE +.de TL +.sp 10 +.ce +\\fB\\$1\\fR +.sp 10 +.. +. AUTHOR +.de AU +.ce +by +.sp 2 +.ce +\\$1 +.. +. OTHER AUTHOR +.de OA +.sp 2 +.ce +(revised) +.sp 2 +.ce +\\$1 +.. +. DATE +.de DA +.sp 1 +.ce +( \\$1 ) +.. +. INSTITUTE +.de VU +.sp 3 +.ce 4 +Vakgroep Informatica +Vrije Universiteit +De Boelelaan 1081 +Amsterdam +.. +. PARAGRAPH +.de PP +.sp +.ti +5 +.. +.nr CH 0 1 +. CHAPTER +.de CH +.nr SH 0 1 +.bp +.in 0 +\\fB\\n+(CH.~\\$1\\fR +.PP +.. +. SUBCHAPTER +.de SH +.sp 3 +.in 0 +\\fB\\n(CH.\\n+(SH.~\\$1\\fR +.PP +.. +. INDENT START +.de IS +.sp +.in +5 +.. +. INDENT END +.de IE +.in -5 +.sp +.. +. DOUBLE INDENT START +.de DS +.sp +.in +5 +.ll -5 +.. +. DOUBLE INDENT END +.de DE +.ll +5 +.in -5 +.sp +.. +. EQUATION START +.de EQ +.sp +.nf +.. +. EQUATION END +.de EN +.fi +.sp +.. +. ITEM +.de IT +.sp +.in 0 +\\fBBS~\\$1:\\fR~\\ +.. +.de CS +.br +~-~\\ +.. +.br +.fi +.TL "Amsterdam Compiler Kit-Pascal reference manual" +.AU "Johan W. 
Stevenson" +.DA "January 4, 1983" +.OA "Hans van Eck" +.DA "May 1, 1989" +.VU +.CH "Introduction" +This document refers to the (1982) BSI standard for Pascal [1]. +Ack-Pascal complies with the requirements of level 1 of BS 6192: 1982, with +the exceptions as listed in this document. +.PP +The standard requires an accompanying document describing the +implementation-defined and implementation-dependent features, +the reaction on errors and the extensions to standard Pascal. +These four items will be treated in the rest of this document, +each in a separate chapter. +The other chapters describe the deviations from the standard and +the list of options recognized by the compiler. +.PP +The Ack-Pascal compiler produces code for an EM machine as defined in [2]. +It is up to the implementor of the EM machine to decide whether errors like +integer overflow, undefined operand and range bound error are recognized or not. +.PP +There does not (yet) exist a hardware EM machine. +Therefore, EM programs must be interpreted, or translated into +instructions for a target machine. +The Ack-Pascal compiler is currently available for use with the VAX, +Motorola MC68020, Motorola MC68000, +PDP-11, and Intel 8086 code-generators. +For the 8086, MC68000, and MC68020, +floating point emulation is used. This is made available with the \fI-fp\fP +option, which must be passed to \fIack\fP[3]. +.IE +.CH "Implementation-defined features" +For each implementation-defined feature mentioned in the BSI standard +we give the section number, the quotation from that section and the definition. +First we quote the definition of implementation-defined: +.DS +Possibly differing between processors, but defined for any particular +processor. +.DE +.IT 6.1.7 +Each string-character shall denote an implementation-defined value of the +required char-type. +.IS +All 7-bits ASCII characters except linefeed LF (10) are allowed. 
+.IE +.IT 6.4.2.2 +The values of type real shall be an implementation-defined subset +of the real numbers denoted as specified by 6.1.5 by signed real. +.IS +The format of reals is not defined in EM. +Even the size of reals depends on the EM-implementation. +The compiler can be instructed, by the V-option, to use a different +size for real values. +The size of reals is preset by the calling program \fIack\fP +[3] to the proper size. +.IE +.IT 6.4.2.2 +The type char shall be the enumeration of a set of implementation-defined +characters, some possibly without graphic representations. +.IS +The 7-bits ASCII character set is used, where LF (10) denotes the +end-of-line marker on text-files. +.IT 6.4.2.2 +The ordinal numbers of the character values shall be values of integer-type, +that are implementation-defined, and that are determined by mapping +the character values on to consecutive non-negative integer values +starting at zero. +.IS +The normal ASCII ordering is used: ord('0')=48, ord('A')=65, ord('a')=97, etc. +.IE +.IT 6.6.5.2 +The post-assertions imply corresponding activities on the external entities, +if any, to which the file-variables are bound. These activities, and the +point at which they are actually performed, shall be +implementation-defined. +.IS +The reading and writing of objects on files is buffered. +This means that when a program terminates abnormally, IO may be +unfinished. Terminal IO is unbuffered. +Files are closed whenever they are rewritten or reset, or on +program termination. +.IT 6.7.2.2 +The predefined constant maxint shall be of integer-type and shall denote +an implementation-defined value, that satisfies the following conditions: +.sp 1 +.in +5 +.ti -4 +(a)~All integral values in the closed interval from -maxint to +maxint +shall be values of the integer-type.
+.ti -4 +(b)~Any monadic operation performed on an integer value in this interval +shall be correctly performed according to the mathematical rules for +integer arithmetic. +.ti -4 +(c)~Any dyadic integer operation on two integer values in this same interval +shall be correctly performed according to the mathematical rules for +integer arithmetic, provided that the result is also in this interval. +.ti -4 +(d)~Any relational operation on two integer values in this same interval +shall be correctly performed according to the mathematical rules for +integer arithmetic. +.in -5 +.IS +The representation of integers in EM is a \fIn\fP*8-bit word using +two's complement arithmetic. +Where \fIn\fP is called wordsize. +The range of available integers depends on the EM implementation: +For 2-byte machines, the integers range from -32767 to +32767. For 4-byte +machines, the integers range from -2147483647 to 2147483647. +The number -maxint-1 may be used to indicate 'undefined'. +.IE +.IT 6.7.2.2 +The result of the real arithmetic operators and functions shall be +approximations to the corresponding mathematical results. The accuracy of +this approximation shall be implementation-defined +.IS +Since EM doesn't specify floating point format, it is not possible to +specify the accuracy. When the floating point emulation is used, and the +default size of reals is 8 bytes, the accuracy is 11 bits for the exponent, +and 53 bits for the mantissa. This gives an accuracy of about 16 digits, +and exponents ranging from -309 to +307. +.IE +.IT 6.9.3.1 +The default TotalWidth values for integer, Boolean and real types +shall be implementation-defined. +.IS +The defaults are: + integer 6 for 2-byte machines, 11 for 4-byte machines + Boolean 5 + real 14 +.IT 6.9.3.4.1 +ExpDigits, the number of digits written in an exponent part of a real, +shall be implementation-defined. +.IS +ExpDigits is defined as 3. This is sufficient for all implementations +currently available. 
When the representation would need more than 3 +digits, then the string '***' replaces the exponent. +.IT 6.9.3.4.1 +The character written as part of the representation of +a real to indicate the beginning of the exponent part shall be +implementation-defined, either 'E' or 'e'. +.IS +The exponent part starts with 'e'. +.IT 6.9.3.5 +The case of the characters written as representation of the +Boolean values shall be implementation-defined. +.IS +The representations of true and false are 'true' and 'false'. +.IT 6.9.5 +The effect caused by the standard procedure page +on a text file shall be implementation-defined. +.IS +The ASCII character form feed FF (12) is written. +.IT 6.10 +The binding of the variables denoted by the program-parameters +to entities external to the program shall be implementation-defined if +the variable is of a file-type. +.IS +The program parameters must be files and all, except input and output, +must be declared as such in the program block. +.PP +The program parameters input and output, if specified, will correspond +with the UNIX streams 'standard input' and 'standard output'. +.PP +The other program parameters will be mapped to the argument strings +provided by the caller of this program. +The argument strings are supposed to be path names of the files to be +opened or created. +The order of the program parameters determines the mapping: +the first parameter is mapped onto the first argument string etc. +Note that input and output are ignored in this mapping. +.PP +The mapping is recalculated each time a program parameter +is opened for reading or writing by a call to the standard procedures +reset or rewrite. +This gives the programmer the opportunity to manipulate the list +of string arguments using the external procedures argc, argv and argshift +available in libpc [6]. +.IT 6.10 +The effect of an explicit use of reset or rewrite +on the standard textfiles input or output shall be implementation-defined. 
+.IS +The procedures reset and rewrite are no-ops +if applied to input or output. +.CH "Implementation-dependent features" +For each implementation-dependent feature mentioned in the BSI standard, +we give the section number, the quotation from that section and the way +this feature is treated by the Ack-Pascal system. +First we quote the definition of 'implementation-dependent': +.DS +Possibly differing between processors and not necessarily defined for any +particular processor. +.DE +.IT 6.7.2.1 +The order of evaluation of the operands of a dyadic operator +shall be implementation-dependent. +.IS +Operands are always evaluated, so the program part +.EQ + if (p<>nil) and (p^.value<>0) then +.EN +is probably incorrect. +.PP +The left-hand operand of a dyadic operator is almost always evaluated +before the right-hand side. +Some peculiar evaluations exist for the following cases: +.IS +.ti -3 +1.~\ +the modulo operation is performed by a library routine to +check for negative values of the right operand. +.sp +.ti -3 +2.~\ +the expression +.EQ + set1 <= set2 +.EN +where set1 and set2 are compatible set types is evaluated in the +following steps: +.IS +.CS +evaluate set2 +.CS +evaluate set1 +.CS +compute set2+set1 +.CS +test set2 and set2+set1 for equality +.IE +.sp +.ti -3 +3.~\ +the expression +.EQ + set1 >= set2 +.EN +where set1 and set2 are compatible set types is evaluated in the following steps: +.IS +.CS +evaluate set1 +.CS +evaluate set2 +.CS +compute set1+set2 +.CS +test set1 and set1+set2 for equality +.IE +.IE +.IT 6.7.3 +The order of evaluation, accessing and binding +of the actual-parameters for functions +shall be implementation-dependent. +.IS +The order of evaluation is from right to left. +.IT 6.8.2.2 +The decision as to the order of accessing the variable and evaluating +the expression in an assignment-statement, shall be +implementation-dependent. +.IS +The expression is evaluated first. 
+.IT 6.8.2.3 +The order of evaluation and binding of the actual-parameters for procedures +shall be implementation-dependent. +.IS +The same as for functions. +.IT 6.9.5 +The effect of inspecting a text file to which the page +procedure was applied during generation is +implementation-dependent. +.IS +The formfeed character written by page is +treated like a normal character, with ordinal value 12. +.IT 6.10 +The binding of the variables denoted by the program-parameters +to entities external to the program shall be implementation-dependent unless +the variable is of a file-type. +.IS +Only variables of a file-type are allowed as program parameters. +.IE +.CH "Error handling" +There are three classes of errors to be distinguished. +In the first class are the error messages generated by the compiler. +The second class consists of the occasional errors generated by the other +programs involved in the compilation process. +Errors of the third class are the errors as defined in the standard by: +.DS +An error is a violation by a program of the requirements of this standard +that a processor is permitted to leave undetected. +.DE +.SH "Compiler errors" +Errors are written on the standard error output. Each line has the form: +.br +<file>, line <number>: <description> +.br +Every time the compiler detects an error that does not have influence +on the code produced by the compiler or on the syntax decisions, a warning +message is given. +If only warnings are generated, compilation proceeds and probably results +in a correctly compiled program. +.PP +Sometimes the compiler produces several errors for the same line. They are +only shown up to a maximum of 5 errors per line. Warnings are also shown up +to a maximum of 5 per line. +.PP +Extensive treatment of these errors is outside the scope of this manual. +.SH "Runtime errors" +Errors detected at run time cause an error message to be generated on the +diagnostic output stream (UNIX file descriptor 2).
+The message consists of the name of the program followed by a message +describing the error, possibly followed by the source line number. +Unless the -L-option is turned on, the compiler generates code to keep track +of which source line causes which EM instructions to be generated. +It depends on the EM implementation whether these LIN instructions +are skipped or executed. +.PP +For each error mentioned in the standard we give the section number, +the quotation from that section and the way it is processed by the +Pascal-compiler or runtime system. +.PP +For detected errors the corresponding message +and trap number are given. +Trap numbers are useful for exception-handling routines. +Normally, each error causes the program to terminate. +By using exception-handling routines one can +ignore errors or perform alternate actions. +Only some of the errors can be ignored +by restarting the failing instruction. +These errors are marked as non-fatal, +all others as fatal. +A list of errors with trap number between 0 and 63 +(EM errors) can be found in [2]. +Errors with trap number between 64 and 127 (Pascal errors) are listed in [7]. +.IT 6.4.6 +It shall be an error if a value of type T2 must be +assignment-compatible with type T1, while +T1 and T2 are compatible ordinal-types and the value of +type T2 is not in the closed interval specified by T1. +.IS +The compiler distinguishes between array-index expressions and the other +places where assignment-compatibility is required. +.PP +Array subscripting is done using the EM array instructions. +These instructions have three arguments: the array base address, +the index and the address of the array descriptor. +An array descriptor describes one dimension by three values: +the lower bound on the index, the number of elements minus one and the +element-size. +It depends on the EM implementation whether these bounds are checked. Since +most implementations don't, an extra compiler flag is added to force these +checks. 
+.br +The other places where assignment-compatibility is required are: +.IS +.CS +assignment +.CS +value parameters +.CS +procedures read and readln +.CS +the final value of the for-statement +.IE +For these places the compiler generates an EM range check instruction, except +when the R-option is turned on, or when the range of values of T2 +is enclosed in the range of T1. +If the expression consists of a single variable and if that variable +is of a subrange type, +then the subrange type itself is taken as T2, not its host-type. +Therefore, a range instruction is only generated if T1 is a subrange type +and if the expression is a constant, an expression with two or more +operands, or a single variable with a type not enclosed in T1. +If a constant is assigned, then the EM optimizer removes the range check +instruction, except when the value is out of bounds. +.PP +It depends on the EM implementation whether the range check instruction +is executed or skipped. +.IT 6.4.6 +It shall be an error if a value of type T2 must be +assignment-compatible with type T1, while T1 and T2 are compatible +set-types and any member of the value of type T2 +is not in the closed interval specified by the base-type +of the type T1. +.IS +This error is not detected. +.IT 6.5.3.3 +It shall be an error if a component of a variant-part of a variant, +where the selector of the variant-part is not a field, +is accessed unless the variant is active for the entirety of each +reference and access to each component of the variant. +.IS +This error is not detected. +.IT 6.5.4 +It shall be an error if +the pointer-variable of an identified-variable either denotes a +nil-value or is undefined. +.IS +The EM definition does not specify the binary representation of pointer +values, so that it is not possible to choose an otherwise illegal +binary representation for the pointer value NIL. +Rather arbitrarily, the compiler uses the integer value zero to represent NIL.
+For all current implementations this does not cause problems. +.PP +The size of pointers depends on the implementation and is +preset in the compiler by \fIack\fP [3]. +The compiler can be instructed, by the V-option, to use +another size for pointer objects. +NIL is represented here by the appropriate number of zero words. +.PP +It depends on the EM implementation whether de-referencing of a pointer +with value NIL causes an error. +.IE +.IT 6.5.4 +It shall be an error to remove the identifying-value of an identified +variable from its pointer-type when a reference to the variable exists. +.IS +When the identified variable is an element of the record-variable-list of +a with_statement, a warning is given at compile-time. Otherwise, this error +is not detected. +.IT 6.5.5 +It shall be an error to alter the value of a file-variable f when a +reference to the buffer-variable f^ exists. +.IS +When f is altered when it is an element of the record-variable-list of a +with-statement, a warning is given. When a buffer-variable is used as a +variable-parameter, an error is given. This is done at compile-time. +.IT 6.6.5.2 +It shall be an error if +the stated pre-assertion does not hold immediately +prior to any use of the file handling procedures +rewrite, put, reset and get. +.IS +For each of these four operations the pre-assertions +can be reformulated as: +.sp +rewrite(f):~no pre-assertion. +.br +put(f):~~~~~f is opened for writing and f^ is not undefined. +.br +reset(f):~~~f exists. +.br +get(f):~~~~~f is opened for reading and eof(f) is false. +.sp +The following errors are detected for these operations: +.sp +rewrite(f): +.in +10 +.ti -5 +more args expected, trap 64, fatal: +.br +f is a program-parameter and the corresponding +file name is not supplied by the caller of the program. 
+.ti -5 +rewrite error, trap 101, fatal: +.br +the caller of the program lacks the necessary +access rights to create the file in the file system +or operating system problems like table overflow +prevent creation of the file. +.in -10 +.sp +put(f): +.in +10 +.ti -5 +file not yet open, trap 72, fatal: +.br +reset or rewrite are never applied to the file. +The checks performed by the run time system are not foolproof. +.ti -5 +not writable, trap 96, fatal: +.br +f is opened for reading. +.ti -5 +write error, trap 104, fatal: +.br +probably caused by file system problems. +For instance, the file storage is exhausted. +Because IO is buffered to improve performance, +it might happen that this error occurs if the +file is closed. +Files are closed whenever they are rewritten or reset, or on +program termination. +.in -10 +.sp +reset(f): +.in +10 +.ti -5 +more args expected, trap 64, fatal: +.br +same as for rewrite(f). +.ti -5 +reset error, trap 100, fatal: +.br +f does not exist, or the caller has insufficient access rights, or +operating system tables are exhausted. +.in -10 +.sp +get(f): +.in +10 +.ti -5 +file not yet open, trap 72, fatal: +.br +as for put(f). +.ti -5 +not readable, trap 97, fatal: +.br +f is opened for writing. +.ti -5 +end of file, trap 98, fatal: +.br +eof(f) is true just before the call to get(f). +.ti -5 +read error, trap 103, fatal: +.br +unlikely to happen. Probably caused by hardware problems +or by errors elsewhere in the program that destroyed +the file information maintained by the run time system. +.ti -5 +truncated, trap 99, fatal: +.br +the file is not properly formed by an integer +number of file elements. +For instance, the size of a file of integer is odd. +.ti -5 +non-ASCII char read, trap 106, non-fatal: +.br +the character value of the next character-type +file element is out of range (0..127). +Only for text files. 
+.in -10 +.IT 6.6.5.3 +It shall be an error if a variant of a variant-part within the new +variable becomes active and a different variant of the variant-part is +one of the specified variants. +.IS +This error is not detected. +.IT 6.6.5.3 +It shall be an error to use dispose(q) if the identifying variable has been +allocated using the form new(p,c1,...,cn). +.IS +This error is not detected. However, this error can cause more memory +to be freed than was allocated. +Dispose causes a fatal trap 73 when memory already on the free +list is freed again. +.IT 6.6.5.3 +It shall be an error to use dispose(q,k1,...,km) if the identifying +variable has been allocated using the form new(p,c1,...,cn) and m is not +equal to n. +.IS +This error is not detected. However, this error can cause more memory +to be freed than was allocated. +Dispose causes a fatal trap 73 when memory already on the free +list is freed again. +.IT 6.6.5.3 +It shall be an error if the variants of a variable to be disposed +are different from those specified by the case-constants to dispose. +.IS +This error is not detected. +.IT 6.6.5.3 +It shall be an error if the value of the pointer parameter of dispose has +nil-value or is undefined. +.IS +The same comments apply as for de-referencing NIL or undefined pointers. +.IT 6.6.5.3 +It shall be an error if a variable created using the second form of new is +accessed by the identified variable of the variable-access of a factor, +of an assignment-statement, or of an actual-parameter. +.IS +This error is not detected. +.IT 6.6.6.2 +It shall be an error if the value of sqr(x) does not exist. +.IS +This error is detected for real-type arguments (real overflow, +trap 4, non-fatal). +.IT 6.6.6.2 +It shall be an error if x in ln(x) is smaller than or equal to 0. +.IS +This error is detected (error in ln, trap 66, non-fatal). +.IT 6.6.6.2 +It shall be an error if x in sqrt(x) is smaller than 0.
+.IS +This error is detected (error in sqrt, trap 67, non-fatal). +.sp +In addition to these errors, overflow in the expression exp(x) is +detected (error in exp, trap 65, non-fatal; real overflow, trap 4, non-fatal). +.sp +.IT 6.6.6.3 +It shall be an error if +the integer value of trunc(x) does not exist. +.IS +It depends on the implementation whether this error is detected. +The floating-point emulation detects this error (conversion error, +trap 10, non-fatal). +.IT 6.6.6.3 +It shall be an error if +the integer value of round(x) does not exist. +.IS +It depends on the implementation whether this error is detected. +The floating-point emulation detects this error (conversion error, +trap 10, non-fatal). +.IT 6.6.6.4 +It shall be an error if +the integer value of ord(x) does not exist. +.IS +This error can not occur, because the compiler will not allow +such ordinal types. +.IT 6.6.6.4 +It shall be an error if +the character value of chr(x) does not exist. +.IS +Except when the R-option is off, the compiler generates an EM +range check instruction. The effect of this instruction depends on the +EM implementation. +.IT 6.6.6.4 +It shall be an error if the value of succ(x) does not exist. +.IS +Same comments as for chr(x). +.IT 6.6.6.4 +It shall be an error if the value of pred(x) does not exist. +.IS +Same comments as for chr(x). +.IT 6.6.6.5 +It shall be an error if f in eof(f) is undefined. +.IS +This error is detected (file not yet open, trap 72, fatal). +.IT 6.6.6.5 +It shall be an error if +f in eoln(f) is undefined, or if eof(f) is true at that time. +.IS +The following errors may occur: +.IS +file not yet open, trap 72, fatal; +.br +not readable, trap 97, fatal; +.br +end of file, trap 98, fatal. +.IE +.IT 6.7.1 +It shall be an error if a variable-access used as an operand +in an expression is undefined at the time of its use. +.IS +The compiler performs some limited checks to see if identifiers are +used before they are set.
Since it can not always be sure (one could, for +instance, jump out of a loop), only a warning is generated. When an +expression contains a function-call, an error occurs if the +function is not assigned at run-time. +.IT 6.7.2.2 +A term of the form x/y shall be an error if y is zero. +.IS +It depends on the EM implementation whether this error is detected. On some +machines, a trap may occur. +.IT 6.7.2.2 +It shall be an error if j is zero in 'i div j'. +.IS +It depends on the EM implementation whether this error is detected. On some +machines, a trap may occur. +.IE +.IT 6.7.2.2 +It shall be an error if +j is zero or negative in i MOD j. +.IS +This error is detected (only positive j in 'i mod j', trap 71, non-fatal). +.IT 6.7.2.2 +It shall be an error if the result of any operation on integer +operands is not performed according to the mathematical +rules for integer arithmetic. +.IS +The reaction depends on the EM implementation. Most implementations, +however, will not notice integer overflow. +.IT 6.8.3.5 +It shall be an error if none of the case-constants is equal to the +value of the case-index upon entry to the case-statement. +.IS +This error is detected (case error, trap 20, fatal). +.IT 6.9.1 +It shall be an error if the sequence of characters read looking for an +integer does not form a signed-integer as specified in 6.1.5. +.IS +This error is detected (digit expected, trap 105, non-fatal). +.IT 6.9.1 +It shall be an error if the sequence of characters read looking for a +real does not form a signed-number as specified in 6.1.5. +.IS +This error is detected (digit expected, trap 105, non-fatal). +.IT 6.9.1 +When read is applied to f, it shall be an error if the buffer-variable f^ +is undefined or the pre-assertions for get do not hold. +.IS +This error is detected (see get(f)). +.IT 6.9.3 +When write is applied to a textfile f, it shall be an error if f is +undefined or f is opened for reading. +.IS +This error is detected (see put(f)).
Furthermore, this error is also +detected when f is not a textfile. +.IT 6.9.3.1 +The values of TotalWidth or FracDigits shall be greater than or equal to +one; it shall be an error if either value is less than one. +.IS +When either value is less than zero, an error (illegal field width, trap +75, non-fatal) occurs. Zero values are allowed, in order to maintain some +compatibility with the old Ack-Pascal compiler. +.IT 6.9.5 +It shall be an error if the pre-assertion required for writeln(f) does not +hold prior to the invocation of page(f). +.IS +This error is detected (see put(f)). +.CH "Extensions to the standard" +.IS +.ti -3 +1.~\ +External routines +.sp +In addition to the required directive 'forward' the Ack-Pascal compiler recognizes +the directive 'extern'. +This directive tells the compiler that the procedure block of this +procedure will not be present in the current program. +The code for the body of this procedure must be included at a later +stage of the compilation process. +.PP +This feature allows one to build libraries containing often used routines. +These routines do not have to be included in all the programs using them. +Maintenance is much simpler if there is only one library module to be +changed instead of many Pascal programs. +.PP +Another advantage is that these library modules may be written in a different +language, for instance C or the EM assembly language. +This is useful for accessing some specific EM instructions not generated +by the Pascal compiler. Examples are the system call routines and some +floating point conversion routines. +Another motive could be the optimization of some time-critical program parts. +.PP +The use of external routines, however, is dangerous. +The compiler normally checks for the correct number and type of parameters +when a procedure is called and for the result type of functions.
+If an external routine is called these checks are not sufficient, +because the compiler can not check whether the procedure heading of the +external routine as given in the Pascal program matches the actual routine +implementation. +It should be the loader's task to check this. +However, the current loaders are not that smart. +Another solution is to check at run time, at least the number of words +for parameters. Some EM implementations check this. +.PP +For those who wish to use the interface between C and Pascal we +give an incomplete list of corresponding formal parameters in C and Pascal. +.sp 1 +.TS +l l. +Pascal C +a:integer int a +a:char int a +a:boolean int a +a:real double a +a:^type type *a +var a:type type *a +procedure a(pars) struct { + void (*a)() ; + char *static_link ; + } +function a(pars):type struct { + type (*a)() ; + char *static_link ; + } +.TE +The Pascal runtime system uses the following algorithm when calling +functions/procedures passed as parameters. +.TS +l l. +if ( static_link ) (*a)(static_link,pars) ; +else (*a)(pars) ; +.TE +.ti -3 +2.~\ +Separate compilation. +.sp +The compiler is able to (separately) compile a collection of declarations, +procedures and functions to form a library. +The library may be linked with the main program, compiled later. +The syntax of these modules is +.EQ + module = [constant-definition-part] + [type-definition-part] + [var-declaration-part] + [procedure-and-function-declaration-part] +.EN +The compiler accepts a program or a module: +.EQ + unit = program | module +.EN +All variables declared outside a module must be imported +by parameters, even the files input and output. +Access to a variable declared in a module is only possible +using the procedures and functions declared in that same module. +By giving the correct procedure/function heading followed by the +directive 'extern' procedures and functions declared in +other units may be used. +.sp +.ti -3 +3.~\ +Assertions.
+.sp +When the s-option is off, Ack-Pascal compiler recognizes an additional +statement, the assertion. Assertions can be used as an aid in debugging +and documentation. The syntax is: +.EQ + assertion = 'assert' Boolean-expression +.EN +An assertion is a simple-statement, so +.EQ + simple-statement = [assignment-statement | + procedure-statement | + goto-statement | + assertion + ] +.EN +An assertion causes an error if the Boolean-expression is false. +That is its only purpose. +It does not change any of the variables, at least it should not. +Therefore, do not use functions with side-effects in the Boolean-expression. +If the a-option is turned on, then assertions are skipped by the +compiler. 'assert' is not a word-symbol (keyword) and may be used as identifier. +However, assignment to a variable and calling of a procedure with that +name will be impossible. +If the s-option is turned on, the compiler will not know a thing about +assertions, so using assertions will then give a parse error. +.sp +.ti -3 +4.~\ +Additional procedures. +.sp +Three additional standard procedures are available: +.IS +.IS +.ti -8 +halt:~~~a call of this procedure is equivalent to jumping to the +end of the program. It is always the last statement executed. +The exit status of the program may be supplied +as optional argument. If not, it will be zero. +.ti -8 +release: +.ti -8 +mark:~~~for most applications it is sufficient to use the heap as second stack. +Mark and release are suited for this type of use, more suited than dispose. +mark(p), with p of type pointer, stores the current value of the +heap pointer in p. release(p), with p initialized by a call +of mark(p), restores the heap pointer to its old value. +All the heap objects, created by calls of new between the call of +mark and the call of release, are removed and the space they used +can be reallocated. +Never use mark and release together with dispose! +.sp +.in -10 +.ti -3 +5.~\ +UNIX interfacing. 
+.sp +If the c-option is turned on, then some special features are available +to simplify an interface with the UNIX environment. +First of all, the compiler allows for a different type +of string constants. +These string constants are delimited by double quotes ('"'). +To put a double quote into these strings, the double quote must be repeated, +like the single quote in normal string constants. +These special string constants are terminated by a zero byte (chr(0)). +The type of these constants is a pointer to a packed array of characters, +with lower bound 1 and unknown upper bound. +.br +Secondly, the compiler predefines a new type identifier 'string' denoting +this just described string type. +.PP +These features are only useful for declaration of +constants and variables of type 'string'. +String objects may not be allocated on the heap and string pointers +may not be de-referenced. +Still these strings are very useful in combination with external routines. +The procedure write is extended to print these zero-terminated +strings correctly. +.sp +.ti -3 +6.~\ +Double length (32 bit) integers. +.sp +If the d-option is turned on, then the additional type 'long' is known +to the compiler. +By default, long variables have integer values in the +range -2147483647..+2147483647, but this can be changed with the -V option +(if the backend can support this). +Long constants can not be declared. +Longs can not be used as control-variables. +It is not allowed to form subranges of type long. +All operations allowed on integers are also +allowed on longs and are indicated by the same +operators: '+', '-', '*', '/', 'div', 'mod'. +The procedures read and write have been extended to handle long +arguments correctly. It is possible to read longs from a file of integers +and vice-versa, but only if longs and integers have the same size. +The default width for longs is 11. +The standard procedures 'abs' and 'sqr' have been extended to work +on long arguments. 
+Conversion from integer to long, long to real, +real to long and long to integer are automatic, like the conversion +from integer to real. +These conversions may cause a +.IS +conversion error, trap 10, non-fatal +.IE +.sp +.ti -3 +7.~\ +Underscore as letter. +.sp +The character '_' may be used in forming identifiers, if the u- or U-option +is turned on. It is forbidden to start identifiers with underscores, since +this may cause name-clashes with run-time routines. +.sp +.ti -3 +8.~\ +Zero field width in write. +.sp +Zero TotalWidth arguments are allowed. No characters are written for +character, string or Boolean type arguments then. A zero FracDigits +argument for fixed-point representation of reals causes the fraction and +the character '.' to be suppressed. +.sp +.ti -3 +9.~\ +Pre-processing. +.sp +If the very first character of a file containing a Pascal +program is the sharp ('#', ASCII 23(hex)) the file is preprocessed +in the same way as C programs. +Lines beginning with a '#' are taken as preprocessor command lines +and not fed to the Pascal compiler proper. +C style comments, /*......*/, are removed by the C preprocessor, +thus C comments inside Pascal programs are also removed when they +are fed through the preprocessor. +.CH "Deviations from the standard" +Ack-Pascal deviates from the standard proposal in the following ways: +.IS +.ti -3 +1.~\ +Standard procedures and functions are not allowed as parameters in Ack-Pascal. +The same result can be obtained with negligible loss of performance +by declaring some user routines like: +.EQ + function sine(x:real):real; + begin + sine:=sin(x) + end; +.EN +.sp +.ti -3 +2.~\ +The standard procedures read, readln, write and writeln are implemented as +word-symbols, and can therefore not be redeclared. +.CH "Compiler options" +Some options of the compiler may be controlled by using "{$....}". +Each option consists of a lower case letter followed by +, - or an unsigned +number. +Options are separated by commas. 
+The following options exist: +.in 8 +.sp +.ti -8 +a~+/-~~~\ +this option switches assertions on and off. +If this option is on, then code is included to test these assertions +at run time. Default +. +.sp +.ti -8 +c~+/-~~~\ +this option, if on, allows the use of C-type string constants +surrounded by double quotes. +Moreover, a new type identifier 'string' is predefined. +Default -. +.sp +.ti -8 +d~+/-~~~\ +this option, if on, allows the use of variables of type 'long'. +Default -. +.sp +.ti -8 +i~~\ +with this flag the setsize for a set of integers can be +manipulated. +The number must be the number of bits per set. +The default value is wordsize-1. +.sp +.ti -8 +l~+/-~~~\ +if + then code is inserted to keep track of the source line number. +When this flag is switched on and off, an incorrect line number may appear +if the error occurs in a part of the program for which this flag is off. +These same line numbers are used for the profile, flow and count options +of the EM interpreter em [5]. +Default +. +.sp +.ti -8 +r~+/-~~~\ +if + then code is inserted to check subrange variables against +lower and upper subrange limits. +Default +. +.sp +.ti -8 +s~+/-~~~\ +if + then the compiler will hunt for places in the program +where non-standard features are used, and for each place found +it will generate a warning. Default -. +.sp +.ti -8 +t~+/-~~~\ +if + then each time a procedure is entered, the routine 'procentry' is +called, and each time a procedure exits, the procedure 'procexit' is +called. Both 'procentry' and 'procexit' have a 'string' as parameter. This +means that when a user specifies his or her own procedures, the c-option +must be used. Default procedures are present in the run time library. +Default -. +.sp +.ti -8 +u~+/-~~~\ +if + then the character '_' is treated like a letter, +so that it may be used in identifiers. +Procedure and function identifiers are not allowed to start with an +underscore because they may collide with library routine names. 
+Default -. +.in 0 +.sp +Some of these flags (c, d, i, s, u, C and U) are only effective when +they appear before the 'program' symbol. The others may be switched +on and off. +.PP +A very powerful debugging tool is the knowledge that inaccessible statements +and useless tests are removed by the EM optimizer. For instance, a +statement like: +.sp +.nf + if debug then + writeln('initialization done'); +.fi +.sp +is completely removed by the optimizer if debug is a constant with +value false. +The first line is removed if debug is a constant with value true. +Of course, if debug is a variable nothing can be removed. +.PP +A disadvantage of Pascal, the lack of preinitialized data, can be +diminished by making use of the possibilities of the EM optimizer. +For instance, initializing an array of reserved words is sometimes +optimized into 3 EM instructions. To maximize this effect +variables must be initialized as much as possible in order of declaration and array entries +in order of decreasing index. +.CH "References" +.in +5 +.ti -5 +[1]~~\ +BSI standard BS 6192: 1982 (ISO 7185). +.sp +.ti -5 +[2]~~\ +A.S.Tanenbaum, J.W.Stevenson, Hans van Staveren, E.G.Keizer, +"Description of a machine architecture for use with block structured languages", +Informatica rapport IR-81. +.sp +.ti -5 +[3]~~\ +UNIX manual ack(I). +.sp +.ti -5 +[4]~~\ +UNIX manual ld(I). +.sp +.ti -5 +[5]~~\ +UNIX manual em(I). +.sp +.ti -5 +[6]~~\ +UNIX manual libpc(VII) +.sp +.ti -5 +[7]~~\ +UNIX manual pc_prlib(VII) diff --git a/doc/peep.doc b/doc/peep.doc new file mode 100644 index 0000000..cb8998c --- /dev/null +++ b/doc/peep.doc @@ -0,0 +1,521 @@ +.\" $Id: peep.doc,v 1.5 1994/06/24 10:02:20 ceriel Exp $ +.TL +Internal documentation on the peephole optimizer +.br +from the Amsterdam Compiler Kit +.NH 1 +Introduction +.PP +Part of the Amsterdam Compiler Kit is a program to do +peephole optimization on an EM program. 
+The optimizer scans the program to match patterns from a table +and if found makes the optimization from the table, +and with the result of the optimization +it tries to find yet another optimization +continuing until no more optimizations are found. +.PP +Furthermore it does some optimizations that can not be called +peephole optimizations for historical reasons, +like branch chaining and the deletion of unreachable code. +.PP +The peephole optimizer consists of three parts +.IP 1) +A driving table +.IP 2) +A program translating the table to internal format +.IP 3) +C code compiled with the table to make the optimizer proper +.PP +In this document the table format, internal format and +data structures in the optimizer will be explained, +plus a hint on what the code does where it might not be obvious. +It is a simple program mostly. +.NH 1 +Table format +.PP +The driving table consists of pattern/replacement pairs, +in principle one per line, +although a line starting with white space is considered +a continuation line for the previous. +The general format is: +.DS +optimization : pattern ':' replacement '\en' +.sp +pattern : EMlist optional_boolean_expression +.sp +replacement : EM_plus_operand_list +.DE +Example of a simple one +.DS +loc stl $1==0 : zrl $2 +.DE +There is no real limit for the length of the pattern or the replacement, +the replacement might even be longer than the pattern, +and expressions can be made arbitrarily complicated. +.PP +The expressions in the table are made of the following pieces: +.IP - +Integer constants +.IP - +$\fIn\fP, standing for the operand of the \fIn\fP'th EM +instruction in the pattern, +undefined if that instruction has no operand. +.IP - +w, standing for the wordsize of the code optimized. +.IP - +p, for the pointersize. +.IP - +defined(expr), true if expression is defined +.IP - +samesign(expr,expr), true if expressions have the same sign. 
+.IP - +sfit(expr,expr), ufit(expr,expr), +true if the first expression fits signed or unsigned in the number +of bits given in the second expression. +.IP - +rotate(expr,expr), +first expression rotated left the number of bits given by the second expression. +.IP - +notreg(expr), +true if the local with the expression as number is not a candidate to put +in a register. +.IP - +rom(\fIn\fP,expr), contents of the rom descriptor at index expr that +is associated with the global label that should be the argument of +the \fIn\fP'th EM instruction. +Undefined if such a thing does not exist. +.PP +The usual arithmetic operators may be used on integer values, +if any operand is undefined the expression is undefined, +except for the defined() function above. +An undefined expression used for its truth value is false. +All arithmetic on local label operands is forbidden, +only things allowed are tests for equality. +Arithmetic on global labels makes sense, +i.e. one can add a global label and a constant, +but not two global labels. +.PP +In the table one can use five additional EM instructions in patterns. +These are: +.IP lab +Stands for a local label +.IP LLP +Load Local Pointer, translates into a +.B lol +or into a +.B ldl +depending on the relationship between wordsize and pointersize. +.IP LEP +Load External Pointer, translates into a +.B loe +or into a +.B lde . +.IP SLP +Store Local Pointer, +.B stl +or +.B sdl . +.IP SEP +Store External Pointer, +.B ste +or +.B sde . +.PP +There is only one peephole optimizer, +so the substitutions to be made for the last four instructions +are made at run time before the first optimizations are made. +.NH 1 +Internal format +.PP +The translating program, +.I mktab +converts the table into an array of bytes where all +patterns follow unaligned. +Format of a pattern is: +.IP 1) +One byte for high byte of hash value, +will be explained later on. +.IP 2) +Two bytes for the index of the next pattern in a chain. 
+.IP 3) +An integer\u*\d, +.FS +* An integer is encoded as a byte when less than 255, +otherwise as a byte containing 255 followed by two +bytes with the real value. +.FE +pattern length. +.IP 4) +The list of pattern opcodes, one per byte. +.IP 5) +An integer expression index, 0 if not used. +.IP 6) +An integer, replacement length. +.IP 7) +A list of pairs consisting of a one byte opcode and an integer +expression index. +.PP +The expressions are kept in an array of triples, +implementing a binary tree. +The +.I mktab +program tries to minimize the number of triples by reusing +duplicates and even reverses the operands of commutative operators +when doing so would spare a triple. +.NH 1 +A tour through the sources +.PP +Now we will walk through the sources and note things of interest. +.NH 2 +The header files +.PP +The header files are the place where data structures and options reside. +.NH 3 +alloc.h +.PP +In the header file alloc.h several defines can be used to select various +kinds of core allocation schemes. +This is important on small machines like the PDP-11 since a complete +procedure must be in core at the same time, +and the peephole optimizer should not be the limiting factor in +determining the maximum size of procedures if possible. +Options are: +.IP - +USEMALLOC, standard malloc() and free() are used instead of the own +core allocation package. +Not recommended unless the own package does not work on some bizarre +machine. +.IP - +COREDEBUG, prints large amounts of information about core management. +Not recommended unless the code is changed and it stops working. +.IP - +SEPID, defining this will add an extra procedure that will +go through a lot of work to scrape the last bytes together if the +system won't provide more. +This is not a good idea if memory is scarce and code and data reside +in the same spaces, since the room used by the procedure might well +be more than the room saved. +.IP - +STACKROOM, number of shorts used in stack space.
+This is used if memory is scarce and stack space and data space are +different. +On the PDP-11 a UNIX process starts with an 8K stack segment which +cannot be transferred to the data segment. +Under these conditions one can use a lot of the stack space for storage. +.NH 3 +assert.h +.PP +Just defines the assert macro. +When compiled with -DNDEBUG all asserts will be off. +.NH 3 +ext.h +.PP +Gives external definitions of variables used by more than one module. +.NH 3 +line.h +.PP +Defines the structures used to keep instructions, +one structure per line of EM code, +and the structure to keep arguments of pseudos, +one structure per argument. +Both structures essentially contain a pointer to the next, +a type, +and a union containing information depending on the type. +Core is allocated only for the part of the union used. +.PP +The +.I +struct line +.R +has a very compact encoding for small integers, +they are encoded in the type field. +On the PDP-11 this gives a line structure of only 4 bytes for most +instructions. +.NH 3 +lookup.h +.PP +Contains definition of the struct used for symbol table management, +global labels and procedure names are kept in one table. +.NH 3 +optim.h +.PP +If one defines the DIAGOPT option in this header file, +for every optimization performed a number is written on stderr. +The number gives the number of the pattern in the table +or one of the four special numbers in this header file. +.NH 3 +param.h +.PP +Contains one settable option, +LONGOFF. +If this is not defined the optimizer can only optimize programs +with wordsize 2 and pointersize 2. +Set this only if it must be run on a Z80 or something pathetic like that. +.PP +Other defines here should not be touched. +.NH 3 +pattern.h +.PP +Contains defines of indices in a pattern, +definition of the expression triples, +definitions of the various expression operators +and definition of the result struct where expression results are put. 
+.PP +This header file is the main one that is also included by +.I mktab . +.NH 3 +proinf.h +.PP +This one contains definitions +for the local label table structs +and for the struct where all information for one procedure is kept. +This is in one struct so it can be saved easily when recursive +procedures have to be resolved. +.NH 3 +tes.h +.PP +Contains the data structure used by the top element size computation. +.NH 3 +types.h +.PP +Collection of typedefs to be used by almost all modules. +.NH 2 +The C code itself. +.PP +The C code will now be the center of our attention. +We will make a walk through the sources and we will try +to follow the sources in a logical order. +So we will start at +.NH 3 +main.c +.PP +The main.c module contains the main() function. +Here nothing spectacular happens, +only thing of interest is the handling of flags: +.IP -L +This is an instruction to the peephole optimizer to perform +one of its auxiliary functions, the generation of a library module. +This makes the peephole optimizer write its output on a temporary file, +and at the end making the real output by first generating a list +of exported symbols and then copying the temporary file behind it. +.IP -n +Disables all optimization. +Only thing the optimizer does now is filling in the blank after the +.I END +pseudo and resolving recursive procedures. +.PP +The place where main() is left is the call to getlines() which brings +us to +.NH 3 +getline.c +.PP +This module reads the EM code and constructs a list of +.I +struct line +.R +records, +linked together backwards, +i.e. the first instruction read is the last in the list. +Pseudos are handled here also, +for most pseudos this just means that a chain of argument records +is linked into the linked line list but some pseudos get special attention: +.IP exc +This pseudo is acted upon right away. +Lines read are shuffled around according to instruction. +.IP mes +Some messages are acted upon. 
+These are: +.RS +.IP ms_err 8 +The input is drained, just in case it is a pipe. +After that the optimizer exits. +.IP ms_opt +The do not optimize flag is set. +Acts just like -n on the command line. +.IP ms_emx +The word- and pointersize are read, +complain if we are not able to handle this. +.IP ms_reg +We take notice of the offset of this local. +See also comments in the description of peephole.c +.RE +.IP pro +A new procedure starts, if we are already in one save the status, +else process collected input. +Collect information about this procedure and if already in a procedure +call getlines() recursively. +.IP end +Process collected input. +.PP +The phrase "process collected input" is used twice, +which brings us to +.NH 3 +process.c +.PP +This module contains the entry point process() which is called at any +time the collected input must be processed. +It calls a variety of other routines to get the real work done. +Routines in this module are in chronological order: +.IP symknown 12 +Marks all symbols seen until now as known, +i.e. it is now known whether their scope is local or global. +This information is used again during output. +.IP symvalue +Runs through the chain of pseudos to give values to data labels. +This needs an extra pass. +It cannot be done during the getlines pass, since an +.B exc +pseudo could destroy things. +Nor can it be done during the backward pass since it is impossible +to do good fragment numbering backward. +.IP checklocs +Checks whether all local labels referenced are defined. +It needs to be sure about this since otherwise the +semi global optimizations made cannot work. +.IP relabel +This routine finds the final destination for each label in the procedure. +Labels followed by unconditional branches or other labels are marked during +the peephole phase and this leads to chains of identical labels.
+These chains are followed here, and in the local label table each label +has associated with it its replacement label, after this procedure is run. +Care is taken in this routine to prevent a loop in the program from +causing the optimizer to loop. +.IP cleanlocals +This routine empties the local label table after everything +is processed. +.PP +But before this can all be done, +the backward linked list of instructions first has to be reversed, +so here comes +.NH 3 +backward.c +.PP +The routine backward has a number of functions: +.IP - +It reverses the backward linked list, making two forward linked lists, +one for the instructions and one for the pseudos. +.IP - +It notes the last occurrence of data labels in the backward linked list +and puts it in the global symbol table. +This is of course the first occurrence in the procedure. +This information is needed to decide whether the symbols are global +or local to this module. +.IP - +It decides about the fragment boundaries of data blocks. +Fragments are numbered backwards starting at 3. +This is done to be able to make the type of an expression +containing a symbol equal to its fragment. +This type can then not clash with the types integer and local label. +.IP - +It allocates a rom buffer to every data label with a rom behind +it, if that rom contains only plain integers at the start. +.PP +The first thing done after process() has called backward() and some +of its own little routines is a call to the real routine, +the one that does the work the program was written for +.NH 3 +peephole.c +.PP +The first routines in peephole.c +implement a linked list for the offsets of local variables +that are candidates for a register implementation. +Several patterns use the notreg() function, +since it is forbidden to combine a load of that variable +with the load of another and +it is not allowed to take the address of that variable.
+.PP +The routine peephole hashes the patterns the first time it is called +after which it doesn't do much more than calling optimize. +But first hashpatterns(). +.PP +The patterns are hashed at run time of the optimizer because of +the +.B LLP , +.B LEP , +.B SLP +and +.B SEP +instructions added to the instruction set in this optimizer. +These are first replaced everywhere in the table by the correct +replacement after which the first three instructions of the +pattern are hashed and the pattern is linked into one of the +256 linked lists. +There is a define CHK_HASH in this module that +can be set if the randomness of the hashing +function is not trusted. +.PP +The attention now shifts to optimize(). +This routine calls basicblock() for every piece of code between two labels. +It also notes which labels have another label or a branch behind them +so the relabel() routine from process.c can do something with that. +.PP +Basicblock() keeps making passes over its basic block +until no more optimizations are found. +This might be inefficient if there is a long basicblock with some +deep recursive optimization in one part of it. +The entire basic block is then scanned a lot of times just for +that one piece. +The alternative is backing up after making an optimization and running +through the same code again, but that is difficult +in a single linked list. +.PP +It hashes instructions and calls trypat() for every pattern that has +a full hash value match, +i.e. lower byte and upper byte equal. +Longest pattern is tried first. +.PP +Trypat() checks length and opcodes of the pattern. +If correct it fills the iargs[] array with argument values +and calculates the expression. +If that is also correct the work shifts to tryrepl(). +.PP +Tryrepl() generates the list of replacement instructions, +links it into the list and returns true. +Why then the name tryrepl() if it always succeeds? 
+Well, there is a mechanism in the optimizer, +unused until today, that makes it possible to do optimizations that cannot +be described by the table. +It is possible to give a number as a replacement which will cause the +optimizer to call a routine special() to do some work. +This routine might decide not to do an optimization and return false. +.PP +The last routine that is called from process() is putline() +to write the optimized code, bringing us to +.NH 3 +tes.c +.PP +Contains the routines used by the top element size computation phase, +which is run after the peephole-optimisation. +The main routine of tes.c is tes_instr(). This looks at an instruction and +decides the size of the element on top of the stack after the instruction +is executed. When a label is defined or used, the size of the top element +is remembered for later use. When the information is consistent throughout +the procedure, it is passed to the code generator by means of an ms_tes +message. +.NH 3 +putline.c +.PP +The major part of putline.c is the standard set of routines +that makes EM compact code. +The extra functions performed are: +.IP - +For every occurrence of a global symbol it might be necessary to +output an +.B exa , +.B exp , +.B ina +or +.B inp +pseudo instruction. +That task is performed. +.IP - +The +.B lin +instructions are optimized here, +.B lni +instructions added for +.B lin +instructions and superfluous +.B lin +instructions deleted. + diff --git a/doc/proto.make b/doc/proto.make new file mode 100644 index 0000000..061640d --- /dev/null +++ b/doc/proto.make @@ -0,0 +1,161 @@ +# $Id: proto.make,v 1.4 1997/07/10 07:58:30 ceriel Exp $ + +#PARAMS do not remove this line!
+ +TBL=tbl +EQN=eqn +PIC=pic +REFER=refer +GRAP=grap + +RESFILES= \ + $(TARGET_HOME)/doc/toolkit.doc \ + $(TARGET_HOME)/doc/install.doc \ + $(TARGET_HOME)/doc/em.doc \ + $(TARGET_HOME)/doc/ack.doc \ + $(TARGET_HOME)/doc/v7bugs.doc \ + $(TARGET_HOME)/doc/peep.doc \ + $(TARGET_HOME)/doc/cg.doc \ + $(TARGET_HOME)/doc/ncg.doc \ + $(TARGET_HOME)/doc/regadd.doc \ + $(TARGET_HOME)/doc/LLgen.doc \ + $(TARGET_HOME)/doc/LLgen_NCER.doc \ + $(TARGET_HOME)/doc/pascal.doc \ + $(TARGET_HOME)/doc/basic.doc \ + $(TARGET_HOME)/doc/crefman.doc \ + $(TARGET_HOME)/doc/ansi_C.doc \ + $(TARGET_HOME)/doc/pcref.doc \ + $(TARGET_HOME)/doc/val.doc \ + $(TARGET_HOME)/doc/6500.doc \ + $(TARGET_HOME)/doc/i80.doc \ + $(TARGET_HOME)/doc/z80.doc \ + $(TARGET_HOME)/doc/m68020.doc \ + $(TARGET_HOME)/doc/sparc.doc \ + $(TARGET_HOME)/doc/top.doc \ + $(TARGET_HOME)/doc/ego.doc \ + $(TARGET_HOME)/doc/occam.doc \ + $(TARGET_HOME)/doc/m2ref.doc \ + $(TARGET_HOME)/doc/ceg.doc \ + $(TARGET_HOME)/doc/nopt.doc \ + $(TARGET_HOME)/doc/int.doc \ + $(TARGET_HOME)/doc/lint.doc \ + $(TARGET_HOME)/doc/install.pr \ + $(TARGET_HOME)/doc/READ_ME \ + $(TARGET_HOME)/doc/Makefile + +install: $(RESFILES) + +$(TARGET_HOME)/doc/toolkit.doc: $(SRC_HOME)/doc/toolkit.doc + cat $(SRC_HOME)/doc/toolkit.doc >$@ + +$(TARGET_HOME)/doc/install.doc: $(SRC_HOME)/doc/install.doc + $(TBL) $(SRC_HOME)/doc/install.doc >$@ + +$(TARGET_HOME)/doc/em.doc: em.X +em.X: + cd em; make "TBL="$(TBL) + +$(TARGET_HOME)/doc/ack.doc: $(SRC_HOME)/doc/ack.doc + cat $(SRC_HOME)/doc/ack.doc >$@ + +$(TARGET_HOME)/doc/v7bugs.doc: $(SRC_HOME)/doc/v7bugs.doc + cat $(SRC_HOME)/doc/v7bugs.doc >$@ + +$(TARGET_HOME)/doc/peep.doc: $(SRC_HOME)/doc/peep.doc + cat $(SRC_HOME)/doc/peep.doc >$@ + +$(TARGET_HOME)/doc/cg.doc: $(SRC_HOME)/doc/cg.doc + cat $(SRC_HOME)/doc/cg.doc >$@ + +$(TARGET_HOME)/doc/ncg.doc: $(SRC_HOME)/doc/ncg.doc + $(TBL) $(SRC_HOME)/doc/ncg.doc >$@ + +$(TARGET_HOME)/doc/regadd.doc: $(SRC_HOME)/doc/regadd.doc + cat $(SRC_HOME)/doc/regadd.doc >$@ + 
+$(TARGET_HOME)/doc/LLgen.doc: LLgen.X +LLgen.X: + cd LLgen; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER) "GRAP="$(GRAP) + +$(TARGET_HOME)/doc/LLgen_NCER.doc: LLgen_NCER.X +LLgen_NCER.X: + cd LLgen; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER) "GRAP="$(GRAP) + +$(TARGET_HOME)/doc/basic.doc: $(SRC_HOME)/doc/basic.doc + cat $(SRC_HOME)/doc/basic.doc >$@ + +$(TARGET_HOME)/doc/crefman.doc: $(SRC_HOME)/doc/crefman.doc + $(EQN) $(SRC_HOME)/doc/crefman.doc >$@ + +$(TARGET_HOME)/doc/ansi_C.doc: $(SRC_HOME)/doc/ansi_C.doc + $(TBL) $(SRC_HOME)/doc/ansi_C.doc >$@ + +$(TARGET_HOME)/doc/pcref.doc: $(SRC_HOME)/doc/pcref.doc + $(TBL) $(SRC_HOME)/doc/pcref.doc >$@ + +$(TARGET_HOME)/doc/val.doc: $(SRC_HOME)/doc/val.doc + cat $(SRC_HOME)/doc/val.doc >$@ + +$(TARGET_HOME)/doc/6500.doc: $(SRC_HOME)/doc/6500.doc + $(TBL) $(SRC_HOME)/doc/6500.doc >$@ + +$(TARGET_HOME)/doc/i80.doc: $(SRC_HOME)/doc/i80.doc + cat $(SRC_HOME)/doc/i80.doc >$@ + +$(TARGET_HOME)/doc/z80.doc: $(SRC_HOME)/doc/z80.doc + cat $(SRC_HOME)/doc/z80.doc >$@ + +$(TARGET_HOME)/doc/m68020.doc: $(SRC_HOME)/doc/m68020.doc + $(EQN) $(SRC_HOME)/doc/m68020.doc | $(TBL) >$@ + +$(TARGET_HOME)/doc/sparc.doc: sparc.X +sparc.X: + cd sparc; make "PIC="$(PIC) "TBL="$(TBL) + +$(TARGET_HOME)/doc/pascal.doc: pascal.X +pascal.X: + cd pascal; make "PIC="$(PIC) + +$(TARGET_HOME)/doc/top.doc: top.X +top.X: + cd top; make "EQN="$(EQN) "TBL="$(TBL) "REFER="$(REFER) + +$(TARGET_HOME)/doc/ego.doc: ego.X +ego.X: + cd ego; make "REFER="$(REFER) "TBL="$(TBL) + +$(TARGET_HOME)/doc/occam.doc: occam.X +occam.X: + cd occam; make "PIC="$(PIC) "TBL="$(TBL) "EQN="$(EQN) + +$(TARGET_HOME)/doc/m2ref.doc: $(SRC_HOME)/doc/m2ref.doc + cat $(SRC_HOME)/doc/m2ref.doc >$@ + +$(TARGET_HOME)/doc/ceg.doc: ceg.X +ceg.X: + cd ceg; make "PIC="$(PIC) "TBL="$(TBL) "REFER="$(REFER) + +$(TARGET_HOME)/doc/nopt.doc: $(SRC_HOME)/doc/nopt.doc + cat $(SRC_HOME)/doc/nopt.doc >$@ + +$(TARGET_HOME)/doc/int.doc: int.X +int.X: + cd int; make "TBL="$(TBL) + 
+$(TARGET_HOME)/doc/lint.doc: lint.X +lint.X: + cd lint; make + +$(TARGET_HOME)/doc/install.pr: $(SRC_HOME)/doc/install.pr + cat $(SRC_HOME)/doc/install.pr >$@ + +$(TARGET_HOME)/doc/READ_ME: $(SRC_HOME)/doc/READ_ME + cat $(SRC_HOME)/doc/READ_ME >$@ + +$(TARGET_HOME)/doc/Makefile: $(SRC_HOME)/doc/Makefile + cat $(SRC_HOME)/doc/Makefile >$@ + +cmp: + +clean: + -rm -f *.old Out diff --git a/doc/regadd.doc b/doc/regadd.doc new file mode 100644 index 0000000..4ef0768 --- /dev/null +++ b/doc/regadd.doc @@ -0,0 +1,131 @@ +.\" $Id: regadd.doc,v 1.5 1994/06/24 10:02:27 ceriel Exp $ +.TL +Addition of register variables to an existing table. +.NH 1 +Introduction +.PP +This is a short description of the newest feature in the +table driven code generator for the Amsterdam Compiler Kit. +It describes how to add register variables to an existing table. +This assumes a distribution of October 1983 or later. +It is not clear whether one should read this when starting with +a table for a new machine, +or waiting till the table is well debugged already. +.NH 1 +Modifications to the table itself. +.NH 2 +Register section +.PP +Just before the properties of the register one +of the following can be added: +.IP - 2 +regvar +.IP - +regvar ( pointer ) +.IP - +regvar ( loop ) +.IP - +regvar ( float ) +.LP +All register variables of one type must be of the same size, +and they may have no subregisters. +.NH 2 +Codesection +.PP +.IP - 2 +Two pseudo functions are added to the list allowed inside expressions: +.RS +.IP 1) 3 +inreg ( expr ) has as a parameter the offset of a local, +and returns 0,1 or 2: +.RS +.IP 2: 3 +if the variable is in a register. +.IP 1: +if the variable could be in a register but isn't. +.IP 0: +if the variable cannot be in a register. +.RE +.IP 2) +regvar ( expr ) returns the register associated with the variable. +Undefined if it is not in a register. +So regvar ( expr ) is defined if and only if inreg (expr ) == 2. 
+.RE +.IP - +It is now possible to remove() a register expression, +this is of course needed for a store into a register local. +.IP - +The return out of a procedure may now involve register restores, +so the special word 'return' in the table will invoke a user defined +function. +.NH 1 +Modifications to mach.c +.PP +If register variables are used in a table, the program +.I cgg +will define the word REGVARS during compilation of the sources. +So the following functions described here should be bracketed +by #ifdef REGVARS and #endif. +.IP - 2 +regscore(off,size,typ,freq,totyp) long off; +.br +This function should assign a score to a register variable, +the score should preferably be the estimated number of bytes +gained when it is put in a register. +Off and size are the offset and size of the variable, +typ is the type, that is reg_any, reg_pointer, reg_loop or reg_float. +Freq is the number of times it occurs statically, and totyp +is the type of the register it is planned to go into. +.br +Keep in mind that the gain should be net, that is the cost for +register save/restore sequences and the cost of initialisation +in the case of parameters should already be included. +.IP - +i_regsave() +.br +This function is called at the start of a procedure, just before +register saves are done. +It can be used to initialise some variables if needed. +.IP - +f_regsave() +.br +This function is called at end of the register save sequence. +It can be used to do the real saving if multiple register move +instructions are available. +.IP - +regsave(regstr,off,size) char *regstr; long off; +.br +Should either do the real saving or set up a table to have +it done by f_regsave. +Note that initialisation of parameters should also be done, +or planned here. +.IP - +regreturn() +.br +Should restore saved registers and return. +The function result is already in the function return area by now. 
+.NH 1 +Examples +.PP +Here are some examples out of the PDP 11 table +.DS +lol inreg($1)==2| | | regvar($1) | | + +lil inreg($1)==2| | | {regdef2, regvar($1)} | | + +stl inreg($1)==2| xsource2 | + remove(regvar($1)) + move(%[1],regvar($1)) | | | + +inl inreg($1)==2| | remove(regvar($1)) + "inc %(regvar($1)%)" + setcc(regvar($1)) | | | +.DE +.NH 1 +Afterthoughts. +.PP +At the time of this writing the tables for the PDP 11 and the M68000 and +the VAX are converted, in all cases the two byte wordsize versions. +No big problems have occurred, but experience has shown that it is +necessary to check the table carefully for all patterns with locals in them. +Code may be generated that uses the memoryslot the local is not in. diff --git a/doc/sparc/1 b/doc/sparc/1 new file mode 100644 index 0000000..79bb4e3 --- /dev/null +++ b/doc/sparc/1 @@ -0,0 +1,53 @@ +.In +.NH +INTRODUCTION +.NH 2 +Why an EM backend for SPARC processors? +.PP +With the introduction of SPARC-based computers like the Sun-4, a +whole new range of fast computers became readily available to the general +public. The power of large mainframes had been captured into a small +desk-top computer at only a fraction of the cost. +.PP +In the older days, a new computer used to be very hard to integrate into +the existing environment, but due to standardization in the software world +incompatibility in hardware no longer means incompatibility in software. +Programs that are written for computer A can often be run on computer B +without major modifications. Unfortunately this is not true for all software. +.PP +There will always be programs that rely on the specific +hardware of a certain computer for many different reasons. 
They +can be categorized as: +.IP - +poorly written programs +.IP - +programs to directly control hardware (device drivers) +.IP - +code that requires efficiency (time-critical I/O drivers) +.IP - +programs to generate code to run on the hardware (compilers) +.LP +This project for instance, the design and implementation of an EM backend +for SPARC processors, comes in the last category. +.PP +We have designed and implemented an algorithm to convert EM programs to code +that will run directly on the SPARC hardware. Henceforth, both the algorithm +and the implementation will be referred to as the EM-to-SPARC backend, +or simply: the backend. +.NH 2 +Why has nobody done this before? +.PP +Since EM was designed around 1981 and even SPARC has been around for some +years now, one may wonder why nobody has ever written an EM to SPARC backend +before. The reason is twofold. In the first place, there are some +non-trivial problems to be solved in the design phase, and secondly, +the SPARC-design combined with the lack of documentation, would surely +cost a lot of blood, sweat and tears. The absence of +clues to any of the design problems, combined with the \(em at first +glance \(em inhuman +SPARC instruction set did not make this a very attractive project. +.PP +On the other hand, these were exactly the reasons which made us take on +this particular project: it would require design skills, as well as some +hard work; a golden combination for a successful project. +.bp diff --git a/doc/sparc/2 b/doc/sparc/2 new file mode 100644 index 0000000..df29b82 --- /dev/null +++ b/doc/sparc/2 @@ -0,0 +1,109 @@ +.In +.nr H1 1 +.NH +CLOSE-UP LOOK +.NH 2 +What is EM? 
+.PP +As the abstract of the IR-81 report on EM +.[ [ +description of a machine architecture +.]] +says: \*(OQEM is a family +of intermediate languages designed for producing portable compilers.\*(CQ +Because EM is to be used on a wide range of languages and processors, +the instruction set is kept simple enough to allow easy translation to, +or interpretation on, almost any processor. Yet it is also powerful enough +to accommodate easy translation from almost any block-structured language. +.PP +Even though EM was designed in the early 1980s, it +is based on +.\" already shows strong signs of being influenced by +the (then innovative) RISC architecture. All instructions +have 0 or 1 operands, there are no fancy addressing modes as in the +68020's\*(Si move.w a3(_array,d3.w*2), -(sp)\*(So, no explicit registers, +although instructions for higher languages +such as array-operations, multiway branches (case) and +floating point operations are provided. +.PP +To fully understand the discussion in the following chapters, +the reader should at least have some knowledge of EM. +.NH 2 +What is SPARC? +.PP +According to Sun's RISC tutorial: \*(OQSun Microsystems has designed a RISC +architecture, called SPARC, and has implemented that architecture with +the Sun-4 family of supercomputing workstations and servers. SPARC stands +for Scalable Processor ARChitecture, emphasizing its applicability to +large as well as small machines.\*(CQ +.PP +In sharp contrast to EM, SPARC does have +explicit registers (31 integer and 32 floating point, all of which +are 32 bits wide) and +does not support any high level language operations: it does not even have +multiplication or division instructions. Because the SPARC design is +very straightforward, all instructions could be hard-coded (no microcode +involved) to +provide extremely high performance.
All register-to-register operations +require exactly one clock cycle, and all register-to-memory and +memory-to-register operations require two clock cycles, one to retrieve +the instruction and one to access external memory. At a clock speed of +over 20 MHz this means that well over 10 VAX MIPS can be achieved: +more than 4 times the speed of a 15 MHz 68020 used in the Sun3/50. +.PP +As above, the reader should also have some general knowledge about +the SPARC processor to be able to understand the following chapters. +.NH 2 +What exactly is a (fast) backend? +.PP +To put it in the simplest of ways: a (fast) backend is a set of routines to +translate EM code to code that will run 'on the metal' (for example the SPARC +processor). The distinction between full-fledged backends (code generators) +.[ [ +The table driven code generator +.]] +and fast backends (code expanders) +.[ [ +The Code Expander Generator +.]] +is related to +the compilation-time vs. run-time trade off. Code generators generate +efficient code and code expanders generate code very efficiently. +For details about code expanders see also +.[ [ +The design of very fast portable compilers +.]]. +.PP +The reasons for us to implement a code expander are numerous: Our first reason to +implement a code expander, rather than a code generator was that implementing a +code expander would be hard enough already. Code generators only give +more problems and there were already enough problems to be solved. Secondly, +we knew we would never be able to compete with original SPARC compilers due +to loss of information in the frontends (see also chapter 5). By implementing +a code expander we might be able to outrun the existing compilers on a +completely different terrain: compile speed. +.PP +The third 'reason' to implement a code expander lies a little deeper and was +not discovered until we had actually started the implementation...
It was only +then that we found out that for certain architectures, such as the SPARC, +the idea behind the code-expander is not necessarily inferior to that +behind a code-generator. It seems that for highly orthogonal instruction +sets it is possible to generate near optimal code without using the +code-expander. We have to say, however, that this is only true for our +optimized version of the code-expander. With the original code-expander +it would not have been possible to generate near-optimal code for the +SPARC processor. +.NH 2 +So, what are the main differences between EM and SPARC? +.PP +The main +difference between EM and SPARC is the stack versus register orientation. +The other differences, such as the presence of high level language +operations in EM, can easily be overcome by subroutines, +or small pieces of in-line SPARC code. +The design-part of this project mostly concentrates on +building a bridge between EM's stack and SPARC's registers. +.PP +In the next chapter we will make a list of all our design problems which +will then be discussed in chapter 4. +.bp diff --git a/doc/sparc/3 b/doc/sparc/3 new file mode 100644 index 0000000..b6ddce8 --- /dev/null +++ b/doc/sparc/3 @@ -0,0 +1,82 @@ +.In +.nr H1 2 +.NH +PROBLEMS +.NH 2 +Maintain SPARC speed +.PP +If we want to generate SPARC code, we should try to generate efficient code +as fast as possible. It would be quite embarrassing to find out that the +same program would run faster on a Motorola 68020 than on a SPARC processor, +when both operate at the same clock frequency. +Looking at some code generated by Sun's C-compiler and optimizing assembler, +we can spot a few remarkable characteristics of the generated SPARC code: +.IP - +There are almost no memory references +.IP - +Parameters to functions are passed through registers. +.IP - +Almost all delay slots\(dg +.FS +\(dg For details about delay slots see the SPARC Architecture Manual, chapter 4, pp. 
42-48 +.FE +are filled in by the assembler +.LP +If we want to generate efficient code, we should at least try to +reduce the number of memory references and use registers wherever we can. +Since EM is stack-oriented it references its stack for every operation so +this will not be an easy task; a suitable solution will however be given in +the next chapter. +.NH 2 +Increase compilation speed +.PP +Because we will implement a code expander (fast backend) we should keep +a close eye on efficiency; if we cannot beat regular compilers on producing +efficient code we will try to beat them on fast code generation. +The usual trick to achieve fast compilation is to pack the frontend, +optimizer, code-generator and +assembler all into a single large binary to reduce the overhead of +reading and writing temporary files. Unfortunately, due to the +SPARC instruction set, its relocation information is slightly bizarre +and cannot be represented with the present primitives. +This means that it will not be possible to generate the required output +format directly from our backend. +.PP +There are three solutions here: generate assembler code, and let an +existing assembler generate the required object (\fI.o\fR) files, +create our own primitives that can handle the SPARC relocation format, or +do not use any of the addressing modes that require the bizarre relocation. +Because we have enough on our hands already we will +let the existing assembler deal with generating object files. +.NH 2 +Convert stack to register operations +.PP +As we wrote in the previous chapter, for RISC machines a code expander can +produce almost as efficient code as a code generator. The fact that this is +true for stack-oriented RISC processors is rather obvious. The problem we +face, however, is that the SPARC processor is register, instead of +stack oriented. In the next chapter we will give a suitable solution to +convert most stack accesses to register accesses.
+.NH 2 +Miscellaneous +.PP +Besides performance and \fI.o\fR-compatibility there are some other +peculiarities of the SPARC processor and Sun's C-compiler (henceforth +simply called \fIcc\fR). +.PP +For some reason, the SPARC stack pointer requires alignment +on 8 bytes, so it is impossible to push a 4-byte integer on the stack +and then \*(Sisub 4, %sp\*(So\(dd. +.FS +\(dd For more information about SPARC assembler see the Sun-4 Assembly +Language Reference Manual +.FE +This too will be discussed in the next chapter, where we will take a +more in-depth look into this problem and also discuss a couple of +possible solutions. +.PP +Another thing is that \fIcc\fR usually passes the first six parameters of a +function-call through registers. To be \fI.o\fR-compatible we would have to +pass the first six parameters of each function call through registers as well. +Exactly why this is not feasible will also be discussed in the next chapter. +.bp diff --git a/doc/sparc/4 b/doc/sparc/4 new file mode 100644 index 0000000..7775e85 --- /dev/null +++ b/doc/sparc/4 @@ -0,0 +1,468 @@ +.In +.hw data-structures +.nr H1 3 +.NH +SOLUTIONS +.NH 2 +Maintaining SPARC speed +.PP +In chapter 3 we wrote: +.sp 0.3 +.nf +>If we want to generate efficient code, we should at least try to reduce the number of +>memory references and use registers wherever we can. +.fi +.sp 0.3 +In this chapter we will devise a strategy to swiftly generate acceptable +code by using push-pop optimization. +Note that this is not the push-pop +optimization already available in the EM-kit, since that is only present +in the assembler-to-binary part which we do not use +.[ [ +The Code Expander Generator +.]]. +Our push-pop optimization +works more like the fake-stack described in +.[ [ +The table driven code generator +.]].
+.NH 3 +Ad-hoc optimization +.PP +Before getting involved in any optimization let's have a look at some +code generated with a straightforward EM to SPARC conversion of the +C statement: \*(Sif(a[i]);\*(So Note that \*(Si%SP\*(So is an alias +for a general purpose +register and acts as the EM stack pointer. It has nothing to do with +\*(Si%sp\*(So \(em the SPARC stack pointer. +Analogously, \*(Si%LB\*(So is EM's local base pointer. +.br +.IP +.HS +.TS +; +l s l s l +l1f6 lf6 l2f6 lf6 l. +EM code SPARC code Comment + +lae _a set _a, %g1 ! load address of external _a + dec 4, %SP + st %g1, [%SP] + +lol -4 set -4, %g1 ! load local -4 (i) + ld [%g1+%LB], %g2 + dec 4, %SP + st %g2, [%SP] + +loc 2 set 2, %g1 ! load constant 2 + dec 4, %SP + st %g1, [%SP] + +sli 4 ld [%SP], %g1 ! pop shift count + ld [%SP+4], %g2 ! pop shiftee + sll %g2, %g1, %g3 + inc 4, %SP + st %g3, [%SP] ! push 4 * i + +ads 4 ld [%SP], %g1 ! add pointer and offset + ld [%SP+4], %g2 + add %g1, %g2, %g3 + inc 4, %SP + st %g3, [%SP] ! push address of _a + (4 * i) + +loi 4 ld [%SP], %g1 ! load indirect 4 bytes + ld [%g1], %g2 + st %g2, [%SP] ! push a[i] +cal _f + ... +.TE +.HS +.LP +Although the code is easy to understand, it clearly is far from optimal. +The above code uses approximately 60 clock-cycles\(dg +.FS +\(dg In general each instruction only takes one cycle, +except for \*(Sild\*(So and +\*(Sist\*(So which may both require additional clock cycles. The exact amount +of extra cycles needed depends on the SPARC implementation and memory access +time. Furthermore, the +\*(Siset\*(So pseudo-instruction is a bit tricky. It takes one cycle when +its argument lies between -4096 and 4095, and two cycles otherwise. +.FE +to push an array-element on the stack, +something which a 68020 can do in a single instruction. The SPARC +processor may be fast, but not fast enough to justify the above code. +.PP +The same statement can be translated much more efficiently: +.DS +.TS +; +l2f6 lf6 l. +sll %i0, 2, %g2 !
multiply index by 4 +set _a, g3 +ld [%g2+%g3], %g1 ! get contents of a[i] +dec 4, SP +st %g2, [SP] ! push a[i] onto the stack +.TE +.DE +which, instead of 60, uses only 5 clock cycles to retrieve the element +from memory and 5 additional cycles when the result has to be pushed +on the stack. Note that when the result is not a parameter it does not +have to be pushed on the stack. By making efficient use of the SPARC +registers we can fetch \*(Sia[i]\*(So in only 5 cycles! +.NH 3 +Analyzing optimization +.PP +Instead of ad-hoc optimization we will need something more solid. +When one tries to optimize the above code in an ad-hoc manner one will +probably notice the large overhead due to stack access. Almost every EM +instruction requires at least three SPARC instructions: one to carry out +the EM instruction and two to pop and push the result from and onto the +stack. This happens for every instruction, even though the data being pushed +will probably be needed by the next instruction. To optimize this extensive +pushing and popping of data we will use the appropriately named push-pop +optimization. +.PP +The idea behind push-pop optimization is to delay the push operation until +it is almost certain that the data actually has to be pushed. +As is often the case, the data does not have to be pushed, +but will be used as input to another EM instruction. +If we can decide at compile time that this will indeed be +the case we can save the time of first pushing the data and then popping it +back again by temporarily storing the data (possibly only during compilation!) +and using it no sooner than it is actually needed. +.PP +The \*(Sisli 4\*(So instruction, for instance, expects two inputs on top of the +stack: on top a counter and right below that the shiftee (the number +to be shifted). As a result \*(Sisli\*(So +pushes 'shiftee << counter' back to the stack. 
Now consider the following +sequence, which could be the result of the expression \*(Si4 * i\*(So +.DS +.TS +; +l1f6 lf6 l. +lol -4 +loc 2 +sli 4 +.TE +.DE +In the non-optimized situation the \*(Silol\*(So would push +a local variable (whose offset is -4) on the stack. +Then the \*(Siloc\*(So pushes a 2 on the stack and finally \*(Sisli\*(So +retrieves both these numbers to replace them with the result. +On most machines it is not necessary to +push the 2 on the stack, since it can be used in the shift instruction +as an immediate operand. On a SPARC, for instance, one can write +.DS +.TS +; +l2f6 lf6 l. +ld [%LB-4], %g1 ! load local variable into register g1 +sll %g1, 2, %g2 ! perform the shift-left-by-2 +.TE +.DE +where the output of the \*(Silol\*(So, as well as the immediate operand 2 are used +in the shift instruction. As suggested before, all of this can be +achieved with push-pop optimization. +.NH 3 +A mechanism for push-pop optimization +.PP +To implement the above optimization we need some mechanism to +temporarily store information during compilation. +We need to be able to store, compare and retrieve information from the +temporary storage (cache) without any +loss of information. Before describing all the routines used +to implement our cache we will first describe how the cache works. +.PP +Items in the cache are structures containing an external (\*(Sichar *\*(So), +two registers (\*(Sireg_t\*(So) and a constant (\*(Siarith\*(So), +any of which may be 0. +The value of such a structure is the sum of (the values of) +its elements. To put a register in the cache, one has to be allocated either +by calling \*(Sialloc_reg\*(So which returns a free register, by +\*(Siforced_alloc_reg\*(So which allocates a specific register or any +of the other routines available to allocate a register. To keep things +simple, we will not discuss all of the available primitives here.
+When the register +is then put in the cache by the \*(Sipush_reg\*(So routine, the ownership will +be transferred from the user to the cache. Ownership is important, because +only the owner of a register may (and must!) deallocate it. Registers can be +owned by either an (imaginary) register manager, the cache or the user. +When the user retrieves a register from the stack with \*(Sipop_reg\*(So for +instance, ownership passes back to the user. +The user should then call \*(Sifree_reg\*(So +to transfer ownership to the register manager or call \*(Sipush_reg\*(So +to give it back to the cache. +Since the cache behaves itself as a stack we will use the term pop resp. push +to get items from, resp. put items in the cache. +.PP +We shall now present the sets of routines that implement the cache. +.IP \(bu +The routines +.DS +\*(Si +reg_t alloc_reg(void) +reg_t alloc_reg_var(void) +reg_t alloc_float(void) +reg_t alloc_float_var(void) +reg_t alloc_double(void) +reg_t alloc_double_var(void) + +void forced_alloc_reg(reg_t) +void soft_alloc_reg(reg_t) + +void free_reg(reg_t) +void free_double_reg(reg_t) +\*(So +.DE +allocate and deallocate registers. If there are no more registers left, +i.e. they are owned by the cache, +one or more registers will be freed by flushing part of the cache +onto the real stack. +The \*(Sialloc_xxx_var\*(So primitives try to allocate a register that +can be used to store local variables. (In the current implementation +only the input and local registers.) If none can be found \*(SiNULL\*(So +is returned. \*(Siforced_alloc_reg\*(So forces the allocation of a certain +register. If it was already in use, its contents are moved to another +register. Finally \*(Sisoft_alloc_reg\*(So provides the possibility to +push a register onto the cache and still keep a copy for later use. +(Used to implement the \*(Sidup 4\*(So for example.)
+.IP \(bu +The routines +.DS +\*(Si +void push_const(arith) +arith pop_const(void) +\*(So +.DE +push or pop a constant onto or from the stack. Distinction between +constants and other types is made so as not to lose any information; constants +may be used later on as immediate operands, which is not the case +for other types. If \*(Sipop_const\*(So is called, but the element on top of +the cache has either one of the external or register fields non-zero a +fatal error will be reported. +.IP \(bu +The routines +.DS +\*(Si +reg_t pop_reg(void) +reg_t pop_float(void) +reg_t pop_double(void) +reg_t pop_reg_c13(char *n) + +void pop_reg_as(reg_t) + +void push_reg(reg_t) +\*(So +.DE +push or pop a register. These will be used most often since results from one +EM instruction, which are computed in a register, are often used in the next. +When the element on top of the cache is more +than just a register the cache manager +will generate code to compute the sum of its fields and put the result in a +register. This register will then be given to the user. +If the user wants the result in a special register, he should use the +\*(Sipop_reg_as\*(So routine. +The \*(Sipop_reg_c13\*(So gives an optional number (as character string) whose +value can be represented in 13 bits. The constant can then be used as an +offset for the SPARC \*(Sild\*(So and \*(Sist\*(So instructions. +.IP \(bu +The routine +.DS +\*(Si +void push_ext(char *) +\*(So +.DE +pushes an external onto the stack. There is no pop-variant of this one since +there is no use in popping an external. +.IP \(bu +The routines +.DS +\*(Si +void inc_tos(arith n) +void inc_tos_reg(reg_t r) +\*(So +.DE +increment the element on top of the cache by either the constant \*(Sin\*(So +or by a register. The latter is useful for pointer addition when referencing +external memory. +.KS +.IP \(bu +The routine +.DS +\*(Si +int type_of_tos(void) +\*(So +.DE +.KE +returns the type of the element on top of the cache.
This is a combination +(binary OR) of \*(SiT_ext\*(So, \*(SiT_reg\*(So or \*(SiT_float\*(So, +\*(SiT_reg2\*(So or \*(SiT_float2\*(So, and \*(SiT_cst\*(So, +and tells the +user which of the three fields are non-zero. When the register-fields +represent \*(Si%g0\*(So, it is considered zero. +.IP \(bu +Miscellaneous routines: +.DS +\*(Si +void init_cache(void) +void cache_need(int) +void change_reg(void) +void flush_cache(void) +\*(So +.DE +\*(Siinit_cache\*(So should be called before any +other cache routines, to initialize some internal data structures. +\*(Sicache_need\*(So is used to tell the cache that a certain number +of registers are needed for the next operation. This way the cache can +load them efficiently in one fell swoop. \*(Sichange_reg\*(So is to be +called when the user changes a register of which the cache (possibly) has +co-ownership. Because the contents of registers in the cache are +not allowed to change the user should call \*(Sichange_reg\*(So to +instruct the cache to copy the contents to some other register. +\*(Siflush_cache\*(So writes the cache to the stack and invalidates +the cache. It should be used before branches, +before labels and in other places where the stack has to be valid (i.e. where +every item on the EM-stack should be stored on the real stack, not in some +virtual cache). +.NH 3 +Implementing push-pop optimization in the EM_table +.PP +As indicated above, there is no regular way to represent the described +optimization in the EM_table. The only possible escapes from the EM_table +are function calls, but that is clearly not enough to implement a good +push-pop optimizer. Therefore we will use a modified version of the EM_table +format, where the description of, say, the \*(Silol\*(So instruction might look +like this\(dg: +.FS +\(dg This is not the way the \*(Silol\*(So actually looks in the EM_table; +it only shows how it \fImight\fR look using the aforementioned push/pop +primitives.
+.FE +.DS +\*(Si +reg_t A, B; +const_str_t n; + +alloc_reg(A); +push_reg(LB); +inc_tos($1); +B = pop_reg_c13(n); +"ld [$B+$n], $A"; +push_reg(A); +free_reg(B); +\*(So +.DE +For more details about the exact implementation consult +appendix B which contains some characteristic excerpts from the EM_table. +.NH 2 +Stack management +.PP +When converting EM code to some executable code there is the problem of +maintaining multiple stacks. The usual way to do this is described in +.[ [ +Description of a Machine Architecture +.]] +and is shown in figure \*(SN1. +.KF +.PS +copy "pics/EM_stack.orig" +.PE +.ce 1 +\fIFigure \*(SN1: usual stack management.\fR +.KE +.sp +.LP +This means that the EM stack and the hardware stack (used +for subroutine calls, etc.) are interleaved in memory. On the SPARC, however, +this brings up a large problem: in the former model it is assumed that the +resolution of the stack pointer is a word, but this is not the case on the +SPARC processor. On the SPARC processor the stack-pointer as well as the +frame-pointer have to be aligned on 8-byte boundaries, so one can not simply +push a word on the stack and then lower the stack-pointer by 4 bytes! +.NH 3 +Possible solutions +.PP +A simple idea might be to use a swiss-cheese stack; we could +push a 4-byte word onto the stack and then lower the stack by 8. +Unfortunately, this is not a very solid solution, because +pointer-arithmetic involving pointers to objects on the stack would cause +hard-to-predict anomalies. +.PP +Another try would be not to use the hardware stack at all. As long as we +do not generate subroutine-calls everything will be all right. This +approach, however, also has some disadvantages: first we would not be able +to use any of the existing debuggers such as \fIadb\fR, because they all +assume a regular stack format. Secondly, we would not be able to make use +of the SPARC's register windows to keep local variables.
Finally, doing all the +administrative work necessary for subroutine calls ourselves instead of +letting the hardware handle it for us, +causes unnecessary procedure-call overhead. +.PP +Yet another alternative would be to emulate the EM-part of the stack, +and to let the hardware handle the subroutine call. Since we will +emulate our own stack, there are no alignment restrictions and because +we will use the hardware procedure call we can still make use of +the register windows. +.NH 3 +Our implementation +.PP +To implement the hybrid stack we need two extra registers: one for the +EM stack pointer (the aforementioned \*(Si%SP\*(So) and one for the +EM local base pointer (\*(Si%LB\*(So). The most elegant solution would be to +put both stacks in different segments, so they would not influence +each other. Unfortunately +.UX +lacks the ability to add segments and +since we will implement our backend under +.UX, +we will have to put +both stacks in the same segment. Exactly how this can be done is shown +in figure \*(SN2. +.DS +.PS +copy "pics/mem_config" +.PE +.ce 1 +\fIFigure \*(SN2: our stack management.\fR +.DE +.sp +During normal procedure execution, the SPARC stack pointer has to point to +a memory location where the operating system can dump the active part of +the register window. The rest of the +register window will be dumped in the (stack) space pre-allocated for that purpose +by following the frame +pointer. When a signal occurs things get even more complicated and +result in figure \*(SN3. +.DS +.PS +copy "pics/signal_stack" +.PE +.ce 1 +\fIFigure \*(SN3: our signal stack.\fR +.DE +.PP +The exact implementation of the stack is shown in figure \*(SN4. +.KF +.PS +copy "pics/EM_stack.ours" +.PE +.ce 1 +\fIFigure \*(SN4: stack overview.\fR +.KE +.NH 2 +Miscellaneous +.PP +As mentioned in the previous chapter, the generated \fI.o\fR-files are +not compatible with Sun's own object format.
The primary reason for +this is that Sun usually passes the first six parameters of a procedure call +through registers. If we were to do that too, we would always have +to fetch the top six words from the stack into registers, even when +the procedure would not have any parameters at all. Apart from this, +structure-passing is another exception in Sun's object format which +makes it impossible to generate object-compatible code.\(dg +.FS +\(dg Exactly how Sun passes structures as parameters is described in +Appendix D of the SPARC Architecture Manual (Software Considerations) +.FE +.bp diff --git a/doc/sparc/5 b/doc/sparc/5 new file mode 100644 index 0000000..8fde122 --- /dev/null +++ b/doc/sparc/5 @@ -0,0 +1,153 @@ +.In +.nr H1 4 +.NH +FUTURE WORK +.NH 2 +A critique of EM +.PP +In general, EM fits its purpose quite well. Numerous compilers have been +written using EM as their intermediate language and it has even become a +commercial product. A great deal of its success is probably due to its +simplicity. There are no extravagant instructions but it does have all the +necessary functions to write a decent compiler. +.PP +There are, however, a few functions that come rather close to being +extravagant. The \*(Silar\*(So function for example \(em used +to fetch an element from an array \(em does not make it much easier +to write a frontend, but does make it unnecessarily hard to write an +efficient backend. Other instructions for which it is difficult +to generate efficient code are those that permit +dynamic operators, such as the \*(Silos\*(So. Dynamic operators, however, provide +significant extra possibilities and can therefore not be disposed of. +Note that even though the array operations \*(Silar\*(So and \*(Sisar\*(So +provide dynamic operators, they do not add additional power, since +they can easily be replaced with a sequence using the \*(Silos\*(So or +\*(Sists\*(So instructions.
+.PP +EM code to reference arrays generated by the C frontend can be translated +very efficiently for almost any processor. However the same operation +generated by the Modula-2 frontend (which uses the \*(Silar\*(So), +is much less efficient, although the only difference is that the +latter performs range checking whereas the former does not.\(dg +.FS +\(dg Actually this depends on whether or not explicit range checking is enabled. +This clearly shows that the current code generators are not optimal and +often depend on ad-hoc decisions. +.FE +Since range checking can also be expressed explicitly in +EM (\*(Sirck\*(So) there is no need for any of the array operations +(\*(Siaar\*(So, \*(Silar\*(So and \*(Sisar\*(So). +.PP +Besides efficiency of the array-operations themselves, there still is another +major disadvantage of using these array-operations. In sharp contrast to +all other EM instructions except the \*(Silos\*(So and the \*(Sists\*(So, +they allow dynamic operators, so their effect on the stack-pointer can not +always be +determined at compile-time. This means that efficient caching of the +top-of-stack in registers is almost impossible, +so using these array-operations also affects the +efficiency of the surrounding code. Now that processors are produced with +more and more registers it could be very beneficial to cache the +top-of-stack, so that the memory/register reference ratio decreases +to the benefit of the overall performance. +.PP +As a final critique, we would also like to discuss the semantics of some of +the EM instructions. In +.[ [ +Description of a Machine Architecture +.]] +it is said that +all signed instructions such as the \*(Siadi\*(So, should cause an exception +on overflow. The unsigned operations such as \*(Siadu\*(So, however, +should act as modulo operations and therefore not perform overflow checking. +Since it is very expensive to perform overflow checking in EM, +we would suggest that the backend takes care of this.
For languages which +do not require overflow checking, a simple message could be generated to +disable overflow checking in backends. This way all backends could be +written to fully comply with the official EM definition without any reduction in +efficiency.\(dd +.FS +\(dd Currently many backends do not implement error checks because they +are too expensive and almost never needed. Some frontends even have +facilities built in to generate EM-code to force these checks. If this +trend continues we will end up with a de-facto and a de-jure standard +both developed by the same people but nonetheless incompatible. +.FE +When such messages are added we would like to suggest +that they can enforce overflow checks on unsigned, as well as signed arithmetic. +.PP +As a conclusion we would like to suggest removal of the array operations from +EM, or at least discontinuation of their usage in frontends. +.NH 2 +\*(OQWanted: Procedure call information\*(CQ +.PP +The advantage of an intermediate language such as EM is that the backend +no longer has to know about any 'quirks' of the 'input'-language. The major +disadvantage, however, is that the backend no longer knows about any 'quirks' +of the 'input'-language... If the SPARC backend ever has to compete +with Sun's own C-compiler for example, removal of the array-operations +will not be enough. The amount of information that is lost during +the translation to EM is too large to ever generate truly efficient SPARC code. +.PP +To write such an efficient backend one needs to know, for example, whether, +when and what type of parameter is being computed, so the result can be stored +in the proper place and scratch registers can be reused. +(On the SPARC processor, for example, it is very beneficial +to pass the first six parameters of a procedure call through +registers instead of using the stack.) +One way to express such things in EM is to insert extra messages in +the EM-code.
The C statement \*(Sia = f(4, a + b);\*(So for example, +could be translated to the following EM-code: +.DS +.TS +; +l1f6 lf6 l. +lol -4 ! a +lol -8 ! b +mes x, 2 ! next instruction will compute 2nd parameter +adi 4 +mes x, 1 ! next instruction will compute 1st parameter +loc 4 +cal _f ! call function f +lfr 4 +stl -4 ! store result in a +.TE +.DE +For a code expander it is important that the \*(Simes\*(So pseudo +instructions appear \fIbefore\fR +the EM instruction that computes the parameter, because that way the final +computation (the \*(Siadi\*(So and \*(Siloc\*(So in the previous example) +can be translated to machine code that performs the required computation +and also puts the result in the required place. If it is found to be +too difficult for the frontend to insert these \*(Simes\*(So instructions +at the right place the peep-hole optimizer might swap the \*(Simes\*(So and +the instruction that computes the parameter. +.PP +For some architectures, it is also +possible to generate more efficient code for a procedure when it is a +so-called leaf-procedure: a procedure that doesn't call other procedures. +On the SPARC, for example, it is not necessary to rotate the register +window for a call to a leaf procedure and it is also possible to use +the global registers for register variables in leaf procedures. +It will be a little harder to insert useful messages about leaf procedures, +because just as with register messages, they are only useful to the +backend when they appear immediately +after or before the \*(Sipro\*(So pseudo instruction. The frontend, +however, only knows whether a certain procedure is a leaf-procedure or not +when it has already generated the entire procedure in EM. Just as with the +\*(Sipro ? / end n\*(So-dilemma the peep-hole optimizer +.[ [ +Using Peephole Optimization +.]] +might be able to lend a hand +and help us out by delaying EM-code generation until it has reached the +end of the procedure. 
+.PP +As with most optimizations, the main problem is that they have to be +implemented with the \*(Simes\*(So pseudo instruction. +Because the \*(Simes\*(So instruction can have many different meanings +depending on its argument, +it is important that all optimizers recognize and respect them. Addition +of even a single message will require careful inspection of, and maybe even +incorporate small changes to each of the optimizers. +.bp diff --git a/doc/sparc/A b/doc/sparc/A new file mode 100644 index 0000000..2fc580a --- /dev/null +++ b/doc/sparc/A @@ -0,0 +1,184 @@ +.In +.SH +A. MEASUREMENTS +.SH +A.1. \*(OQThe bottom line\*(CQ +.PP +Although examples often are most illustrative, the cruel world out there is +usually more interested in everyday performance figures. To satisfy those +people too, we will present a series of measurements on our code expander +taken from (close to) real life situations. These include measurements +of compile and run times of different programs, +compiled with different compilers. +.SH +A.2. Compile time measurements +.PP +Figure A.2.1 shows compile-time measurements for typical C code: +the dhrystone benchmark\(dg +.[ [ +dhrystone +.]]. +.FS +\(dg To be certain that we only tested the compiler and not the quality of +the code in the library, we have added our own version of +\fIstrcmp\fR and \fIstrcpy\fR and have not used the ones present in the +library. +.FE +The numbers represent the duration of each separate pass of the compiler. +The numbers at the end of each bar represent the total duration of the +compilation process. As with all measurements in this chapter, the +quoted time or duration is the sum of user and system time in seconds. 
+.PS +copy "pics/compile_bars" +.PE +.DS +.IP cem: 6 +C to EM frontend +.IP opt: +EM peep-hole optimizer +.IP be: +EM to assembler backend +.IP cpp: +Sun's C preprocessor +.IP ccom: +Sun's C compiler +.IP iropt: +Sun's optimizer +.IP cg: +Sun's code generator +.IP as: +Sun's assembler +.IP ld: +Sun's linker +.ce 1 +\fIFigure A.2.1: compile-time measurements.\fR +.DE +.sp +.PP +A close examination of the first two bars in fig A.2.1 shows that the maximum +achievable compile-time +gain compared to \fIcc\fR is about 50% for medium-sized +programs.\(dd +.FS +\(dd (cpp+ccom+as+ld)/(cem+as+ld) = 1.53 +.FE +For small programs the gain will be less, due to the almost constant +start-up time of each pass in the compilation process. Only a +built-in assembler may increase this number up to +180% in the ideal case that the optimizer, backend and assembler +would run in zero time. Speed-ups of 5 to 10 times as mentioned in +.[ [ +fast portable compilers +.]] +are therefore not possible on the Sun-4 family. This is also due to +Sun's implementation of saving and restoring register windows. With +the current implementation in which only a single window is saved +or restored on a register-window overflow, it is very time consuming +when programs have highly dynamic stack use +due to procedure calls (as is often the case with compilers). +.PP +Although we are currently a little slower than \fIcc\fR, it is hard to +blame this on our backend. Optimizing the backend so that it would run +twice as fast would only reduce the total compilation process by +a mere 14%. +.PP +Finally it is nice to see that our push/pop-optimization, +initially designed to generate faster code, has also increased the +compilation speed. (see also figures A.4.1 and A.4.2.) +.SH +A.3. Run time performance +.PP +Figure A.3.1 shows the run-time performance of different compilers. 
+All results are normalized, where the best available compiler (Sun's +compiler with full optimization) is represented by 1.0 on our scale. +.PS +copy "pics/run-time_bars" +.PE +.ce 1 +\fIFigure A.3.1: run time performance.\fR +.sp 1 +.PP +The fact that our compiler behaves rather poorly compared to Sun's +compiler is due to the fact that the dhrystone benchmark uses +relatively many subroutine calls; all of which have to be 'emulated' +by our backend. +.SH +A.4. Overall performance +.LP +In the next two figures we will show the combined run and compile time +performance of 'our' compiler (the ACK C frontend and our backend) +compared to Sun's C compiler. Figure A.4.1 shows the results from +measurements on the dhrystone benchmark. +.G1 +frame invis left solid bot solid +label left "run time" "(in \(*msec/dhrystone)" +label bot "compile time (in sec)" +coord x 0,21 y 0,610 +ticks left out from 0 to 600 by 200 +ticks bot out from 0 to 20 by 5 +"\(bu" at 3.5, 1000000/1700 +"ack w/o opt" ljust at 3.5 + 1, 1000000/1700 +"\(bu" at 2.8, 1000000/8770 +"ack with opt" below at 2.8 + 0.1, 1000000/8770 +"\(bu" at 16.0, 1000000/10434 +"ack -O4" above at 16.0, 1000000/10434 +"\(bu" at 2.3, 1000000/7270 +"\fIcc\fR" above at 2.3, 1000000/7270 +"\(bu" at 9.0, 1000000/12500 +"\fIcc -O4\fR" above at 9.0, 1000000/12500 +"\(bu" at 5.9, 1000000/15250 +"\fIcc -O\fR" below at 5.9, 1000000/15250 +.G2 +.ce 1 +\fIFigure A.4.1: overall performance on dhrystones. +.sp 1 +.LP +Fortunately for us, dhrystones are not all there is. 
The following +figure shows the same measurements as the previous one, except +this time we took a benchmark that uses no subroutines: an implementation +of Eratosthenes' sieve: +.G1 +frame invis left solid bot solid +label left "run time" "for one run" "(in sec)" left .6 +label bot "compile time (in sec)" +coord x 0,11 y 0,21 +ticks bot out from 0 to 10 by 5 +ticks left out from 0 to 20 by 5 +"\(bu" at 2.5, 17.28 +"ack w/o opt" above at 2.5, 17.28 +"\(bu" at 1.6, 2.93 +"ack with opt" above at 1.6, 2.93 +"\(bu" at 9.4, 2.26 +"ack -O4" above at 9.4, 2.26 +"\(bu" at 1.5, 7.43 +"\fIcc\fR" above at 1.5, 7.43 +"\(bu" at 2.7, 2.02 +"\fIcc -O4\fR" ljust at 1.9, 1.2 +"\(bu" at 2.6, 2.10 +"\fIcc -O\fR" ljust at 3.1,2.5 +.G2 +.ce 1 +\fIFigure A.4.2: overall performance on Eratosthenes' sieve. +.sp 1 +.PP +Although the above figures speak for themselves, a small comment +may be in place. At first it is clear that our compiler is neither +faster than \fIcc\fR, nor produces faster code than \fIcc -O4\fR. It should +also be noted however, that we do produce better code than \fIcc\fR +at only a very small additional cost. +It is also worth noticing that push-pop optimization +increases run-time speed as well as compile speed. +The first seems rather obvious, +since optimized code is +faster code, but the increase in compile speed may come as a surprise. +The main reason is that the \fIas\fR+\fIld\fR time depends largely on the +amount of generated code, which in general +depends on the efficiency of the code. +Push-pop optimization removes a lot of useless instructions which +would otherwise +have found their way through to the assembler and the loader. +Useless instructions inserted in an early stage in the compilation +process will slow down every following stage, so elimination of useless +instructions in an early stage, even when it requires a little computational +overhead, can often be beneficial to the overall compilation speed. 
+.bp diff --git a/doc/sparc/B b/doc/sparc/B new file mode 100644 index 0000000..04fdadc --- /dev/null +++ b/doc/sparc/B @@ -0,0 +1,128 @@ +.In +.SH +B. IMPLEMENTATION +.SH +B.1. Excerpts from the non-optimized EM_table +.PP +Even though the non-optimized version of the EM_table is relatively +straight-forward, examples have never hurt anybody. +One of the simplest instructions is the \*(Siloc\*(So, which appears in +our EM_table as follows: +.DS +\f6 +.TA 8 16 24 32 40 48 56 64 +C_loc ==> "set $1, T1"; + "dec 4, SP"; + "st T1, [SP]". +\f1 +.DE +Just as \*(SiSP\*(So is an alias for \*(Si%l0\*(So, \*(SiT1\*(So is +an alias for \*(Si%g1\*(So. +A little more complex is the \*(Siadi\*(So which performs integer +addition. +.DS +\f6 +C_adi ==> "ld [SP], T1"; + "ld [SP+4], T2"; + "add T1, T2, T3"; + "st T3, [SP+4]"; + "inc 4, SP". +\f1 +.DE +We could go on with even more complex instructions, but since that would +not contribute to anything the reader is referred to the implementation +for more details. +.SH +B.2. Excerpts from the optimized EM_table +.PP +The optimized EM_table uses the cache primitives mentioned in chapter 4. +This means that the \*(Siloc\*(So this time appears as +.DS +\f6 +C_loc ==> push_const($1). +\f1 +.DE +The \*(Silol\*(So can now be written as +.DS +\f6 +C_lol ==> push_reg(LB); + inc_tos($1); + push_const(4); + C_los(4). +\f1 +.DE +Due to the law of conservation of misery somebody has to do the dirty work. +In this case, it is the \*(Silos\*(So. To show just a small part of +the implementation of the \*(Silos\*(So: +.DS +\f6 +C_los $1 == 4 ==> + if (type_of_tos() == T_cst) { + arith size; + const_str_t n; + + size= pop_const(); + if (size <= 4) { + reg_t a; + reg_t b; + char *LD; + + switch (size) { + case 1: LD = "ldub"; break; + case 2: LD = "lduh"; break; + case 4: LD = "ld"; break; + default: arg_error("C_los", size); + } + a = pop_reg_c13(n); + b = alloc_reg(); + "$LD [$a+$n], $b"; + push_reg(b); + free_reg(a); + } else ...
+\f1 +.DE +For the full implementation, the reader is again referred to the actual +implementation. Just to show how other instructions are affected +by the optimization we will show the implementation of the \*(Sitge\*(So +instruction: +.DS +\f6 +C_tge ==> { + reg_t a; + reg_t b; + + a = pop_reg(); + b = alloc_reg(); + " tst $a"; + " bge,a 1f"; + " mov 1, $b"; /* delay slot */ + " set 0, $b"; + "1:"; + free_reg(a); + push_reg(b); + }. + +\f1 +.DE +.SH +.bp +CREDITS +.PP +In order of appearance: +.TS +center; +r c l. +Original idea - Dick Grune +Design & implementation - Philip Homburg + - Raymond Michiels +Tutor - Dick Grune +Assistant Tutor - Ceriel Jacobs +Proofreading - Dick Grune + - Hans van Eck +.TE +.SH +REFERENCES +.PP +.[ +$LIST$ +.] diff --git a/doc/sparc/init b/doc/sparc/init new file mode 100644 index 0000000..ead7034 --- /dev/null +++ b/doc/sparc/init @@ -0,0 +1,20 @@ +.de In +.nr PS 12 +.nr VS 14 +.\" .fp 6 AM +.fp 6 CW +.ds Si \f6\s-1 +.ds So \f1\s+1 +.ds OQ `\h'-1p'` +.ds CQ '\h'-1p'' +.. +.de UX +.ie \\n(UX \s-1UNIX\s0\\$1 +.el \{\ +\s-1UNIX\s0\\$1\(dg +.FS +\(dg \s-1UNIX\s0 is a registered bell of AT&T Trademark Laboratories. +.FE +.nr UX 1 +.\} +.. diff --git a/doc/sparc/intro b/doc/sparc/intro new file mode 100644 index 0000000..b8e2130 --- /dev/null +++ b/doc/sparc/intro @@ -0,0 +1,23 @@ +.In +.hw de-vised +.TL +A fast backend for SPARC processors +.AU +Philip Homburg +Raymond Michiels +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.AB +The language EM is an intermediate language for use in compiler +construction. +In this paper we describe the construction of a so-called fast backend +which translates EM code to assembler for SPARC processors. +.br +Our construction deviates strongly from the usual procedure. We have +devised and implemented a virtual stack with which it is possible to +generate very acceptable code without much loss in compile time.
+.AE +.PP +.bp diff --git a/doc/sparc/note_on_reg_wins b/doc/sparc/note_on_reg_wins new file mode 100644 index 0000000..c2927c1 --- /dev/null +++ b/doc/sparc/note_on_reg_wins @@ -0,0 +1,58 @@ +When developing a fast compiler for the Sun-4 series we have encountered +rather strange behavior of the Sun kernel. + +The problem is that with lots of nested procedure calls, (as +is often the case in compilers and parsers) the registers fill up which +causes a kernel trap. The kernel will then write out some of the registers +to memory to make room for another window. When returning from the nested +procedure call, just the reverse happens: yet another kernel trap so the +kernel can load the registers from memory. + +Unfortunately the kernel only saves or loads a single window (= 16 registers) +on each trap. This means that when calling a procedure recursively it causes +a kernel trap on almost every invocation (except for the first few). + +To illustrate this consider the following little program: + +--------------- little program ------------- +f(i) /* calls itself i times */ +int i; +{ + if (i) + f(i-1); +} + +main(argc, argv) +int argc; +char *argv[]; +{ + int i, j; + + i = atoi(argv[1]); /* # loops */ + j = atoi(argv[2]); /* depth */ + + while (i--) + f(j); +} +------------ end of little program ----------- + + +The performance decreases abruptly when the depth (j) becomes larger +than 5. On a SPARC station we got the following results: + + depth run time (in seconds) + + 1 0.5 + 2 0.8 + 3 1.0 + 4 1.4 <- from here on it's +6 seconds for each + 5 7.6 step deeper. + 6 13.9 + 7 19.9 + 8 26.3 + 9 32.9 + +Things would be a lot better when instead of just 1, the kernel would +save or restore 4 windows (= 64 registers = 50% on our SPARC stations). + + -Raymond.
diff --git a/doc/sparc/pics/EM_stack.orig b/doc/sparc/pics/EM_stack.orig new file mode 100644 index 0000000..7cae3f3 --- /dev/null +++ b/doc/sparc/pics/EM_stack.orig @@ -0,0 +1,34 @@ +.PS +.ps -2 +.vs -2 +boxwid = 1.5; +boxht = 0.24 +down; +box "actual parameter n-1"; +box "." "." "." ht 0.6; +box "actual parameter 0"; +move 0.3 +box "return status block"; +{arrow <- right with .w at last box.e; \ +box invis wid 0.3 "LB" } +down +move to 2nd last box.s +move 0.1 +box "local variables" +box "compiler temporaries" +move 0.1 +box "register save block" +move 0.1 +box "dynamic local generators" +move 0.1 +box "operand" +box "operand" +move 0.1 +box "parameter m-1" +box "." "." "." ht 0.6; +box "parameter 0" with .n at last box .s +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "SP" } +.ps +2 +.vs +2 +.PE diff --git a/doc/sparc/pics/EM_stack.ours b/doc/sparc/pics/EM_stack.ours new file mode 100644 index 0000000..260f2c6 --- /dev/null +++ b/doc/sparc/pics/EM_stack.ours @@ -0,0 +1,106 @@ +.ps 10 +.vs 12 +.PS +boxwid = 1.3 +boxht = 0.25 +down; +box "floating point" "register dump area" ht 0.6 +box "tmp float store" +box "register dump area" ht 0.6 +{ arrow <- right with .w at 3/4 ; \ +box invis wid 0.3 "%fp" } +move .1 +box dotted "gap" +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "%LB" } +move .1 +box "locals" +box "actual parameter n-1"; +box "." "." "." 
ht 0.6; +box "actual parameter 0"; +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "%SP" } +move 0.1 +box "large gap" "(>64kb)" ht 1.0 +box "register dump area" ht 0.6 +{ arrow <- right with .w at 3/4 ; \ +box invis wid 0.3 "%sp" } +move 0.2 +box invis "\\s+2just before call\\s0" +move 1 +box dotted "gap" +box invis "0 or 4 bytes" "for stack alignment" with .w at last box.e +box invis height .7 "when gap is 0 bytes," "%fp == %LB" with .n at 2nd last box.s +.PF +.PS +down; +move to 2.4,0 +box "floating point" "register dump area" ht 0.6 +box "tmp float store" +box "register dump area" ht 0.6 +{ arrow <- right with .w at 3/4 ; \ +box invis wid 0.3 "%fp" } +move .1 +box dotted "gap" +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "%LB" } +move .1 +box "locals" +box "actual parameter n-1"; +box "." "." "." ht 0.6; +box "actual parameter 0"; +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "%SP" } +move .1 +box dotted "gap" +move .4 +box "floating point" "register dump area" ht 0.6 +box "tmp float store" +box "register dump area" ht 0.6 +{ arrow <- right with .w at 3/4 ; \ +box invis wid 0.3 "%sp" } +move 0.2 +box invis "\\s+2'during' call\\s0" +.PF +.PS +down; +move to 4.8,0 +box "floating point" "register dump area" ht 0.6 +box "tmp float store" +box "register dump area" ht 0.6 +move .1 +box dotted "gap" +move .1 +box "locals" +box "actual parameter n-1"; +box "." "." "." ht 0.6; +box "actual parameter 0"; +move .1 +box dotted "gap" +move .4 +box "floating point" "register dump area" ht 0.6 +box "tmp float store" +box "register dump area" ht 0.6 +{ arrow <- right with .w at 3/4 ; \ +box invis wid 0.3 "%fp" } +move .1 +box dotted "gap" +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "%LB" } +move .1 +box "locals" +box "actual parameter n-1"; +box "." "." "." 
ht 0.6; +box "actual parameter 0"; +{ arrow <- right with .w at last box.e; \ +box invis wid 0.3 "%SP" } +move 0.1 +box "large gap" "(>64kb)" ht 1.0 +box "register dump area" ht 0.6 +{ arrow <- right with .w at 3/4 ; \ +box invis wid 0.3 "%sp" } +move 0.2 +box invis "\\s+2after call\\s0" +.PF +.ps 12 +.vs 14 diff --git a/doc/sparc/pics/compile_bars b/doc/sparc/pics/compile_bars new file mode 100644 index 0000000..657a418 --- /dev/null +++ b/doc/sparc/pics/compile_bars @@ -0,0 +1,49 @@ +.PS +boxht = 0.5 +boxwid = 1 +moveht = 0.65 +down; +{ +right; +box invis "ACK" "w/o" "opt" +box "cem" "0.7" wid 0.7 +box "opt" "0.4" wid 0.4 +box "be" "1.1" wid 1.1 +box "as" "1.4" wid 1.4 +box "ld" "0.4" wid 0.4 +box invis "4.0" wid 0.5 +} +move +{ +right; +box invis "ACK" "with" "opt" +box "cem" "0.7" wid 0.7 +box "opt" "0.4" wid 0.4 +box "be" "0.6" wid 0.6 +box "as" "0.7" wid 0.7 +box "ld" "0.4" wid 0.4 +box invis "2.8" wid 0.5 +} +move +{ +right; +box invis "\fIcc\fR" +box "cpp" "0.2" wid 0.2 +box "ccom" "1.0" wid 1.0 +box "as" "0.7" wid 0.7 +box "ld" "0.4" wid 0.4 +box invis "2.3" wid 0.5 +} +move +{ +right; +box invis "\fIcc -O4\fR" +box "cpp" "0.2" wid 0.2 +box "ccom" "1.0" wid 1.0 +box "iropt" "5.0 (not to scale!)" wid 1.5 +box "cg" "0.7" wid 0.7 +box "as" "1.7" wid 1.7 +box "ld" "0.4" wid 0.4 +box invis "9.0" wid 0.5 +} +.PE diff --git a/doc/sparc/pics/mem_config b/doc/sparc/pics/mem_config new file mode 100644 index 0000000..0ad8818 --- /dev/null +++ b/doc/sparc/pics/mem_config @@ -0,0 +1,34 @@ +.PS +boxwid = 1.3 +down +[ +right +[ +down; +box "stack" ht .6 +box "free" ht 1 +box "heap" ht .3 +box "text" ht .5 +] +move 1 +[ +down; +box "\s-4SPARC stack\s+4" ht .2 +box "\s-4EM stack\s+4" ht .1 +box "\s-4SPARC stack\s+4" ht .1 +box "\s-4EM stack\s+4" ht .1 +box "\s-4free\s+4" ht .2 +box "\s-4SPARC stack\s+4" ht .1 +box "free" ht .8 +box "heap" ht .3 +box "text" ht .5 +] +] +move .3 +[ +right +box invis "regular \(UX memory layout" +move 1 +box invis "memory layout for EM" +] 
+.PF diff --git a/doc/sparc/pics/perf b/doc/sparc/pics/perf new file mode 100644 index 0000000..a48965e --- /dev/null +++ b/doc/sparc/pics/perf @@ -0,0 +1,12 @@ +.G1 +frame invis left solid bot solid +label left "run time" "(log scale)" left .5 +label bot "compile time (log scale)" +coord x 0.1,10 log x y 1000,20000 log y +ticks left out at 2000,5000,10000,20000 +ticks bot out at 0.1 0.3 1.0 3.0 10 +copy "perf.d" thru X + "\(bu" at $1, $2 + "$3" rjust at $1, $2 +X +.G2 diff --git a/doc/sparc/pics/perf.comp b/doc/sparc/pics/perf.comp new file mode 100644 index 0000000..761fd06 --- /dev/null +++ b/doc/sparc/pics/perf.comp @@ -0,0 +1,7 @@ +in-line in ../A + +2.5 17.28 ack w/o opt +1.6 2.93 ack with opt +9.4 2.26 ack -O4 +1.5 7.43 \fIcc\fR +2.7 2.02 \fIcc -O4\fR diff --git a/doc/sparc/pics/perf.d b/doc/sparc/pics/perf.d new file mode 100644 index 0000000..9cf4081 --- /dev/null +++ b/doc/sparc/pics/perf.d @@ -0,0 +1,4 @@ +1.0 1700 ack w/o opt +1.9 8000 ack with opt +1.6 8000 \fIcc\fR +7 18000 \fIcc -O4\fR diff --git a/doc/sparc/pics/perf.dhry b/doc/sparc/pics/perf.dhry new file mode 100644 index 0000000..8faa4e3 --- /dev/null +++ b/doc/sparc/pics/perf.dhry @@ -0,0 +1,7 @@ +in-line in ../A + +3.5 1700 ack w/o opt +2.8 8770 ack with opt +16.0 10434 ack -O4 +2.3 7270 \fIcc\fR +9.0 12500 \fIcc -O4\fR diff --git a/doc/sparc/pics/reg_layout b/doc/sparc/pics/reg_layout new file mode 100644 index 0000000..58ddd92 --- /dev/null +++ b/doc/sparc/pics/reg_layout @@ -0,0 +1,24 @@ +.nr PS 12 +.nr VS 14 +.PP +.TS +allbox; +l l l l +l2f6 l l2f6 l. 
+g0 0 l0 EM_SP +g1 temporary 1 l1 EM_LB +g2 temporary 2 l2 +g3 temporary 3 l3 reserved +g4 64k..1M l4 reserved +g5 temporary 4 l5 reserved +g6 line number l6 reserved +g7 file name l7 reserved +o0 param 1 i0 +o1 param 2 i1 +o2 param 3 i2 +o3 param 4 i3 +o4 RETL_LD i4 RETL_ST +o5 RETH_LD i5 RETH_ST +sp stack pointer fp frame pointer +o7 xxx i7 return address +.TE diff --git a/doc/sparc/pics/run-time_bars b/doc/sparc/pics/run-time_bars new file mode 100644 index 0000000..cf4d29f --- /dev/null +++ b/doc/sparc/pics/run-time_bars @@ -0,0 +1,101 @@ +.PS +boxht = 0.5 +boxwid = 1 +moveht = 1 +down; +{ +right; +box invis "ACK" "w/o" "opt." +move +[ +down; +boxht = 0.25 +box wid 4.5 +"Sieve" ljust at last box.w + 0.1,-0.02 +"10(!)" ljust at last box.e + 0.1,-0.02 +box wid 4.5 with .nw at last box.sw +"Dhrystones" ljust at last box.w + 0.1,-0.02 +"10(!)" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "ACK" "with" "our" "opt." +move +[ +down; +boxht = 0.25 +box wid 1.4 +"Sieve" ljust at last box.w + 0.1,-0.02 +"1.4" ljust at last box.e + 0.1,-0.02 +box wid 1.9 with .nw at last box.sw +"Dhrystones" ljust at last box.w + 0.1,-0.02 +"1.9" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "ACK" "-O4" +move +[ +down; +boxht = 0.25 +box wid 1.1 +"Sieve" ljust at last box.w + 0.1,-0.02 +"1.1" ljust at last box.e + 0.1,-0.02 +box wid 1.6 with .nw at last box.sw +"Dhrystones" ljust at last box.w + 0.1,-0.02 +"1.6" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "Sun's" "compiler" "w/o opt." 
+move +[ +down; +boxht = 0.25 +box wid 3.7 +"Sieve" ljust at last box.w + 0.1,-0.02 +"3.7" ljust at last box.e + 0.1,-0.02 +box wid 2.2 with .nw at last box.sw +"Dhrystones" ljust at last box.w + 0.1,-0.02 +"2.2" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "Sun's" "compiler" "-O" +move +[ +down; +boxht = 0.25 +box wid 1.1 +"Sieve" ljust at last box.w + 0.1,-0.02 +"1.1" ljust at last box.e + 0.1,-0.02 +box wid 0.8 with .nw at last box.sw +"Dhryst." ljust at last box.w + 0.1,-0.02 +"0.8!" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "Sun's" "compiler" "-O4" +move +[ +down; +boxht = 0.25 +box wid 1.0 +"Sieve" ljust at last box.w + 0.1,-0.02 +"1.0" ljust at last box.e + 0.1,-0.02 +box wid 1.0 with .nw at last box.sw +"Dhrystones" ljust at last box.w + 0.1,-0.02 +"1.0" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +.PE diff --git a/doc/sparc/pics/run-time_bars.bup b/doc/sparc/pics/run-time_bars.bup new file mode 100644 index 0000000..6bb014d --- /dev/null +++ b/doc/sparc/pics/run-time_bars.bup @@ -0,0 +1,100 @@ +.PS +boxht = 0.5 +boxwid = 1 +moveht = 1 +down; +{ +right; +box invis "ACK" "w/o" "opt" +move +[ +down; +boxht = 0.25 +box wid 4.5 +"C (arithmetic)" ljust at last box.w + 0.1,-0.02 +"10(!)" ljust at last box.e + 0.1,-0.02 +box wid 4.5 with .nw at last box.sw +"C (dhrystones)" ljust at last box.w + 0.1,-0.02 +"10(!)" ljust at last box.e + 0.1,-0.02 +box wid 4.5 with .nw at last box.sw +"Modula-2" ljust at last box.w + 0.1,-0.02 +"8(!)" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "ACK" "with" "peep-hole" "opt" +move +[ +down; +boxht = 0.25 +box wid 1.4 +"C (arithmetic)" ljust at last box.w + 0.1,-0.02 +"1.4" ljust at last box.e + 0.1,-0.02 +box wid 1.9 with .nw at last box.sw +"C (dhrystones)" ljust at last box.w + 0.1,-0.02 +"1.9" ljust at last box.e + 0.1,-0.02 +box wid 2.5 with .nw at last box.sw +"Modula-2" ljust at 
last box.w + 0.1,-0.02 +"2.5" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "ACK" "-O4" +move +[ +down; +boxht = 0.25 +box wid 1.1 +"C (arithmetic)" ljust at last box.w + 0.1,-0.02 +"1.1" ljust at last box.e + 0.1,-0.02 +box wid 1.6 with .nw at last box.sw +"C (dhrystones)" ljust at last box.w + 0.1,-0.02 +"1.6" ljust at last box.e + 0.1,-0.02 +box wid 2.5 with .nw at last box.sw +"Modula-2" ljust at last box.w + 0.1,-0.02 +"2.5" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "Sun's" "compiler" "w/o opt." +move +[ +down; +boxht = 0.25 +box wid 3.7 +"C (arithmetic)" ljust at last box.w + 0.1,-0.02 +"3.7" ljust at last box.e + 0.1,-0.02 +box wid 2.2 with .nw at last box.sw +"C (dhrystones)" ljust at last box.w + 0.1,-0.02 +"2.2" ljust at last box.e + 0.1,-0.02 +box wid 1.8 with .nw at last box.sw +"Modula-2" ljust at last box.w + 0.1,-0.02 +"1.8" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +move +{ +right; +box invis "Sun's" "compiler" "-O4" +move +[ +down; +boxht = 0.25 +box wid 1.0 +"C (arith.)" ljust at last box.w + 0.1,-0.02 +"1.0" ljust at last box.e + 0.1,-0.02 +box wid 1.0 with .nw at last box.sw +"C (dhryst.)" ljust at last box.w + 0.1,-0.02 +"1.0" ljust at last box.e + 0.1,-0.02 +box wid 1.0 with .nw at last box.sw +"Modula-2" ljust at last box.w + 0.1,-0.02 +"1.0" ljust at last box.e + 0.1,-0.02 +] with .w at last box.e +} +.PE diff --git a/doc/sparc/pics/signal_stack b/doc/sparc/pics/signal_stack new file mode 100644 index 0000000..6afe5ad --- /dev/null +++ b/doc/sparc/pics/signal_stack @@ -0,0 +1,42 @@ +.PS +boxwid = 1.3 +down +[ +right +[ +down; +box "\s-4SPARC stack\s+4" ht .2 +box "\s-4EM stack\s+4" ht .1 +box "\s-4SPARC stack\s+4" ht .1 +box "\s-4EM stack\s+4" ht .1 +box "\s-4free\s+4" ht .2 +box "\s-4SPARC stack\s+4" ht .1 +box "free" ht .8 +box "heap" ht .3 +box "text" ht .5 +] +move 1 +[ +down; +box "\s-4SPARC stack\s+4" ht .2 +box "\s-4EM stack\s+4" 
ht .1 +box "\s-4SPARC stack\s+4" ht .1 +box "\s-4EM stack\s+4" ht .1 +box "\s-4free\s+4" ht .2 +box "\s-4SPARC stack\s+4" ht .1 +box "\s-4EM stack\s+4" ht .1 +box "\s-4free\s+4" ht .2 +box "\s-4SPARC stack\s+4" ht .1 +box "free" ht .4 +box "heap" ht .3 +box "text" ht .5 +] +] +move .3 +[ +right +box invis "before signal" +move 1 +box invis "during (1st) signal" +] +.PF diff --git a/doc/sparc/proto.make b/doc/sparc/proto.make new file mode 100644 index 0000000..e46464a --- /dev/null +++ b/doc/sparc/proto.make @@ -0,0 +1,32 @@ +# $Id: proto.make,v 1.3 1994/06/24 10:06:10 ceriel Exp $ + +#PARAMS do not remove this line! + +SRC_DIR = $(SRC_HOME)/doc/sparc +TARGET = $(TARGET_HOME)/doc/sparc.doc + +REFER=refer +TBL=tbl +PIC=pic +GRAP=grap + +SRC = $(SRC_DIR)/refs \ + $(SRC_DIR)/init \ + $(SRC_DIR)/title \ + $(SRC_DIR)/intro \ + $(SRC_DIR)/1 \ + $(SRC_DIR)/2 \ + $(SRC_DIR)/3 \ + 4 \ + $(SRC_DIR)/5 \ + A \ + $(SRC_DIR)/B + +$(TARGET): $(SRC) + $(REFER) -sA+T '-l\", ' -p $(SRC) | $(GRAP) | $(PIC) | $(TBL) > $@ + +4: $(SRC_DIR)/4 + sed 's^pics/^$(SRC_DIR)/pics/^' < $(SRC_DIR)/4 > 4 + +A: $(SRC_DIR)/A + sed 's^pics/^$(SRC_DIR)/pics/^' < $(SRC_DIR)/A > A diff --git a/doc/sparc/refs b/doc/sparc/refs new file mode 100644 index 0000000..ba46c3b --- /dev/null +++ b/doc/sparc/refs @@ -0,0 +1,185 @@ +%T The design of very fast portable compilers +%A A.S. Tanenbaum +%A M.F. Kaashoek +%A K.G. Langendoen +%A C.J.H. Jacobs +%J SIGPLAN Notices +%V 24 +%N 11 +%P 125-131 +%D November 1989 + +%T A Programmer-friendly LL(1) Parser Generator +%A D. Grune +%A C.J.H. 
Jacobs +%J Software \- Practice and Experience +%V 18 +%N 1 +%P 29-38 +%D January 1988 + +%T The Code Expander Generator +%A Frans Kaashoek +%A Koen Langendoen +%R IM-9 +%I Vrije Universiteit, Amsterdam +%D November 1987 + +%T The ACK Pascal Compiler +%A Aad Geudeke +%A Frans Hofmeester +%R IM-8 +%I Vrije Universiteit, Amsterdam +%D November 1987 + +%T The EM-interpreter +%A Eddo de Groot +%A Leo van den Berge +%R IM-7 +%I Vrije Universiteit, Amsterdam +%D June 1987 + +%T A set of multi\-process primitives for stack based machines +%A K. Bot +%A E. Scheffer +%R IR-122 +%I Vrije Universiteit, Amsterdam +%D December 1986 + +%T An Occam Compiler +%A K. Bot +%A E. Scheffer +%R IM-6 +%I Vrije Universiteit, Amsterdam +%D December 1986 + +%T Language- and Machine-independent Global Optimization on Intermediate Code +%A H.E. Bal +%A A.S. Tanenbaum +%J Computer Languages +%V 11 +%N 2 +%P 105-121 +%D April 1986 + +%T The ACK Target Optimizer +%A H.E. Bal +%R IR-107 +%D 1985 +%I Vrije Universiteit, Amsterdam + +%T Some Topics in Parser Generation +%A C.J.H. Jacobs +%R IR-105 +%D October 1985 +%I Vrije Universiteit, Amsterdam + +%T The CEM compiler +%A E.H. Baalbergen +%A D. Grune +%A M. Waage +%R IM-4 +%I Vrije Universiteit, Amsterdam +%D 1985 + +%T The Design and Implementation of the EM Global Optimizer +%A H.E. Bal +%I Vrije Universiteit, Amsterdam +%R IR-99 +%D March 1985 + +%T Does anybody out there want to write HALF of a compiler? +%A A.S. Tanenbaum +%A E.G. Keizer +%A H. van Staveren +%J Sigplan Notices +%V 19 +%N 8 +%P 106-108 +%D August 1984 + +%T Amsterdam Compiler Kit documentation +%A A.S. Tanenbaum et. al. +%I Vrije Universiteit, Amsterdam +%R IR-90 +%D June 1984 + +%T A Practical Toolkit for Making Portable Compilers +%A A. S. Tanenbaum +%A H. van Staveren +%A E. G. Keizer +%A J. W. Stevenson +%J Communications of the ACM +%V 26 +%N 9 +%P 654-660 +%D September 1983 + +%T Description of a Machine Architecture for use with Block Structured +Languages +%A A. S. 
Tanenbaum +%A H. van Staveren +%A E. G. Keizer +%A J. W. Stevenson +%R IR-81 +%D August 1983 +%I Vrije Universiteit, Amsterdam + +%T A Unix Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A H. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%J Proceedings USENIX conf. +%C Toronto, Canada +%V 26 +%D July 1983 +%P 255-261 + +%T Using Peephole Optimization on Intermediate Code +%A A.S. Tanenbaum +%A J.M. van Staveren +%A J.W. Stevenson +%J TOPLAS +%V 4 +%N 1 +%P 21-36 +%D January 1982 + +%T EM-1 Compiler +%A A.S. Tanenbaum +%J Pascal News +%D September 1981 +%P 4-38 + +%T A portable compiler for the Proposed ISO Standard Pascal Language +%A A.S. Tanenbaum +%A J.W. Stevenson +%A H. van Staveren +%J Sigplan Notices +%V 15 +%N 10 +%D 1980 + +%T Implications of Structured Programming for Machine Architecture +%A A.S. Tanenbaum +%J CACM +%V 21 +%N 3 +%P 237-246 +%D March 1978 + +%T The table driven code generator from the Amsterdam Compiler Kit (Second +revised edition) +%A H. van Staveren +%I Vrije Universiteit, Amsterdam +%R on-line internal ACK documentation +%D early 1985 + +%T Dhrystone Benchmark: Rationale for Version 2 and Measurement Rules +%A R.P. 
Weicker +%J Sigplan Notices +%V 23 +%N 8 +%D august 1988 +%P 49-62 diff --git a/doc/sparc/timing b/doc/sparc/timing new file mode 100644 index 0000000..9887db7 --- /dev/null +++ b/doc/sparc/timing @@ -0,0 +1,22 @@ + DHRYSTONES V2.0 + + cc cc -O4 cc -O fccO fccCE ack ack -O4 +compile time: + real 4.0 12.0 10.0 6.4 8.0 31.0 + user 1.6 7.3 4.1 1.9 1.8 2.0 9.3 + sys 0.9 2.1 1.8 2.5 1.5 2.0 7.7 + +run time: 7263 16250 15250 4730 3430 8474 10434 +(stones/sec) + + SIEVE + + cc cc -O4 fccO fccCE ack ack -O4 +compile time: + real 2.4 4.4 x 3.3 6.4 17.0 + user 0.8 1.6 x 0.7 0.7 3.2 + sys 0.7 1.0 x 0.8 1.3 6.2 + +run time: 7.43 2.02 x 12.18 2.93 2.26 + +All ack-derived compilers are shell script driven diff --git a/doc/sparc/title b/doc/sparc/title new file mode 100644 index 0000000..f073f45 --- /dev/null +++ b/doc/sparc/title @@ -0,0 +1,15 @@ +.In +.TL +.sp 1.2c +A fast backend for SPARC processors +.AU +Philip Homburg +Raymond Michiels +.AI +Dept. of Mathematics and Computer Science +Vrije Universiteit +Amsterdam, The Netherlands +.PP +.sp 1i +Afstudeerverslag, 20 augustus 1990 +.bp diff --git a/doc/toolkit.doc b/doc/toolkit.doc new file mode 100644 index 0000000..b372de6 --- /dev/null +++ b/doc/toolkit.doc @@ -0,0 +1,896 @@ +.\" $Id: toolkit.doc,v 1.4 1994/06/24 10:02:30 ceriel Exp $ +.RP +.ND July 1984 +.tr ~ +.ds as * +.TL +A Practical Tool Kit for Making Portable Compilers +.AU +Andrew S. Tanenbaum +Hans van Staveren +E. G. Keizer +Johan W. Stevenson +.AI +Mathematics Dept. +Vrije Universiteit +Amsterdam, The Netherlands +.AB +The Amsterdam Compiler Kit is an integrated collection of programs designed to +simplify the task of producing portable (cross) compilers and interpreters. +For each language to be compiled, a program (called a front end) +must be written to +translate the source program into a common intermediate code. 
+This intermediate code can be optimized and then either directly interpreted +or translated to the assembly language of the desired target machine. +The paper describes the various pieces of the tool kit in some detail, as well +as discussing the overall strategy. +.sp +Keywords: Compiler, Interpreter, Portability, Translator +.sp +CR Categories: 4.12, 4.13, 4.22 +.sp 12 +Authors' present addresses: + A.S. Tanenbaum, H. van Staveren, E.G. Keizer: Mathematics + Dept., Vrije Universiteit, Postbus 7161, 1007 MC Amsterdam, + The Netherlands + + J.W. Stevenson: NV Philips, S&I, T&M, Building TQ V5, Eindhoven, + The Netherlands +.AE +.NH 1 +Introduction +.PP +As more and more organizations acquire many micro- and minicomputers, +the need for portable compilers is becoming more and more acute. +The present situation, in which each hardware vendor provides its own +compilers -- each with its own deficiencies and extensions, and none of them +compatible -- leaves much to be desired. +The ideal situation would be an integrated system containing a family +of (cross) compilers, each compiler accepting a standard source language and +producing code for a wide variety of target machines. +Furthermore, the compilers should be compatible, so programs written in +one language can call procedures written in another language. +Finally, the system should be designed so as to make adding new languages +and new machines easy. +Such an integrated system is being built at the Vrije Universiteit. +Its design and implementation are the subject of this article. +.PP +Our compiler building system, which is called the "Amsterdam Compiler Kit" +(ACK), can be thought of as a "tool kit." +It consists of a number of parts that can be combined to form compilers +(and interpreters) with various properties. +The tool kit is based on an idea (UNCOL) that was first suggested in 1960 +[7], but which never really caught on then.
+The problem which UNCOL attempts to solve is how to make a compiler for +each of +.I N +languages on +.I M +different machines without having to write +.I N +x +.I M +programs. +.PP +As shown in Fig. 1, the UNCOL approach is to write +.I N +"front ends," each +of which translates one source language to a common intermediate language, +UNCOL (UNiversal Computer Oriented Language), and +.I M +"back ends," each +of which translates programs in UNCOL to a specific machine language. +Under these conditions, only +.I N ++ +.I M +programs must be written to provide all +.I N +languages on all +.I M +machines, instead of +.I N +x +.I M +programs. +.PP +Various researchers have attempted to design a suitable UNCOL +[2,8], but none of these have become popular. +It is our belief that previous attempts have failed because they have been +too ambitious, that is, they have tried to cover all languages +and all machines using a single UNCOL. +Our approach is more modest: we cater only to algebraic languages +and machines whose memory consists of 8-bit bytes, each with its own address. +Typical languages that could be handled include +Ada, ALGOL 60, ALGOL 68, BASIC, C, FORTRAN, +Modula, Pascal, PL/I, PL/M, PLAIN, and RATFOR, +whereas COBOL, LISP, and SNOBOL would be less efficient. +Examples of machines that could be included are the Intel 8080 and 8086, +Motorola 6800, 6809, and 68000, Zilog Z80 and Z8000, DEC PDP-11 and VAX, +and IBM 370 but not the Burroughs 6700, CDC Cyber, or Univac 1108 (because +they are not byte-oriented). +With these restrictions, we believe the old UNCOL idea can be used as the +basis of a practical compiler-building system. +.KF +.sp 15P +.ce 1 +Fig. 1. The UNCOL model. +.sp +.KE +.NH 1 +An Overview of the Amsterdam Compiler Kit +.PP +The tool kit consists of eight components: +.sp + 1. The preprocessor. + 2. The front ends. + 3. The peephole optimizer. + 4. The global optimizer. + 5. The back end. + 6. The target machine optimizer. + 7. 
The universal assembler/linker. + 8. The utility package. +.sp +.PP +A fully optimizing compiler, +depicted in Fig. 2, has seven cascaded phases. +Conceptually, each component reads an input file and writes a +transformed output file to be used as input to the next component. +In practice, some components may use temporary files to allow multiple +passes over the input or internal intermediate files. +.KF +.sp 12P +.ce 1 +Fig. 2. Structure of the Amsterdam Compiler Kit. +.sp +.KE +.PP +In the following paragraphs we will briefly describe each component. +After this overview, we will look at all of them again in more detail. +A program to be compiled is first fed into the (language independent) +preprocessor, which provides a simple macro facility, +and similar textual facilities. +The preprocessor's output is a legal program in one of the programming +languages supported, whereas the input is a program possibly augmented +with macros, etc. +.PP +This output goes into the appropriate front end, whose job it is to +produce intermediate code. +This intermediate code (our UNCOL) is the machine language for a simple +stack machine called EM (Encoding Machine). +A typical front end might build a parse tree from the input, and then +use the parse tree to generate EM code, which is similar to reverse Polish. +In order to perform this work, the front end has to maintain tables of +declared variables, labels, etc., determine where to place the +data structures in memory, and so on. +.PP +The EM code generated by the front end is fed into the peephole optimizer, +which scans it with a window of a few instructions, replacing certain +inefficient code sequences by better ones. +Such a search is important because EM contains instructions to handle +numerous important special cases efficiently +(e.g., incrementing a variable by 1).
+It is our strategy to relieve the front ends of the burden of hunting for +special cases because there are many front ends and only one peephole +optimizer. +By handling the special cases in the peephole optimizer, +the front ends become simpler, easier to write and easier to maintain. +.PP +Following the peephole optimizer is a global optimizer [5], which, +unlike the peephole optimizer, examines the program as a whole. +It builds a data flow graph to make possible a variety of +global optimizations, +among them, moving invariant code out of loops, avoiding redundant +computations, live/dead analysis and eliminating tail recursion. +Note that the output of the global optimizer is still EM code. +.PP +Next comes the back end, which differs from the front ends in a +fundamental way. +Each front end is a separate program, whereas the back end is a single +program that is driven by a machine dependent driving table. +The driving table for a specific machine tells how the EM code is mapped +onto the machine's assembly language. +Although a simple driving table might just macro expand each EM instruction +into a sequence of target machine instructions, a much more sophisticated +translation strategy is normally used, as described later. +For speed, the back end does not actually read in the driving table at run time. +Instead, the tables are compiled along with the back end in advance, resulting +in one binary program per machine. +.PP +The output of the back end is a program in the assembly language of some +particular machine. +The next component in the pipeline reads this program and performs peephole +optimization on it. +The optimizations performed here involve idiosyncrasies +of the target machine that cannot be performed in the machine-independent +EM-to-EM peephole optimizer. +Typically these optimizations take advantage of special instructions or special +addressing modes.
+.PP +The optimized target machine assembly code then goes into the final +component in the pipeline, the universal assembler/linker. +This program assembles the input to object format, extracting routines from +libraries and including them as needed. +.PP +The final component of the tool kit is the utility package, which contains +various test programs, interpreters for EM code, +EM libraries, conversion programs, and other aids for the implementer and +user. +.NH 1 +The Preprocessor +.PP +The function of the preprocessor is to extend all the programming languages +by adding certain generally useful facilities to them in a uniform way. +One of these is a simple macro system, in which the user can give names to +character strings. +The names can be used in the program, with the knowledge that they will be +macro expanded prior to being input to the front end. +Macros can be used for named constants, expanding short "procedures" +in line, etc. +.PP +Another useful facility provided by the preprocessor is the ability to +include compile-time libraries. +On large projects, it is common to have all the declarations and definitions +gathered together in a few files that are textually included in the programs +by instructing the preprocessor to read them in, thus fooling the front end +into thinking that they were part of the source program. +.PP +A third feature of the preprocessor is conditional compilation. +The input program can be split up into labeled sections. +By setting flags, some of the sections can be deleted by the preprocessor, +thus allowing a family of slightly different programs to be conveniently stored +on a single file. +.NH 1 +The Front Ends +.PP +A front end is a program that converts input in some source language to a +program in EM. +At present, front ends +exist or are in preparation for Pascal, C, and Plain, and are being considered +for Ada, ALGOL 68, FORTRAN 77, and Modula 2. 
+Each of the present front ends is independent of all the other ones, +although a general-purpose, table-driven front end is conceivable, provided +one can devise a way to express the semantics of the source language in the +driving tables. +The Pascal front end uses a top-down parsing algorithm (recursive descent), +whereas the C and Plain front ends are bottom-up. +.PP +All front ends, independent of the language being compiled, +produce a common intermediate code called EM, which is +the assembly language for a simple stack machine. +The EM machine is based on a memory architecture +containing a stack for local variables, a (static) data area for variables +declared in the outermost block and global to the whole program, and a heap +for dynamic data structures. +In some ways EM resembles P-code [6], but is more general, since it is +intended for a wider class of languages than just Pascal. +.PP +The EM instruction set has been described elsewhere +[9,10,11] +so we will only briefly summarize it here. +Instructions exist to: +.sp + 1. Load a variable or constant of some length onto the stack. + 2. Store the top item on the stack in memory. + 3. Add, subtract, multiply, divide, etc. the top two stack items. + 4. Examine the top one or two stack items and branch conditionally. + 5. Call procedures and return from them. +.sp +.PP +Loads and stores come in several variations, corresponding to the most common +programming language semantics, for example, constants, simple variables, +fields of a record, elements of an array, and so on. +Distinctions are also made between variables local to the current block +(i.e., stack frame), those in the outermost block (static storage), and those +at intermediate lexicographic levels, which are accessed by following the +static chain at run time. 
+.PP +All arithmetic instructions have a type (integer, unsigned, real, +pointer, or set) and an +operand length, which may either be explicit or may be popped from the stack +at run time. +Monadic branch instructions pop an item from the stack and branch if it is +less than zero, less than or equal to zero, etc. +Dyadic branch instructions pop two items, compare them, and branch accordingly. +.PP +In addition to these basic EM instructions, there is a collection of special +purpose instructions (e.g., to increment a local variable), which are typically +produced from the simple ones by the peephole optimizer. +Although the complete EM instruction set contains nearly 150 instructions, +only about 60 of them are really primitive; the rest are simply abbreviations +for commonly occurring EM instruction sequences. +.PP +Of particular interest is the way object sizes are parametrized. +The front ends allow the user to indicate how many bytes an integer, real, etc. +should occupy. +Given this information, the front ends can allocate memory, determining +the placement of variables within the stack frame. +Sizes for primitive types are restricted to 8, 16, 32, 64, etc. bits. +The front ends are also parametrized by the target machine's word length +and address size so they can tell, for example, how many "load" instructions +to generate to move a 32-bit integer. +In the examples used henceforth, +we will assume a 16-bit word size and 16-bit integers. +.PP +Since only byte-addressable target machines are permitted, +it is nearly +always possible to implement any requested sizes on any target machine. +For example, the designer of the back end tables for the Z80 should provide +code for 8-, 16-, and 32-bit arithmetic. +In our view, the Pascal, C, or Plain programmer specifies what lengths +are needed, +without reference to the target machine, +and the back end provides it. +This approach greatly enhances portability. 
+While it is true that doing all arithmetic using 32-bit integers on the Z80 +will not be terribly fast, we feel that if that is what the programmer needs, +it should be possible to implement it. +.PP +Like all assembly languages, EM has not only machine instructions, but also +pseudoinstructions. +These are used to indicate the start and end of each procedure, allocate +and initialize storage for data, and similar functions. +One particularly important pseudoinstruction is the one that is used to +transmit information to the back end for optimization purposes. +It can be used to suggest variables that are good candidates to assign to +registers, delimit the scope of loops, indicate that certain variables +contain a useful value (next operation is a load) or not (next operation is +a store), and various other things. +.NH 1 +The Peephole Optimizer +.PP +The peephole optimizer reads in unoptimized EM programs and writes out +optimized ones. +Both the input and output are expressed in a highly compact code, rather than +in ASCII, to reduce the i/o time, which would otherwise dominate the CPU +time. +The program itself is table driven, and is, by and large, ignorant of the +semantics of EM. +The knowledge of EM is contained in a +language- and machine-independent table consisting of about 400 +pattern-replacement pairs. +We will briefly describe the kinds of optimizations it performs below; +a more complete discussion can be found in [9]. +.PP +Each line in the driving table describes one optimization, consisting of a +pattern part and a replacement part. +The pattern part is a series of one or more EM instructions and a boolean +expression. +The replacement part is a series of EM instructions with operands. +A typical optimization might be: +.sp + LOL LOC ADI STL ($1 = $4) and ($2 = 1) and ($3 = 2) ==> INL $1 +.sp +where the text prior to the ==> symbol is the pattern and the text after it is +the replacement. 
+LOL loads a local variable onto the stack, LOC loads a constant onto the stack, +ADI is integer addition, and STL is store local. +The pattern specifies that four consecutive EM instructions are present, with +the indicated opcodes, and that furthermore the operand of the first +instruction (denoted by $1) and the fourth instruction (denoted by $4) are the +same, the constant pushed by LOC is 1, and the size of the integers added by +ADI is 2 bytes. +(EM instructions have at most one operand, so it is not necessary to specify +the operand number.) +Under these conditions, the four instructions can be replaced by a single INL +(increment local) instruction whose operand is equal to that of LOL. +.PP +Although the optimizations cover a wide range, the main ones +can be roughly divided into the following categories. +\fIConstant folding\fR +is used to evaluate constant expressions, such as 2*3~+~7 at +compile time instead of run time. +\fIStrength reduction\fR +is used to replace one operation, such as multiply, by +another, such as shift. +\fIReordering of expressions\fR +helps in cases like -K/5, which can be better +evaluated as K/-5, because the former requires +a division and a negation, whereas the latter requires only a division. +\fINull instructions\fR +include resetting the stack pointer after a call with 0 parameters, +offsetting zero bytes to access the +first element of a record, or jumping to the next instruction. +\fISpecial instructions\fR +are those like INL, which deal with common special cases +such as adding one to a variable or comparing something to zero. +\fIGroup moves\fR +are useful because a sequence +of consecutive moves can often be replaced with EM code +that allows the back end to generate a loop instead of in line code. +\fIDead code elimination\fR +is a technique for removing unreachable statements, possibly made unreachable +by previous optimizations. 
+\fIBranch chain compression\fR +can be applied when a branch instruction jumps to another branch instruction. +The first branch can jump directly to the final destination instead of +indirectly. +.PP +The last two optimizations logically belong in the global optimizer but are +in the local optimizer for historical reasons (meaning that the local +optimizer has been the only optimizer for many years and the optimizations were +easy to do there). +.NH 1 +The Global Optimizer +.PP +In contrast to the peephole optimizer, which examines the EM code a few lines +at a time through a small window, the global optimizer examines the +program's large scale structure. +Three distinct types of optimizations can be found here: +.sp + 1. Interprocedural optimizations. + 2. Intraprocedural optimizations. + 3. Basic block optimizations. +.sp +We will now look at each of these in turn. +.PP +Interprocedural optimizations are those spanning procedure boundaries. +The most important one is deciding to expand procedures in line, +especially short procedures that occur in loops and pass several parameters. +If it takes more time or memory to pass the parameters than to do the work, +the program can be improved by eliminating the procedure. +The inverse optimization -- discovering long common code sequences and +turning them into a procedure -- is also possible, but much more difficult. +Like much of the global optimizer's work, the decision to make or not make +a certain program transformation is a heuristic one, based on knowledge of +how the back end works, how most target machines are organized, etc. +.PP +The heart of the global optimizer is its analysis of individual +procedures. +To perform this analysis, the optimizer must locate the basic blocks, +instruction sequences which can be entered only at the top and exited +only at the bottom. +It then constructs a data flow graph, with the basic blocks as nodes and +jumps between blocks as arcs. 
+.PP +From the data flow graph, many important properties of the program can be +discovered and exploited. +Chief among these is the presence of loops, indicated by cycles in the graph. +One important optimization is looking for code that can be moved outside the +loop, either prior to it or subsequent to it. +Such code motion saves execution time, although it does not save memory. +Unrolling loops is also possible and desirable in some cases. +.PP +Another area in which global analysis of loops is especially important is +in register allocation. +While it is true that EM does not have any registers to allocate, +the optimizer can easily collect information to allow the +back end to allocate registers wisely. +For example, the global optimizer can collect static frequency-of-use +and live/dead information about variables. +(A variable is dead at some point in the program if its current value is +not needed, i.e., the next reference to it overwrites it rather than +reading it; if the current value will eventually be used, the variable is +live.) +If two variables are never simultaneously live over some interval of code +(e.g., the body of a loop), they can be packed into a single variable, +which, if used often enough, may warrant being assigned to a register. +.PP +Many loops involve arrays: this leads to other optimizations. +If an array is accessed sequentially, with each iteration using the next +higher numbered element, code improvement is often possible. +Typically, a pointer to the bottom element of each array can be set up +prior to the loop. +Within the loop the element is accessed indirectly via the pointer, which is +also incremented by the element size on each iteration. +If the target machine has an autoincrement addressing mode and the pointer +is assigned to a register, an array access can often be done in a single +instruction. 
+.PP +Other intraprocedural optimizations include removing tail recursion +(last statement is a recursive call to the procedure itself), +topologically sorting the basic blocks to minimize the number of branch +instructions, and common subexpression recognition. +.PP +The third general class of optimizations done by the global optimizer is +improving the structure of a basic block. +For the most part these involve transforming arithmetic or boolean +expressions into forms that are likely to result in better target code. +As a simple example, A~+~B*C can be converted to B*C~+~A. +The latter can often +be handled by loading B into a register, multiplying the register by C, and +then adding in A, whereas the former may involve first putting A into a +temporary, depending on the details of the code generation table. +Another example of this kind of basic block optimization is transforming +-B~+~A~<~0 into the equivalent, but simpler, A~<~B. +.NH 1 +The Back End +.PP +The back end reads a stream of EM instructions and generates assembly code +for the target machine. +Although the algorithm itself is machine independent, for each target +machine a machine dependent driving table must be supplied. +The driving table effectively defines the mapping of EM code to target code. +.PP +It will be convenient to think of the EM instructions being read as a +stream of tokens. +For didactic purposes, we will concentrate on two kinds of tokens: +those that load something onto the stack, and those that perform some operation +on the top one or two values on the stack. +The back end maintains at compile time a simulated stack whose behavior +mirrors what the stack of a hardware EM machine would do at run time. +If the current input token is a load instruction, a new entry is pushed onto +the simulated stack. +.PP +Consider, as an example, the EM code produced for the statement K~:=~I~+~7. +If K and I are +2-byte local variables, it will normally be LOL I; LOC 7; ADI~2; STL K. 
+Initially the simulated stack is empty. +After the first token has been read and processed, the simulated stack will +contain a stack token of type MEM with attributes telling that it is a local, +giving its address, etc. +After the second token has been read and processed, the top two tokens on the +simulated stack will be CON (constant) on top and MEM directly underneath it. +.PP +At this point the back end reads the ADI~2 token and +looks in the driving table to find a line or lines that define the +action to be taken for ADI~2. +For a typical multiregister machine, instructions will exist to add constants +to registers, but not to memory. +Consequently, the driving table will not contain an entry for ADI~2 with stack +configuration CON, MEM. +.PP +The back end is now faced with the problem of how to get from its +current stack configuration, CON, MEM, which is not listed, to one that is +listed. +The table will normally contain rules (which we call "coercions") +for converting between CON, REG, MEM, and similar tokens. +Therefore the back end attempts to "coerce" the stack into a configuration +that +.I is +present in the table. +A typical coercion rule might tell how to convert a MEM into +a REG, namely by performing the actions of allocating a +register and emitting code to move the memory word to that register. +Having transformed the compile-time stack into a configuration allowed for +ADI~2, the rule can be carried out. +A typical rule +for ADI~2 might have stack configuration REG, MEM +and would emit code to add the MEM to the REG, leaving the stack +with a single REG token instead of the REG and MEM tokens present before the +ADI~2. +.PP +In general, there will be more than one possible coercion path. +Assuming reasonable coercion rules for our example, +we might be able to convert +CON MEM into CON REG by loading the variable I into a register. +Alternatively, we could coerce CON to REG by loading the constant into a register. 
+The first coercion path does the add by first loading I into a register and +then adding 7 to it. +The second path first loads 7 into a register and then adds I to it. +On machines with a fast LOAD IMMEDIATE instruction for small constants +but no fast ADD IMMEDIATE, or vice +versa, one code sequence will be preferable to the other. +.PP +In fact, we actually have more choices than suggested above. +In both coercion paths a register must be allocated. +On many machines, not every register can be used in every operation, so the +choice may be important. +On some machines, for example, the operand of a multiply must be in an odd +register. +To summarize, from any state (i.e., token and stack configuration), a +variety of choices can be made, leading to a variety of different target +code sequences. +.PP +To decide which of the various code sequences to emit, the back end must have +some information about the time and memory cost of each one. +To provide this information, each rule in the driving table, including +coercions, specifies both the time and memory cost of the code emitted when +the rule is applied. +The back end can then simply try each of the legal possibilities (including all +the possible register allocations) to find the cheapest one. +.PP +This situation is similar to that found in a chess or other game-playing +program, in which from any state a finite number of moves can be made. +Just as in a chess program, the back end can look at all the "moves" that can +be made from each state reachable from the original state, and thus find the +sequence that gives the minimum cost to a depth of one. +More generally, the back end can evaluate all paths corresponding to accepting +the next +.I N +input tokens, find the cheapest one, and then make the first move along +that path, precisely the way a chess program would. +.PP +Since the back end is analogous to both a parser and a chess playing program, +some clarifying remarks may be helpful. 
+First, chess programs and the back end must do some look ahead, whereas the +parser for a well-designed grammar can usually suffice with one input token +because grammars are supposed to be unambiguous. +In contrast, many legal mappings +from a sequence of EM instructions to target code may exist. +Second, like a parser but unlike a chess program, the back end has perfect +information -- it does not have to contend with an unpredictable opponent's +moves. +Third, chess programs normally make a static evaluation of the board and +label the +.I nodes +of the tree with the resulting scores. +The back end, in contrast, associates costs with +.I arcs +(moves) rather than nodes (states). +However, the difference is not essential, since it could +also label each node with the cumulative cost from the root to that node. +.PP +As mentioned above, the cost field in the table contains +.I both +the time and memory costs for the code emitted. +It should be clear that the back end could use either one +or some linear combination of them as the scoring function for evaluating moves. +A user can instruct the compiler to optimize for time or for memory or +for, say, 0.3 x time + 0.7 x memory. +Thus the same compiler can provide a wide range of performance options to +the user. +The writer of the back end table can take advantage of this flexibility by +providing several code sequences with different tradeoffs for each EM +instruction (e.g., in line code vs. call to a run time routine). +.PP +In addition to the time-space tradeoffs, by specifying the depth of search +parameter, +.I N , +the user can effectively also trade off compile time vs. object +code quality, for whatever code metric has been chosen. +In summary, by combining the properties of a parser and a game playing program, +it is possible to make a code generator that is table driven, +highly flexible, and has the ability to produce good code from a +stack machine intermediate code. 
+.NH 1 +The Target Machine Optimizer +.PP +In the model of Fig. 2, the peephole optimizer comes before the global +optimizer. +It may happen that the code produced by the global optimizer can also +be improved by another round of peephole optimization. +Conceivably, the system could have been designed to iterate peephole and +global optimizations until no more of either could be performed. +.PP +However, both of these optimizations are done on the machine independent +EM code. +Neither is able to take advantage of the peculiarities and idiosyncrasies with +which most target machines are well endowed. +It is the function of the final +optimizer to do any (peephole) optimizations that still remain. +.PP +The algorithm used here is the same as in the EM peephole optimizer. +In fact, if it were not for the differences between EM syntax, which is +very restricted, and target assembly language syntax, +which is less so, precisely the same program could be used for both. +Nevertheless, the same ideas apply concerning patterns and replacements, so +our discussion of this optimizer will be restricted to one example. +.PP +To see what the target optimizer might do, consider the +PDP-11 instruction sequence sub #2,r0; mov (r0),x. +First 2 is subtracted from register 0, then the word pointed to by it +is moved to x. +The PDP-11 happens to have an addressing mode to perform this sequence in +one instruction: mov -(r0),x. +Although it is conceivable that this instruction could be included in the +back end driving table for the PDP-11, it is awkward to do so because it +can occur in so many contexts. +It is much easier to catch things like this in a separate program. +.NH 1 +The Universal Assembler/Linker +.PP +Although assembly languages for different machines may appear very different +at first glance, they have a surprisingly large intersection. +We have been able to construct an assembler/linker that is almost entirely +independent of the assembly language being processed. 
+To tailor the program to a specific assembly language, it is necessary to +supply a table giving the list of instructions, the bit patterns required for +each one, and the language syntax. +The machine independent part of the assembler/linker is then compiled with the +table to produce an assembler and linker for a particular target machine. +Experience has shown that writing the necessary table for a new machine can be +done in less than a week. +.PP +To enforce a modicum of uniformity, we have chosen to use a common set of +pseudoinstructions for all target machines. +They are used to initialize memory, allocate uninitialized memory, determine the +current segment, and similar functions found in most assemblers. +.PP +The assembler is also a linker. +After assembling a program, it checks to see if there are any +unsatisfied external references. +If so, it begins reading the libraries to find the necessary routines, including +them in the object file as it finds them. +This approach requires libraries to be maintained in assembly language form, +but eliminates the need for inventing a language to express relocatable +object programs in a machine independent way. +It also simplifies the assembler, since producing absolute object code is +easier than producing relocatable object code. +Finally, although assembly language libraries may be somewhat larger than +relocatable object module libraries, the loss in speed due to having more +input may be more than compensated for by not having to pass an intermediate +file between the assembler and linker. +.NH 1 +The Utility Package +.PP +The utility package is a collection of programs designed to aid the +implementers of new front ends or new back ends. +The most useful ones are the test programs. +For example, one test set, EMTEST, systematically checks out a back end by +executing an ever larger subset of the EM instructions. +It starts out by testing LOC, LOL and a few of the other essential instructions. 
+If these appear to work, it then tries out new instructions one at a time, +adding them to the set of instructions "known" to work as they pass the tests. +.PP +Each instruction is tested with a variety of operands chosen from values +where problems can be expected. +For example, on target machines which have 16-bit index registers but only +allow 8-bit displacements, a fundamentally different algorithm may be needed +for accessing +the first few bytes of local variables and those with offsets of thousands. +The test programs have been carefully designed to thoroughly test all relevant +cases. +.PP +In addition to EMTEST, test programs in Pascal, C, and other languages are also +available. +A typical test is: +.sp + i := 9; \fBif\fP i + 250 <> 259 \fBthen\fP error(16); +.sp +Like EMTEST, the other test programs systematically exercise all features of the +language being tested, and do so in a way that makes it possible to pinpoint +errors precisely. +While it has been said that testing can only demonstrate the presence of errors +and not their absence, our experience is that +the test programs have been invaluable in debugging new parts of the system +quickly. +.PP +Other utilities include programs to convert +the highly compact EM code produced by front ends to ASCII and vice versa, +programs to build various internal tables from human writable input formats, +a variety of libraries written in or compiled to EM to make them portable, +an EM assembler, and EM interpreters for various machines. +.PP +Interpreting the EM code instead of translating it to target machine language +is useful for several reasons. +First, the interpreters provide extensive run time diagnostics including +an option to list the original source program (in Pascal, C, etc.) with the +execution frequency or execution time for each source line printed in the +left margin. 
+Second, since an EM program is typically about one-third the size of a +compiled program, large programs can be executed on small machines. +Third, running the EM code directly makes it easier to pinpoint errors in +the EM output of front ends still being debugged. +.NH 1 +Summary and Conclusions +.PP +The Amsterdam Compiler Kit is a tool kit for building +portable (cross) compilers and interpreters. +The main pieces of the kit are the front ends, which convert source programs +to EM code, optimizers, which improve the EM code, and back ends, which convert +the EM code to target assembly language. +The kit is highly modular, so writing one front end +(and its associated runtime routines) +is sufficient to implement +a new language on a dozen or more machines, and writing one back end table +and one universal assembler/linker table is all that is needed to bring up all +the previously implemented languages on a new machine. +In this manner, the contents, and hopefully the usefulness, of the toolkit +will increase in time. +.PP +We believe the principal lesson to be learned from our work is that the old +UNCOL idea is basically a sound way to produce compilers, provided suitable +restrictions are placed on the source languages and target machines. +We also believe that although compilers produced by this technology may not +be equal to the very best handcrafted compilers, +in terms of object code quality, they are certainly +competitive with many existing compilers. +However, when one factors in the cost of producing the compiler, +the possible slight loss in performance may be more than compensated for by the +large decrease in production cost. +As a consequence of our work and similar work by other researchers [1,3,4], +we expect integrated compiler building kits to become increasingly popular +in the near future. +.PP +The toolkit is now available for various computers running the +.UX +operating system. +For information, contact the authors. 
+.NH 1 +References +.LP +.nr r 0 1 +.in +4 +.ti -4 +\fB~\n+r.\fR Graham, S.L. +Table-Driven Code Generation. +.I "Computer~13" , +8 (August 1980), 25-34. +.PP +A discussion of systematic ways to do code generation, +in particular, the idea of having a table with templates that match parts of +the parse tree and convert them into machine instructions. +.sp 2 +.ti -4 +\fB~\n+r.\fR Haddon, B.K., and Waite, W.M. +Experience with the Universal Intermediate Language Janus. +.I "Software Practice & Experience~8" , +5 (Sept.-Oct. 1978), 601-616. +.PP +An intermediate language for use with ALGOL 68, Pascal, etc. is described. +The paper discusses some problems encountered and how they were dealt with. +.sp 2 +.ti -4 +\fB~\n+r.\fR Johnson, S.C. +A Portable Compiler: Theory and Practice. +.I "Ann. ACM Symp. Prin. Prog. Lang." , +Jan. 1978. +.PP +A cogent discussion of the portable C compiler. +Particularly interesting are the author's thoughts on the value of +computer science theory. +.sp 2 +.ti -4 +\fB~\n+r.\fR Leverett, B.W., Cattell, R.G.G, Hobbs, S.O., Newcomer, J.M., +Reiner, A.H., Schatz, B.R., and Wulf, W.A. +An Overview of the Production-Quality Compiler-Compiler Project. +.I Computer~13 , +8 (August 1980), 38-49. +.PP +PQCC is a system for building compilers similar in concept but differing in +details from the Amsterdam Compiler Kit. +The paper describes the intermediate representation used and the code generation +strategy. +.sp 2 +.ti -4 +\fB~\n+r.\fR Lowry, E.S., and Medlock, C.W. +Object Code Optimization. +.I "Commun.~ACM~12", +(Jan. 1969), 13-22. +.PP +A classic paper on global object code optimization. +It covers data flow analysis, common subexpressions, code motion, register +allocation and other techniques. +.sp 2 +.ti -4 +\fB~\n+r.\fR Nori, K.V., Ammann, U., Jensen, K., Nageli, H. +The Pascal P Compiler Implementation Notes. +Eidgen. Tech. Hochschule, Zurich, 1975. 
+.PP +A description of the original P-code machine, used to transport the Pascal-P +compiler to new computers. +.sp 2 +.ti -4 +\fB~\n+r.\fR Steel, T.B., Jr. UNCOL: the Myth and the Fact. in +.I "Ann. Rev. Auto. Prog." +Goodman, R. (ed.), vol 2., (1960), 325-344. +.PP +An introduction to the UNCOL idea by its originator. +.sp 2 +.ti -4 +\fB~\n+r.\fR Steel, T.B., Jr. +A First Version of UNCOL. +.I "Proc. Western Joint Comp. Conf." , +(1961), 371-377. +.PP +The first detailed proposal for an UNCOL. By current standards it is a +primitive language, but it is interesting for its historical perspective. +.sp 2 +.ti -4 +\fB~\n+r.\fR Tanenbaum, A.S., van Staveren, H., and Stevenson, J.W. +Using Peephole Optimization on Intermediate Code. +.I "ACM Trans. Prog. Lang. and Sys. 3" , +1 (Jan. 1982) pp. 21-36. +.PP +A detailed description of a table-driven peephole optimizer. +The driving table provides a list of patterns to match as well as the +replacement text to use for each successful match. +.sp 2 +.ti -4 +\fB\n+r.\fR Tanenbaum, A.S., Stevenson, J.W., Keizer, E.G., and van Staveren, H. +Description of an Experimental Machine Architecture for use with Block +Structured Languages. +Informatica Rapport 81, Vrije Universiteit, Amsterdam, 1983. +.PP +The defining document for EM. +.sp 2 +.ti -4 +\fB\n+r.\fR Tanenbaum, A.S. +Implications of Structured Programming for Machine Architecture. +.I "Comm. ACM~21" , +3 (March 1978), 237-246. +.PP +The background and motivation for the design of EM. +This early version emphasized the idea of interpreting the intermediate +code (then called EM-1) rather than compiling it. diff --git a/doc/top/proto.make b/doc/top/proto.make new file mode 100644 index 0000000..cc0b344 --- /dev/null +++ b/doc/top/proto.make @@ -0,0 +1,11 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:03:49 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +REFER=refer +TBL=tbl + +SRC_DIR = $(SRC_HOME)/doc/top + +$(TARGET_HOME)/doc/top.doc: $(SRC_DIR)/top.n $(SRC_DIR)/refs.top + $(REFER) -sA+T -l4,2 -p $(SRC_DIR)/refs.top $(SRC_DIR)/top.n | $(TBL) > $@ diff --git a/doc/top/refs.top b/doc/top/refs.top new file mode 100644 index 0000000..c443898 --- /dev/null +++ b/doc/top/refs.top @@ -0,0 +1,84 @@ +%T A Practical Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A J.M. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%I Vrije Universiteit, Amsterdam +%R Rapport nr IR-74 +%D October 1981 + +%T A Practical Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A J.M. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%J CACM +%V 26 +%N 9 +%P 654-660 +%D September 1983 + +%T A Unix Toolkit for Making Portable Compilers +%A A.S. Tanenbaum +%A J.M. van Staveren +%A E.G. Keizer +%A J.W. Stevenson +%J Proceedings USENIX conf. +%C Toronto, Canada +%V 26 +%D July 1983 +%P 255-261 + +%T Using Peephole Optimization on Intermediate Code +%A A.S. Tanenbaum +%A J.M. van Staveren +%A J.W. Stevenson +%J TOPLAS +%V 4 +%N 1 +%P 21-36 +%D January 1982 + +%T Amsterdam Compiler Kit documentation +%A A.S. Tanenbaum +%A E.G. Keizer +%A J.M. van Staveren +%A J.W. Stevenson +%I Vrije Universiteit, Amsterdam +%R Rapport nr IR-90 +%D June 1984 + +%T Language- and Machine-independent Global Optimization on +Intermediate Code +%A H.E. Bal +%A A.S. Tanenbaum +%I Vrije Universiteit, Amsterdam +%R Rapport IR-98 +%D March 1985 + +%T The Design and Implementation of the EM Global Optimizer +%A H.E. Bal +%I Vrije Universiteit, Amsterdam +%R Rapport IR-99 +%D March 1985 + + +%T The C Programming Language +%A B.W. Kernighan +%A D.M. Ritchie +%I Prentice-Hall, Inc +%C Englewood Cliffs,NJ +%D 1978 + +%T Principles of compiler design +%A A.V. Aho +%A J.D. Ullman +%I Addison-Wesley +%C Reading, Massachusetts +%D 1978 + +%T Some Topics in Parser Generation +%A C.J.H. 
Jacobs +%R Rapport IR-105 +%D October 1985 +%I Vrije Universiteit, Amsterdam diff --git a/doc/top/top.n b/doc/top/top.n new file mode 100644 index 0000000..8e0715c --- /dev/null +++ b/doc/top/top.n @@ -0,0 +1,869 @@ +.ND +.tr ~ +.ds <. +.ds <, +.ds >. . +.ds >, , +.ds [. [ +.ds .] ] +.TL +The ACK Target Optimizer +.AU +H.E. Bal +.AI +Vrije Universiteit +Wiskundig Seminarium, Amsterdam +.AB +The Target Optimizer is one of several optimizers that are part of +the Amsterdam Compiler Kit. +It operates directly on assembly code, +rather than on a higher level intermediate code, +as the Peephole Optimizer and Global Optimizer do. +Consequently, the Target Optimizer can do optimizations +that are highly machine-dependent. +.PP +Each target machine has its own Target Optimizer. +New optimizers are generated by the Target Optimizer Generator, +which uses a machine-dependent table as input. +This document contains full information on how to +write such a table for a new machine. +It also discusses the implementation of the +Target Optimizer and its generator. +.AE +.NH 1 +Introduction +.PP +.FS +This work was supported by the +Stichting Technische Wetenschappen (STW) +under grant VWI03.0001. +.FE +This document describes the target optimizer component +of the Amsterdam Compiler Kit (ACK) . +.[ +tanenbaum staveren amsterdam toolkit +.] +.[ +tanenbaum staveren cacm +.] +.[ +tanenbaum staveren toronto +.] +Optimization takes place in several parts of ACK compilers, +most notably in the Peephole Optimizer +.[ +staveren peephole toplas +.] +and +the Global Optimizer, +.[ +bal tanenbaum global optimization +.] +.[ +bal implementation global optimizer +.] +which are both language- and machine-independent, +and in the machine-specific code generators. +.[ +documentation amsterdam compiler kit +.] +The target optimizer is the finishing touch in this sequence of +optimizers. +It can be used to capture those optimizations that are hard +to express in the other parts of ACK. 
+These optimizations will typically be very machine-specific. +.PP +The target optimizer operates on the assembly code of some target machine. +Hence there is one target optimizer per machine. +However, just as for the ACK code generators and assemblers, +a framework has been built that allows easy generation of +target optimizers out of machine-independent parts and a +machine-dependent description table (see figure 1.). +So the major part of the code of a target optimizer is +shared among all target optimizers. +.DS +.ft CW + + + |-------------------------| + | machine-independent | + | code | + | | + |-----------------| |-------------------------| +descrip- |target optimizer | | machine-dependent code | + tion --> |generator | ----> | + tables | +table | | | | + |-----------------| |-------------------------| + + target optimizer +.ft R + + Figure 1: Generation of a target optimizer. + +.DE +.PP +This document focusses on the description of the machine-dependent table. +In chapter 2 we give an informal introduction to the optimization +algorithm and to the definition of the table format. +Chapters 3 and 4 discuss the implementation of the target optimizer +and the target optimizer generator. +Appendix A gives full information for writing a description table. +.NH 1 +Global structure of the target optimizer +.PP +The target optimizer is based on the well understood model +of a \fIpeephole optimizer\fR. +.[ +aho ullman compiler +.] +It contains a machine-dependent table +of (pattern,replacement) pairs. +Each pattern describes +a sequence of one or more assembler instructions +that can be replaced by zero or more equivalent, yet cheaper, +instructions (the 'replacement'). +The optimizer maintains a \fIwindow\fR that moves over the input. +At any moment, the window contains some contiguous part of the input. 
+If the instructions in the current window match some pattern +in the table, +they are replaced by the corresponding replacement; +else, the window moves one instruction to the right. +.PP +In the remainder of this section we will give an informal +description of the machine-dependent table. +A more precise definition is given in appendix A. +We will first discuss the restrictions put on the +format of the assembly code. +.NH 2 +Assumptions about the assembly code format +.PP +We assume that a line of assembly code begins with an +instruction \fImnemonic\fR (opcode), +followed by zero or more \fIoperands\fR. +The mnemonic and the first operand must be separated by a special +character (e.g. a space or a tab). +Likewise, the operands must be separated by a special +character (e.g. a comma). +These separators need not be the same for all machines. +.NH 2 +Informal description of the machine-dependent tables +.PP +The major part of the table consists of (pattern,replacement) pairs +called \fIentries\fR. +.PP +A pattern is a list of instruction descriptions. +Each instruction description describes the instruction mnemonic and +the operands. +.PP +A mnemonic is described either by a string constant or by the +keyword ANY. +As all entities dealt with by the target optimizer are strings, +string constants do not contain quotes. +A string constant matches only itself. +ANY matches every instruction mnemonic. +.nf + +Examples of mnemonic descriptions: +.ft CW + + add + sub.l + mulw3 + ANY +.ft R +.fi +.PP +An operand can also be described by a string constant. +.nf + +Examples: +.ft CW + + (sp)+ + r5 + -4(r6) + +.ft R +.fi +Alternatively, it can be described by means of a \fIvariable name\fR. +Variables have values which are strings. +They have to be declared in the table before the patterns. +Each such declaration defines the name of a variable and +a \fIrestriction\fR to which its value is subjected. 
+.nf +Example of variable declarations: +.ft CW + + CONST { VAL[0] == '$' }; + REG { VAL[0] == 'r' && VAL[1] >= '0' && VAL[1] <= '3' && + VAL[2] == '\\0' }; + X { TRUE }; + +.ft R +.fi +The keyword VAL denotes the value of the variable, which is +a null-terminated string. +An operand description given via a variable name matches an +actual operand if the actual operand obeys the associated restriction. +.nf +.ft CW + + CONST matches $1, $-5, $foo etc. + REG matches r0, r1, r2 and r3 + X matches anything +.ft R + +.fi +The restriction (between curly braces) may be any legal "C" +.[ +kernighan ritchie c programming +.] +expression. +It may also contain calls to user-defined procedures. +These procedures must be added to the table after the patterns. +.nf + +Example: +.ft CW + + FERMAT_NUMBER { VAL[0] == '$' && is_fermat_number(&VAL[1]) }; + +.ft R +.fi +An operand can also be described by a mixture of a string constant +and a variable name. +The most general form allowed is: +.nf + + string_constant1 variable_name string_constant2 + +Example: +.ft CW + + (REG)+ matches (r0)+, (r1)+, (r2)+ and (r3)+ + +.ft R +.fi +Any of the three components may be omitted, +so the first two forms are just special cases of the general form. +The name of a variable can not be used as a string constant. +In the above context, it is impossible to define an operand that +matches the string "REG". +This limitation is of little consequence, +as the table writer is free to choose the names of variables. +This approach, however, avoids the need for awkward escape sequences. +.PP +A pattern consists of one or more instruction descriptions +(separated by a colon) +followed by an optional constraint. +A pattern "P1 : P2 : .. : Pn C" matches the sequence of +instructions "I1 I2 .. 
In" if: +.IP (i) 7 +for each i, 1 <= i <= n, Pi matches Ii, as described above; +.IP (ii) +multiple occurrences of the same variable name or of +the keyword ANY stand for the same values throughout the pattern; +.IP (iii) +the optional constraint C is satisfied, i.e. it evaluates to TRUE. +.LP +.nf +The pattern: +.ft CW + + dec REG : move.b CONST,(REG) + +.ft R +matches: +.ft CW + + dec r0 : move.b $4,(r0) + +.ft R +but not: +.ft CW + + dec r0 : move.b $4,(r1) + +.ft R +(as the variable REG matches two different strings). +.fi +If a pattern containing different registers must be described, +extra names for a register should be declared, all sharing +the same restriction. +.nf +Example: +.ft CW + + REG1,REG2 { VAL[0] == 'r' && ..... }; + + addl3 REG1,REG1,REG2 : subl2 REG2,REG1 +.ft R +.fi +.PP +The optional constraint is an auxiliary "C" expression (just like +the parameter restrictions). +The expression may refer to the variables and to ANY. +.nf +Example: +.ft CW + + move REG1,REG2 { REG1[1] == REG2[1] + 1 } + +.ft R +matches +.ft CW + + move r1,r0 + move r2,r1 + move r3,r2 +.ft R +.fi +.PP +The replacement part of a (pattern,replacement) table entry +has the same structure as a pattern, except that: +.IP (i) +it may not contain an additional constraint; +.IP (ii) +it may be empty. +.LP +A replacement may also refer to the values of variables and ANY. +.NH 2 +Examples +.PP +This section contains some realistic examples for +optimization on PDP-11 and Vax assembly code. +.NH 3 +Vax examples +.PP +Suppose the table contains the following declarations: +.nf + +.ft CW + X, LOG { TRUE }; + LAB { VAL[0] == 'L' }; /* e.g. L0017 */ + A { no_side_effects(VAL) }; + NUM { is_number(VAL) }; +.ft R + +.fi +The procedure "no_side_effects" checks if its argument +contains any side effects, i.e. auto increment or auto decrement. +The procedure "is_number" checks if its argument contains only digits. 
+These procedures must be supplied by the table-writer and must be +included in the table. +.PP +.nf +.ft CW +\fIentry:\fP addl3 X,A,A -> addl2 X,A; +.ft R + +.fi +This entry changes a 3-operand instruction into a cheaper 2-operand +instruction. +An optimization like: +.nf +.ft CW + + addl3 r0,(r2)+,(r2)+ -> addl2 r0,(r2)+ + +.ft R +.fi +is illegal, as r2 should be incremented twice. +Hence the second argument is required to +be side-effect free. +.PP +.nf +.ft CW +\fIentry:\fP addw2 $-NUM,X -> subw2 $NUM,X; +.ft R + +.fi +An instruction like "subw2 $5,r0" is cheaper +than "addw2 $-5,r0", +because constants in the range 0 to 63 are represented +very efficiently on the Vax. +.PP +.nf +.ft CW +\fIentry:\fP bitw $NUM,A : jneq LAB + { is_poweroftwo(NUM,LOG) } -> jbs $LOG,A,LAB; + +.ft R +.fi +A "bitw x,y" sets the condition codes to the bitwise "and" of +x and y. +A "jbs n,x,l" branches to l if bit n of x is set. +So, for example, the following transformation is possible: +.nf +.ft CW + + bitw $32,r0 : jneq L0017 -> jbs $5,r0,L0017 + +.ft R +.fi +The user-defined procedure "is_poweroftwo" checks if its first argument is +a power of 2 and, if so, sets its second argument to the logarithm +of the first argument. (Both arguments are strings). +Note that the variable LOG is not used in the pattern itself. +It is assigned a (string) value by "is_poweroftwo" and is used +in the replacement. +.NH 3 +PDP-11 examples +.PP +Suppose we have the following declarations: +.nf + +.ft CW + X { TRUE }; + A { no_side_effects(VAL) }; + L1, L2 { VAL[0] == 'I' }; + REG { VAL[0] == 'r' && VAL[1] >= '0' && VAL[1] <= '5' && + VAL[2] == '\\0' }; + +.ft P +.fi +The implementation of "no_side_effects" may of course +differ for the PDP-11 and the Vax. +.PP +.nf +.ft CW +\fIentry:\fP mov REG,A : ANY A,X -> mov REG,A : ANY REG,X ; +.ft R + +.fi +This entry implements register subsumption. 
+If A and REG hold the same value (which is true after "mov REG,A") +and A is used as source (first) operand, it is cheaper to use REG instead. +.PP +.nf +.ft CW +\fIentry:\fP jeq L1 : jbr L2 : labdef L1 -> jne L2 : labdef L1; +.ft R + +.fi +The "jeq L1" is a "skip over an unconditional jump". "labdef L1" +denotes the definition (i.e. defining occurrence) of label L1. +As the target optimizer has to know what such a definition +looks like, this must be expressed in the table (see Appendix A). +.PP +.nf +.ft CW +\fIentry:\fP add $01,X { carry_dead(REST) } -> inc X; +.ft R + +.fi +On the PDP-11, an add-one is not equivalent to an increment. +The latter does not set the carry-bit of the condition codes, +while the former does. +So a look-ahead is needed to see if the rest of the input uses +the carry-bit before changing the condition codes. +A look-ahead of one instruction is provided by +the target optimizer. +This will normally be sufficient for compiler-generated code. +The keyword REST contains the mnemonic of the first instruction of +the rest of the input. +If this instruction uses the carry-bit (e.g. an adc, subc, bhis) +the transformation is not allowed. +.NH 1 +Implementation of the target optimizer +.PP +The target optimizer reads one input file of assembler instructions, +processes it, and writes the optimized code +to the output file. +So it performs one pass over the input. +.NH 2 +The window mechanism +.PP +The optimizer uses a \fIwindow\fR that moves over the input. +It repeatedly tries to match the instructions in the window +with the patterns in the table. +If no match is possible, the window moves +one instruction forwards (to the right). +After a successful match the matched instructions are +removed from the window and are replaced by the +replacement part of the table entry. +Furthermore, the window is moved a few instructions +backwards, +as it is possible that instructions that were rejected earlier now do match.
+For example, consider the following patterns: +.DS +.ft CW +cmp $0, X -> tst X ; +mov REG,X : tst X -> mov REG,X ; /* redundant test */ +.ft R +.DE +If the input is: +.DS +.ft CW +mov r0,foo : cmp $0,foo +.ft R +.DE +then the first instruction is initially rejected. +However, after the transformation +.DS +.ft CW +cmp $0,foo -> tst foo +.ft R +.DE +the following optimization is possible: +.DS +.ft CW +mov r0,foo : tst foo -> mov r0,foo +.ft R +.DE +.PP +The window is implemented as a \fIqueue\fR. +Matching takes place at the head of the queue. +New instructions are added at the tail. +If the window is moved forwards, the instruction at the head +is not yet written to the output, +as it may be needed later on. +Instead it is added to a second queue, +the \fIbackup queue\fR. +After a successful match, the entire backup queue is +inserted at the front of the window queue, +which effectively implements the shift backwards. +.PP +Both queues have the length of the longest pattern in the table. +If, as a result of a forward window move, +the backup queue gets full, +the instruction at its head is outputted and removed. +Instructions are read from the input whenever the +window queue contains fewer elements than the length +of the longest pattern. +.NH 2 +Pattern matching +.PP +Pattern matching is done in three steps: +.IP (i) 7 +find patterns in the table whose instruction mnemonics +match the mnemonics of the instructions in the +current window; +.IP (ii) +check if the operands of the pattern match the operands of the +instructions in the current window; +.IP (iii) +check if the optional constraint is satisfied. +.LP +For step (i) hashing is used. +The mnemonic of the first instruction of the window +is used to determine a list of possible patterns. +Patterns starting with ANY are always tried. +.PP +Matching of operand descriptions against actual operands +takes place as follows.
+The general form of an operand description is: +.DS +string_constant1 variable_name string_constant2 +.DE +The actual operand should begin with string_constant1 and end +on string_constant2. +If so, these strings are stripped from it and the remaining string is +matched against the variable. +Matching a string against a variable is +defined as follows: +.IP 1. +initially (before the entire pattern match) +all variables are uninstantiated; +.IP 2. +matching a string against an uninstantiated variable +succeeds if the restriction associated with the variable is +satisfied. +As a side effect, it causes the variable to be instantiated to +the string; +.IP 3. +matching a string against an instantiated variable succeeds +only if the variable was instantiated to the same string. +.LP +Matching an actual mnemonic against the keyword ANY is defined likewise. +.PP +The matching scheme implements the requirement that multiple occurrences +of the same variable name or of the keyword ANY should +stand for the same values throughout the entire pattern +(see section 2.). +.PP +Both the parameter restriction of 2. and the constraint of step (iii) +are checked by executing the "C" expression. +.NH 2 +Data structures +.PP +The most important data structure is the representation +of the input instructions. +For every instruction we use two representations: +.IP (i) +the textual representation, +i.e. the exact code as it appeared in the input; +.IP (ii) +a structural representation, +containing the opcode and the operands. +.LP +The opcode of an instruction is determined as soon as it is read. +If the line contains a label definition, the opcode is set +to "labdef", so a label definition is treated like a normal +instruction. +.PP +The operands of an instruction are not determined until +they are needed, i.e. until step (i) of the pattern matching +process has succeeded. +For every instruction we keep track of a \fIstate\fR. 
+After the opcode has successfully been determined, +the state is OPC_ONLY. +Once the operands have been recognized, the state is set to DONE. +If the opcode or operands can not be determined, +or if the instruction cannot be optimized for any other +reason (see Appendix A), the state is set to JUNK +and any attempt to match it will fail. +.PP +For each table entry we record the following information: +.IP (i) 7 +the length of the pattern (i.e. the number of instruction descriptions) +.IP (ii) +a description of the instructions of the pattern +.IP (iii) +the length of the replacement +.IP (iv) +a description of the instructions of the replacement. +.LP +The description of an instruction consists of: +.IP (i) +the opcode +.IP (ii) +for each operand, a description of the operand. +.LP +The description of an operand of the form: +.DS +string_constant1 variable_name string_constant2 +.DE +contains: +.IP (i) +both string constants +.IP (ii) +the number of the variable. +.LP +Each declared variable is assigned a unique number. +For every variable we maintain: +.IP (i) +its state (instantiated or not instantiated) +.IP (ii) +its current value (a string). +.LP +The restrictions on variables and the constraints are stored +in a switch-statement, +indexed by variable number and entry number respectively. +.NH 1 +Implementation of the target optimizer generator +.PP +The target optimizer generator (\fItopgen\fR) +reads a target machine description table and produces +two files: +.IP gen.h: 9 +contains macro definitions for +machine parameters that were changed +in the parameter section of the table (see appendix A) +and for some attributes derived from the table +(longest pattern, number of patterns, number +of variables). +.IP gen.c: +contains the entry description tables, +code for checking the parameter restrictions and constraints +(switch statements) +and the user-defined procedures. 
+.LP +These two files are compiled together with some machine-independent +files to produce a target optimizer. +.PP +Topgen is implemented using +the LL(1) parser generator system LLgen , +.[ +jacobs topics parser generation +.] +a powerful tool of the Amsterdam Compiler Kit. +This system provides a flexible way of describing the syntax of the tables. +The syntactical description of the table format included +in Appendix A was derived from the LLgen syntax rules. +.PP +The parser uses a simple, hand-written, lexical analyzer (scanner). +The scanner returns a single character in most cases. +The recognition of identifiers is left to the parser, as +this eases the analysis of operand descriptions. +Comments are removed from the input by the scanner, +but white space is passed to the parser, +as it is meaningful in some contexts (it separates the +opcode description from the description of the first operand). +.PP +Topgen maintains two symbol tables, one for variable names and one +for tunable parameters. +The symbol tables are organized as binary trees. +.bp +.NH 1 +References +.[ +$LIST$ +.] +.bp +.SH +Appendix A +.PP +In this appendix we present a complete definition of the target +optimizer description table format. +This appendix is intended for table-writers. +We use syntax rules for the description of the table format. +The following notation is used: +.TS +center; +l l. +{ a } zero or more of a +[ a ] zero or one of a +a b a followed by b +a | b a or b +.TE +Terminals are given in quotes, as in ';'. +.PP +The table may contain white space and comment at all reasonable places. +Comments are as in "C", so they begin with /* and end on */. +Identifiers are sequences of letters, digits and the underscore ('_'), +beginning with a letter. +.PP +.DS +.ft CW +table -> {parameter_line} '%%;' {variable_declaration} '%%;' + {entry} '%%;' user_routines. 
+.ft R +.DE +A table consists of four sections, containing machine-dependent +constants, variable declarations, pattern rules and +user-supplied subroutines. +.PP +.DS +.ft CW +parameter_line -> identifier value ';' . +.ft R +.DE +A parameter line defines some attributes of the target machines +assembly code. +For unspecified parameters default values apply. +The names of the parameters and the corresponding defaults +are shown in table 1. +.TS +center; +l l. +OPC_TERMINATOR ' ' +OP_SEPARATOR ',' +LABEL_STARTER 'I' +LABEL_TERMINATOR ':' +MAXOP 2 +MAXOPLEN 25 +MAX_OPC_LEN 10 +MAXVARLEN 25 +MAXLINELEN 100 +PAREN_OPEN not defined +PAREN_CLOSE not defined +.TE +.ce 1 +table 1: parameter names and defaults +.DE +The OPC_TERMINATOR is the character that separates the instruction +mnemonic from the first operand (if any). +The OP_SEPARATOR separates adjacent operands. +A LABEL_STARTER is the first character of an instruction label. +(Instruction labels are assumed to start with the same character). +The LABEL_TERMINATOR is the last character of a label definition. +It is assumed that this character is not used in an applied +occurrence of the label identifier. +For example, the defining occurrence may be "I0017:" +and the applied occurrence may be "I0017" +as in "jmp I0017". +MAXOP defines the maximum number of operands an instruction can have. +MAXOPLEN is the maximum length (in characters) of an operand. +MAX_OPC_LEN is the maximum length of an instruction opcode. +MAXVARLEN is the maximum length of a declared string variable. +As variables may be set by user routines (see "bitw" example for +the Vax) the table-writer must have access to this length and +must be able to change it. +MAXLINELEN denotes the maximum length of a line of assembly code. +PAREN_OPEN and PAREN_CLOSE must be used when the operand separator can also +occur within operands, between parentheses of some kind. 
In this case, +PAREN_OPEN must be set to a string containing the opening parentheses, and +PAREN_CLOSE must be set to a string containing the closing parentheses. +.PP +If a line of assembly code violates any of the assumptions or +exceeds some limit, +the line is not optimized. +Optimization does, however, proceed with the rest of the input. +.PP +.DS +.ft CW +variable_declaration -> identifier {',' identifier} restriction ';' . + +restriction -> '{' anything '}' . +.ft R +.DE +A variable declaration declares one or more string variables +that may be used in the patterns and in the replacements. +If a variable is used as part of an operand description in +a pattern, the entire pattern can only match if the +restriction evaluates to TRUE. +If the pattern does match, the variable is assigned the matching +part of the actual operand. +Variables that are not used in a pattern are initialized to +null-strings and may be assigned a value in the constraint-part of +the pattern. +.PP +The restriction must be a legal "C" expression. +It may not contain a closing bracket ('}'). +Inside the expression, the name VAL stands for the part of the actual +(matching) operand. +The expression may contain calls to procedures that are defined in the +user-routines section. +.DS +.ft CW +entry -> pattern '->' replacement ';' . + +pattern -> instruction_descr + { ':' instruction_descr } + constraint . + +replacement -> [ instruction_descr { ':' instruction_descr } ] . + +instruction_descr -> opcode + white + [ operand_descr { ',' operand_descr } ] . + +constraint -> '{' anything '}' . + +operand_descr -> [ string_constant ] + [ variable_name ] + [ string_constant ] . + +variable_name -> identifier . + +opcode -> anything . +.ft R +.DE +The symbol 'white' stands for white space (space or tab). +An opcode can be any string not containing the special +symbols ';', '{', '}', ':', ',', '->' or white space. +To be recognized, it must begin with a letter. 
+The opcode should either be a mnemonic of a target machine +instruction or it should be one of the keywords ANY and labdef. +ANY matches any actual opcode. labdef matches only label definitions. +.PP +If an operand description contains an identifier (as defined earlier), +it is checked if the identifier is the name of a declared variable. +This affects the semantics of the matching rules for the operand, +as described in section 2. +An operand may contain at most one such variable name. +.PP +The constraint must be a legal "C" expression, just as the operand restriction. +It may call user-defined procedures and use or change the value of +declared variables. +It may also use the string variable REST, +which contains the mnemonic of the first instruction of the +rest of the input. (REST is a null-string if this mnemonic can +not be determined). +.DS +.ft CW +user_routines -> anything . +.ft R +.DE +The remainder of the table consists of user-defined subroutines. diff --git a/doc/v7bugs.doc b/doc/v7bugs.doc new file mode 100644 index 0000000..d8525f2 --- /dev/null +++ b/doc/v7bugs.doc @@ -0,0 +1,303 @@ +.\" $Id: v7bugs.doc,v 1.5 1994/06/24 10:02:36 ceriel Exp $ +.wh 0 hd +.wh 60 fo +.de hd +'sp 5 +.. +.de fo +'bp +.. +.nr e 0 1 +.de ER +.br +.ne 20 +.sp 2 +.in 5n +.ti -5n +ERROR \\n+e: +.. +.de PS +.sp +.nf +.in +5n +.. +.de PE +.sp +.fi +.in -5n +.. +.sp 3 +.ce +UNIX version 7 bugs +.sp 3 +This document describes the UNIX version 7 errors fixed at the +Vrije Universiteit, Amsterdam. +Several of these were discovered at the VU. +Others are quoted from a list of bugs distributed by BellLabs. +.sp +For each error the differences between the original and modified +source files are given, +as well as a test program.
+.ER +C optimizer bug for unsigned comparison +.sp +The following C program caused an IOT trap, while it should not +(compile with 'cc -O prog.c'): +.PS +unsigned i = 0; + +main() { + register j; + + j = -1; + if (i > 40000) + abort(); +} +.PE +BellLabs suggests to make the following patch in c21.c: +.PS +/* modified /usr/src/cmd/c/c21.c */ + +189 if (r==0) { +190 /* next 2 lines replaced as indicated by +191 * Bell Labs bug distribution ( v7optbug ) +192 p->back->back->forw = p->forw; +193 p->forw->back = p->back->back; +194 End of lines changed */ +195 if (p->forw->op==CBR +196 || p->forw->op==SXT +197 || p->forw->op==CFCC) { +198 p->back->forw = p->forw; +199 p->forw->back = p->back; +200 } else { +201 p->back->back->forw = p->forw; +202 p->forw->back = p->back->back; +203 } +204 /* End of new lines */ +205 decref(p->ref); +206 p = p->back->back; +207 nchange++; +208 } else if (r>0) { +.PE +Use the previous program to test before and after the modification. +.ER +The loader fails for large data or text portions +.sp +The loader 'ld' produces a "local symbol botch" error +for the following C program. +.PS +int big1[10000] = { + 1 +}; +int big2[10000] = { + 2 +}; + +main() { + printf("loader is fine\\n"); +} +.PE +We have made the following fix: +.PS +/* original /usr/src/cmd/ld.c */ + +113 struct { +114 int fmagic; +115 int tsize; +116 int dsize; +117 int bsize; +118 int ssize; +119 int entry; +120 int pad; +121 int relflg; +122 } filhdr; + +/* modified /usr/src/cmd/ld.c */ + +113 /* +114 * The original Version 7 loader had problems loading large +115 * text or data portions. +116 * Why not include ??? +117 * then they would be declared unsigned +118 */ +119 struct { +120 int fmagic; +121 unsigned tsize; /* not int !!! */ +122 unsigned dsize; /* not int !!! */ +123 unsigned bsize; /* not int !!! */ +124 unsigned ssize; /* not int !!! */ +125 unsigned entry; /* not int !!! */ +126 unsigned pad; /* not int !!! */ +127 unsigned relflg; /* not int !!! 
*/ +128 } filhdr; +.PE +.ER +Floating point registers +.sp +When a program is swapped to disk if it needs more memory, +then the floating point registers were not saved, so that +it may have different registers when it is restarted. +A small assembly program demonstrates this for the status register. +If the error is not fixed, then the program generates an IOT error. +A "memory fault" is generated if all is fine. +.PS +start: ldfps $7400 +1: stfps r0 + mov r0,-(sp) + cmp r0,$7400 + beq 1b + 4 +.PE +Some digging into the kernel is required to fix it. +The following patch will do: +.PS +/* original /usr/sys/sys/slp.c */ + +563 a2 = malloc(coremap, newsize); +564 if(a2 == NULL) { +565 xswap(p, 1, n); +566 p->p_flag |= SSWAP; +567 qswtch(); +568 /* no return */ +569 } + +/* modified /usr/sys/sys/slp.c */ + +590 a2 = malloc(coremap, newsize); +591 if(a2 == NULL) { +592 #ifdef FPBUG +593 /* +594 * copy floating point register and status, +595 * but only if you must switch processes +596 */ +597 if(u.u_fpsaved == 0) { +598 savfp(&u.u_fps); +599 u.u_fpsaved = 1; +600 } +601 #endif +602 xswap(p, 1, n); +603 p->p_flag |= SSWAP; +604 qswtch(); +605 /* no return */ +606 } +.PE +.ER +Floating point registers. +.sp +A similar problem arises when a process forks. +The child will have random floating point registers as is +demonstrated by the following assembly language program. +The child process will die by an IOT trap and the father prints +the message "child failed". +.PS +exit = 1. +fork = 2. +write = 4. +wait = 7. + +start: ldfps $7400 + sys fork + br child + sys wait + tst r1 + bne bad + stfps r2 + cmp r2,$7400 + beq start + 4 +child: stfps r2 + cmp r2,$7400 + beq ex + 4 +bad: clr r0 + sys write;mess;13. +ex: clr r0 + sys exit + + .data +mess: +.PE +The same file slp.c should be patched as follows: +.PS +/* original /usr/sys/sys/slp.c */ + +499 /* +500 * When the resume is executed for the new process, +501 * here's where it will resume. 
+502 */ +503 if (save(u.u_ssav)) { +504 sureg(); +505 return(1); +506 } +507 a2 = malloc(coremap, n); +508 /* +509 * If there is not enough core for the +510 * new process, swap out the current process to generate the +511 * copy. +512 */ + +/* modified /usr/sys/sys/slp.c */ + +519 /* +520 * When the resume is executed for the new process, +521 * here's where it will resume. +522 */ +523 if (save(u.u_ssav)) { +524 sureg(); +525 return(1); +526 } +527 #ifdef FPBUG +528 /* copy the floating point registers and status to child */ +529 if(u.u_fpsaved == 0) { +530 savfp(&u.u_fps); +531 u.u_fpsaved = 1; +532 } +533 #endif +534 a2 = malloc(coremap, n); +535 /* +536 * If there is not enough core for the +537 * new process, swap out the current process to generate the +538 * copy. +539 */ +.PE +.ER +/usr/src/libc/v6/stat.c +.sp +Some system calls are changed from version 6 to version 7. +A library of system call entries, that make a version 6 UNIX look like +a version 7 system, is provided to run some +useful version 7 utilities, like 'tar', on UNIX-6. +The entry for 'stat' contained two bugs: +the 24-bit file size was incorrectly converted to 32 bits +(sign extension of bit 15) +and the uid/gid fields suffered from sign extension. 
+.sp +Transferring files from version 6 to version 7 using 'tar' +will fail for all files for which +.sp + ( (size & 0100000) != 0 ) +.sp +These two errors are fixed if stat.c is modified as follows: +.PS +/* original /usr/src/libc/v6/stat.c */ + +11 char os_size0; +12 short os_size1; +13 short os_addr[8]; + +49 buf->st_nlink = osbuf.os_nlinks; +50 buf->st_uid = osbuf.os_uid; +51 buf->st_gid = osbuf.os_gid; +52 buf->st_rdev = 0; + +/* modified /usr/src/libc/v6/stat.c */ + +11 char os_size0; +12 unsigned os_size1; +13 short os_addr[8]; + +49 buf->st_nlink = osbuf.os_nlinks; +50 buf->st_uid = osbuf.os_uid & 0377; +51 buf->st_gid = osbuf.os_gid & 0377; +52 buf->st_rdev = 0; +.PE diff --git a/doc/val.doc b/doc/val.doc new file mode 100644 index 0000000..14c5135 --- /dev/null +++ b/doc/val.doc @@ -0,0 +1,753 @@ +.\" $Id: val.doc,v 1.5 1994/06/24 10:02:39 ceriel Exp $ +.ll 72n +.wh 0 hd +.wh 60 fo +.de hd +'sp 5 +.. +.de fo +'bp +.. +.tr ~ +. PARAGRAPH +.de PP +.sp +.. +. CHAPTER +.de CH +.br +.ne 15 +.sp 3 +.in 0 +\\fB\\$1\\fR +.in 5 +.PP +.. +. SUBCHAPTER +.de SH +.br +.ne 10 +.sp +.in 5 +\\fB\\$1\\fR +.in 10 +.PP +.. +. INDENT START +.de IS +.sp +.in +5 +.. +. INDENT END +.de IE +.in -5 +.sp +.. +. DOUBLE INDENT START +.de DS +.sp +.in +5 +.ll -5 +.. +. DOUBLE INDENT END +.de DE +.ll +5 +.in -5 +.sp +.. +. EQUATION START +.de EQ +.sp +.nf +.. +. EQUATION END +.de EN +.fi +.sp +.. +. TEST +.de TT +.ti -5 +Test~\\$1:~ +.br +.. +. IMPLEMENTATION 1 +.de I1 +.br +Implementation~1: +.. +. IMPLEMENTATION 2 +.de I2 +.br +Implementation~2: +.. +.de CS +.br +~-~\\ +.. +.br +.fi +.sp 5 +.ce +\fBPascal Validation Suite Report\fR +.CH "Pascal processor identification" +The ACK-Pascal compiler produces code for an EM machine +as defined in [1]. +It is up to the implementor of the EM machine whether errors like +integer overflow, undefined operand and range bound error are recognized or not. 
+Therefore it depends on the EM machine implementation whether these errors +are recognized in Pascal programs or not. +The validation suite results of all known implementations are given. +.PP +There does not (yet) exist a hardware EM machine. +Therefore, EM programs must be interpreted, or translated into +instructions for a target machine. +The following implementations currently exist: +.IS +.I1 +an interpreter running on a PDP-11 (using UNIX). +The normal mode of operation for this interpreter is to check +for undefined integers, overflow, range errors etc. +.sp +.I2 +a translator into PDP-11 instructions (using UNIX). +Fewer checks are performed than in the interpreter, because the translator +is intended to speed up the execution of well-debugged programs. +.IE +.CH "Test Conditions" +Tester: E.G. Keizer +.br +Date: October 1983 +.br +Validation Suite version: 3.0 +.PP +The final test run is made with a slightly +modified validation suite. +.SH "Erroneous programs" +Some tests did not conform to the standard proposal of February 1979. +It is this version of the standard proposal that is used +by the authors of the validation suite. +.IS +.TT 6.6.3.7-4 +The semicolon between high and integer on line 17 is replaced +by a colon. +.sp +.TT 6.7.2.2-13 +The div operator on line 14 is replaced by mod. +.CH "Conformance tests" +Number of tests passed = 150 +.br +Number of tests failed = 6 +.SH "Details of failed tests" +.IS +.TT 6.1.2-1 +Character sequences starting with the 8 characters 'procedur' +or 'function' are +erroneously classified as the word-symbols 'procedure' and 'function'. +.sp +.TT 6.1.3-2 +Identifiers identical in the first eight characters, but +differing in ninth or higher numbered characters are treated as +identical. +.sp +.TT 6.5.1-1 +ACK-Pascal requires all formal program parameters to be +declared with type \fIfile\fP. +.sp +.TT 6.6.6.5-1 +Gives run-time error eof seen at call to eoln. +I have a hunch that this is an error in the suite.
+.sp +.TT 6.6.4.1-1 +Redefining the names of some standard procedures leads to incorrect +behaviour of the runtime system. +In this case it crashes without a sensible error message. +.sp +.TT 6.9.3.5.1-1 +This test can not be translated by our compiler because two +non-identical variables are used in the same block with the same first eight +characters. +The test passed after replacement of one of those names. +.IE +.CH "Deviance tests" +Number of deviations correctly detected = 120 +.br +Number of tests not detecting deviations = 20 +.SH "Details of deviations" +The following tests are compiled without a proper error +indication although they do +not conform to the standard. +.IS +.TT 6.1.6-5 +ACK-Pascal allows labels in the range 0..32767. +A warning is produced when testing for deviations from the +standard. +.sp +.TT 6.1.8-5 +A missing space between a number and a word symbol is not +detected. +.sp +.TT 6.2.2-8 +.TT 6.3-6 +.TT 6.4.1-3 +.TT 6.6.1-3 +.TT 6.6.1-4 +Undetected scope error. The scope of an identifier should start at the +beginning of the block in which it is declared. +In the ACK-Pascal compiler the scope starts just after the declaration, +however. +.sp +.TT 6.4.3.3-7 +The values of fields from one variant are accessible from +another variant. +The correlation is exact. +.sp +.TT 6.6.3.3-4 +The passing as a variable parameter of the selector of a +variant part is not detected. +A runtime error is produced because the variant selector is not +initialized. +.sp +.TT 6.8.2.4-2 +.TT 6.8.2.4-3 +.TT 6.8.2.4-4 +.TT 6.8.2.4-5 +.TT 6.8.2.4-6 +The ACK-Pascal compiler does not restrict the places from where +a jump to a label by means of a goto-statement is allowed. +.sp +.TT 6.8.3.9-5 +.TT 6.8.3.9-6 +.TT 6.8.3.9-7 +.TT 6.8.3.9-16 +There are no errors produced for assignments to a variable +in use as control-variable of a for-statement. 
+.TT 6.8.3.9-8 +.TT 6.8.3.9-9 +Use of a controlled variable after leaving the loop without +intervening initialization is not detected. +.IE +.CH "Error handling" +The results depend on the EM implementation. +.sp +Number of errors correctly detected = +.in +5 +.I1 +32 +.I2 +17 +.in -5 +Number of errors not detected = +.in +5 +.I1 +21 +.I2 +36 +.in -5 +Number of errors incorrectly detected = +.in +5 +.I1 +2 +.I2 +2 +.in -5 +.SH "Details of errors not detected" +The following test fails because the ACK-Pascal compiler only +generates a warning that does not prevent to run the tests. +.IS +.TT 6.6.2-8 +A warning is produced if there is no assignment to a function-identifier. +.IE +With this test the ACK-Pascal compiler issues an error message for a legal +construct not directly related to the error to be detected. +.IS +.TT 6.5.5-2 +Program does not compile. +Buffer variable of text file is not allowed as variable +parameter. +.IE +The following errors are not detected at all. +.IS +.TT 6.2.1-11 +.I2 +The use of an undefined integer is not caught as an error. +.sp +.TT 6.4.3.3-10 +.TT 6.4.3.3-11 +.TT 6.4.3.3-12 +.TT 6.4.3.3-13 +The notion of 'current variant' is not implemented, not even if a tagfield +is present. +.sp +.TT 6.4.5-15 +.TT 6.4.6-9 +.TT 6.4.6-10 +.TT 6.4.6-11 +.TT 6.5.3.2-2 +.I2 +Subrange bounds are not checked. +.sp +.TT 6.4.6-12 +.TT 6.4.6-13 +.TT 6.7.2.4-4 +If the base-type of a set is a subrange, then the set elements are not checked +against the bounds of the subrange. +Only the host-type of this subrange-type is relevant for ACK-Pascal. +.sp +.TT 6.5.4-1 +.I2 +Nil pointers are not detected. +.sp +.TT 6.5.4-2 +.I2 +Undefined pointers are not detected. +.sp +.TT 6.5.5-3 +Changing the file position while the window is in use as actual variable +parameter or as an element of the record variable list of a with-statement +is not detected. +.sp +.TT 6.6.2-9 +An undefined function result is not detected, +because it is never used in an expression. 
+.sp +.TT 6.6.5.3-6 +.TT 6.6.5.3-7 +Disposing a variable while it is in use as actual variable parameter or +as an element of the record variable list of a with-statement is not detected. +.sp +.TT 6.6.5.3-8 +.TT 6.6.5.3-9 +.TT 6.6.5.3-10 +It is not detected that a record variable, created with the variant form +of new, is used as an operand in an expression or as the variable in an +assignment or as an actual value parameter. +.sp +.TT 6.6.5.3-11 +Use of a variable that is not reinitialized after a dispose is +not detected. +.sp +.TT 6.6.6.4-4 +.TT 6.6.6.4-5 +.TT 6.6.6.4-7 +.I2 +There are no range checks for pred, succ and chr. +.sp +.TT 6.6.6.5-6 +ACK-Pascal considers a rewrite of a file as a defining +occurrence. +.sp +.TT 6.7.2.2-8 +.TT 6.7.2.2-9 +.TT 6.7.2.2-10 +.TT 6.7.2.2-12 +.I2 +Division by 0 or integer overflow is not detected. +.sp +.TT 6.8.3.9-18 +The use of the same control variable in two nested for +statements is not detected. +.sp +.TT 6.8.3.9-19 +Access of a control variable after leaving the loop results in +the final-value, although an error should be produced. +.sp +.TT 6.9.3.2-3 +The program stops with a file not open error. +The rewrite before the write is missing in the program. +.sp +.TT 6.9.3.2-4 +.TT 6.9.3.2-5 +Illegal FracDigits values are not detected. +.CH "Implementation dependence" +Number of tests run = 14 +.br +Number of tests incorrectly handled = 0 +.SH "Details of implementation dependence" +.IS +.TT 6.1.9-5 +Alternate comment delimiters are implemented +.sp +.TT 6.1.9-6 +The equivalent symbols @ for ^, (. for [ and .) for ] are not +implemented. +.sp +.TT 6.4.2.2-10 +Maxint = 32767 +.sp +.TT 6.4.3.4-5 +Only elements with non-negative ordinal value are allowed in sets. +.sp +.TT 6.6.6.1-1 +Standard procedures and functions are not allowed as parameters.
+.sp +.TT 6.6.6.2-11 +Details of the machine characteristics regarding real numbers: +.IS +.nf +beta = 2 +t = 56 +rnd = 1 +ngrd = 0 +machep = -56 +negep = -56 +iexp = 8 +minexp = -128 +maxexp = 127 +eps = 1.387779e-17 +epsneg = 1.387779e-17 +xmin = 2.938736e-39 +xmax = 1.701412e+38 +.fi +.IE +.sp +.TT 6.7.2.3-3 +.TT 6.7.2.3-4 +All operands of boolean expressions are evaluated. +.sp +.TT 6.8.2.2-1 +.TT 6.8.2.2-2 +The expression in an assignment statement is evaluated +before the variable selection if this involves pointer +dereferencing or array indexing. +.sp +.TT 6.8.2.3-2 +Actual parameters are evaluated in reverse order. +.sp +.TT 6.9.3.2-6 +The default widths for integer, Boolean and real are 6, 5 and 13. +.sp +.TT 6.9.3.5.1-2 +The number of digits written in an exponent is 2. +.sp +.TT 6.9.3.6-1 +The representations of true and false are (~true) and (false). +The parentheses serve to indicate width. +.IE +.CH "Quality measurement" +Number of tests run = 60 +.br +Number of tests handled incorrectly = 1 +.SH "Results of tests" +Several tests perform operations on reals to indicate the error +introduced by these operations. +For each of these tests the following two quality measures are extracted: +.sp +.in +5 +maxRE:~~maximum relative error +.br +rmsRE:~~root-mean-square relative error +.in -5 +.sp 2 +.IS +.TT 1.2-1 +.I1 +25 thousand Whetstone instructions per second. +.I2 +169 thousand Whetstone instructions per second. +.sp +.TT 1.2-2 +The value of (TRUEACC-ACC)*2^56/100000 is 1.4 . +This is well within the bounds specified in [3]. +.br +The GAMM measure is: +.I1 +238 microseconds +.I2 +26.3 microseconds. +.sp +.TT 1.2-3 +The number of procedure calls calculated in this test exceeds +the maximum integer value. +The program stops indicating overflow. +.sp +.TT 6.1.3-3 +The number of significant characters for identifiers is 8. +.sp +.TT 6.1.5-8 +There is no maximum to the line length. 
+.sp +.TT 6.1.5-9 +The error message "too many digits" is given for numbers larger +than maxint. +.sp +.TT 6.1.5-10 +.TT 6.1.5-11 +.TT 6.1.5-12 +Normal values are allowed for real constants and variables. +.sp +.TT 6.1.7-14 +A reasonably large number of strings is allowed. +.sp +.TT 6.1.8-6 +No warning is given for possibly unclosed comments. +.sp +.TT 6.2.1-12 +.TT 6.2.1-13 +.TT 6.2.1-14 +.TT 6.2.1-15 +.TT 6.5.1-2 +Large lists of declarations are possible in each block. +.sp +.TT 6.4.3.2-6 +An 'array[integer] of' is not allowed. +.sp +.TT 6.4.3.2-7 +.TT 6.4.3.2-8 +Large values are allowed for arrays and indices. +.sp +.TT 6.4.3.3-14 +Large amounts of case-constant values are allowed in variants. +.sp +.TT 6.4.3.3-15 +Large amounts of record sections can appear in the fixed part of +a record. +.sp +.TT 6.4.3.3-16 +Large amounts of variants are allowed in a record. +.TT 6.4.3.4-4 +Size and speed of Warshall's algorithm depend on the +implementation of EM: +.IS +.I1 +.br +size: 122 bytes +.br +speed: 5.2 seconds +.sp +.I2 +.br +size: 196 bytes +.br +speed: 0.7 seconds +.IE +.TT 6.5.3.2-3 +Deep nesting of array indices is allowed. +.sp +.TT 6.5.3.2-4 +.TT 6.5.3.2-5 +Arrays can have at least 8 dimensions. +.sp +.TT 6.6.1-8 +Deep static nesting of procedure is allowed. +.sp +.TT 6.6.3.1-6 +Large amounts of formal parameters are allowed. +.sp +.TT 6.6.5.3-12 +Dispose is fully implemented. +.sp +.TT 6.6.6.2-6 +Test sqrt(x): no errors. +The error is within acceptable bounds. +.in +5 +maxRE:~~2~**~-55.50 +.br +rmsRE:~~2~**~-57.53 +.in -5 +.sp +.TT 6.6.6.2-7 +Test arctan(x): may cause underflow or overflow errors. +The error is within acceptable bounds. +.in +5 +.br +maxRE:~~2~**~-55.00 +.br +rmsRE:~~2~**~-56.36 +.in -5 +.sp +.TT 6.6.6.2-8 +Test exp(x): may cause underflow or overflow errors. +The error is not within acceptable bounds. +.in +5 +maxRE:~~2~**~-50.03 +.br +rmsRE:~~2~**~-51.03 +.in -5 +.sp +.TT 6.6.6.2-9 +Test sin(x): may cause underflow errors. 
+The error is not within acceptable bounds. +.in +5 +maxRE:~~2~**~-38.20 +.br +rmsRE:~~2~**~-43.68 +.in -5 +.sp +Test cos(x): may cause underflow errors. +The error is not within acceptable bounds. +.in +5 +maxRE:~~2~**~-41.33 +.br +rmsRE:~~2~**~-46.62 +.in -5 +.sp +.TT 6.6.6.2-10 +Test ln(x): +The error is not within acceptable bounds. +.in +5 +maxRE:~~2~**~-54.05 +.br +rmsRE:~~2~**~-55.77 +.in -5 +.sp +.TT 6.7.1-3 +.TT 6.7.1-4 +.TT 6.7.1-5 +Complex nested expressions are allowed. +.sp +.TT 6.7.2.2-14 +Test real division: +The error is within acceptable bounds. +.in +5 +maxRE:~~0 +.br +rmsRE:~~0 +.in -5 +.sp +.TT 6.7.2.2-15 +Operations of reals in the integer range are exact. +.sp +.TT 6.7.3-1 +.TT 6.8.3.2-1 +.TT 6.8.3.4-2 +.TT 6.8.3.5-15 +.TT 6.8.3.7-4 +.TT 6.8.3.8-3 +.TT 6.8.3.9-20 +.TT 6.8.3.10-7 +Static deep nesting of function calls, +compound statements, if statements, case statements, repeat +loops, while loops, for loops and with statements is possible. +.sp +.TT 6.8.3.2-2 +Large amounts of statements are allowed in a compound +statement. +.sp +.TT 6.8.3.5-12 +The compiler requires case constants to be compatible with +the case selector. +.sp +.TT 6.8.3.5-13 +.TT 6.8.3.5-14 +Large case statements are possible. +.sp +.TT 6.9-2 +Recursive IO on the same file is well-behaved. +.sp +.TT 6.9.1-6 +The reading of real values from a text file is done with +sufficient accuracy. +.in +5 +maxRE:~~2~**~-54.61 +.br +rmsRE:~~2~**~-56.32 +.in -5 +.sp +.TT 6.9.1-7 +.TT 6.9.2-2 +.TT 6.9.3-3 +.TT 6.9.4-2 +Read, readln, write and writeln may have large amounts of +parameters. +.sp +.TT 6.9.1-8 +The loss of precision for reals written on a text file and read +back is: +.in +5 +maxRE:~~2~**~-53.95 +.br +rmsRE:~~2~**~-55.90 +.in -5 +.sp +.TT 6.9.3-2 +File IO buffers without trailing marker are correctly flushed. +.sp +.TT 6.9.3.5.2-2 +Reals are written with sufficient accuracy. 
+.in +5 +maxRE:~~0 +.br +rmsRE:~~0 +.in -5 +.IE +.CH "Level 1 conformance tests" +Number of tests passed = 4 +.br +Number of tests failed = 1 +.SH "Details of failed tests" +.IS +.TT 6.6.3.7-4 +An expression indicated by parentheses whose +value is a conformant array is not allowed. +.IE +.CH "Level 1 deviance tests" +Number of deviations correctly detected = 4 +.br +Number of tests not detecting deviations = 0 +.IE +.CH "Level 1 error handling" +The results depend on the EM implementation. +.sp +Number of errors correctly detected = +.in +5 +.I1 +1 +.I2 +0 +.in -5 +Number of errors not detected = +.in +5 +.I1 +0 +.I2 +1 +.in -5 +.SH "Details of errors not detected" +.IS +.TT 6.6.3.7-9 +.I2 +Subrange bounds are not checked. +.IE +.CH "Level 1 quality measurement" +Number of tests run = 1 +.SH "Results of test" +.IS +.TT 6.6.3.7-10 +Large conformant arrays are allowed. +.IE +.CH "Extensions" +Number of tests run = 3 +.SH "Details of tests failed" +.IS +.TT 6.1.9-7 +The alternative relational operators are not allowed. +.sp +.TT 6.1.9-8 +The alternative symbols for colon, semicolon and assignment are +not allowed. +.sp +.TT 6.8.3.5-16 +The otherwise selector in case statements is not allowed. +.IE +.CH "References" +.ti -5 +[1]~~\ +A.S.Tanenbaum, E.G.Keizer, J.W.Stevenson, Hans van Staveren, +"Description of a machine architecture for use with block structured +languages", +Informatica rapport IR-81. +.ti -5 +[2]~~\ +ISO standard proposal ISO/TC97/SC5-N462, dated February 1979. +The same proposal, in slightly modified form, can be found in: +A.M.Addyman e.a., "A draft description of Pascal", +Software, practice and experience, May 1979. +An improved version, received March 1980, +is followed as much as possible for the +current ACK-Pascal. +.ti -5 +[3]~~\ +B. A. Wichman and J du Croz, +A program to calculate the GAMM measure, Computer Journal, +November 1979. 
diff --git a/doc/z80.doc b/doc/z80.doc new file mode 100644 index 0000000..c6d41dc --- /dev/null +++ b/doc/z80.doc @@ -0,0 +1,80 @@ +. \" $Id: z80.doc,v 2.6 1994/06/24 10:02:42 ceriel Exp $ +.ND April 1985 +.TL +THE Z80 BACK END TABLE +.AU +Frans van Haarlem +.NH 1 +INTRODUCTION +.PP +This table was written to make it run, not to make it clever! +The effect is, that the table written for the intel 8080, +which was made very clever runs faster and requires less space!! +So, for anyone to run programs on a z80 machine: +an attempt could be made to make this table as clever as the one for the i80, +or the i80 table could be used, for that can run on every z80 too. +.NH +IMPLEMENTATION +.PP +It will not be possible to run the entire Amsterdam Compiler Kit on a +Z80-based computer system. +One has to write a program on another +system, a system where the compiler kit runs on. +This program may be a mixture of high-level languages, such as +C or Pascal, EM and z80 assembly code. +The program should be compiled using the compiler kit, +producing z80 machine code. +This code should come available to the z80 machine +for example by downloading or +by storing it in ROM (Read Only Memory). +Depending on the characteristics of the particular z80 based system, some +adaptions have to be made: +.IP 1) +In \fIhead_em\fP: the base address, which is the address where the first +z80 instruction will be stored, and the initial value of the +stackpointer are set to 0x1000 and 0x7ffe respectively. +The latter because it could run on a 32K machine as well. +Other systems require other values. +.IP 2) +In \fIhead_em\fP: before calling "__m_a_i_n", the environment +pointer, argument vector and argument count will have to be pushed +onto the stack. +Since this back-end is tested on a system without any knowledge +of these things, dummies are pushed now. +.IP 3) +In \fItail_em\fP: proper routines "putchar" and "getchar" should +be provided. +They should write resp. 
read a character on/from the monitor. +Maybe some conversions will have to be made. +The ones for the Nascom and Hermac z80 micro's are to be found +in the EM-library. +.IP 4) +In \fIhead_em\fP: an application program returns control to the monitor by +jumping to address 0x20. +This may have to be changed on different systems. +For a CPM-machine for example this should be 0x5, to provide a warm boot. +.IP 5) +In \fItail_em\fP: the current version of the z80 back-end has very limited I/O +capabilities, because it was tested on a system that +had no knowledge of files. +So the implementation of the EM-instruction \fImon\fP is very simple; +it can only do the following things: +.DS + Monitor call 1: + Exit + Monitor call 3: + read, always reads from the monitor. + echoes the read character. + ignores file descriptor. + Monitor call 4: + write, always writes on the monitor. + ignores file descriptor. + Monitor call 5: + open file, returns file descriptor -1. + Monitor call 6: + close file, returns error code = 0. + Monitor call 54: + io-control, returns error code = 0. +.DE +If the system should do file-handling the routine ".mon" +should be extended thoroughly. diff --git a/emtest/Makefile b/emtest/Makefile new file mode 100644 index 0000000..a3ea72f --- /dev/null +++ b/emtest/Makefile @@ -0,0 +1,20 @@ +ACK=ack +tested: last + set -x ;\ + for i in `awk '{for(i=\$$1;i<=151;i++)print i}' last ` ;\ + do \ + echo $$i; \ + echo $$i >last; \ + select $$i tests > test.e; \ + $(ACK) test.e; \ + a.out \ + : ok; \ + done + rm -f test.e a.out + >tested + +last: tests test.h select + echo 0 >last + +select: select.c + $(CC) -O -o select select.c diff --git a/emtest/READ_ME b/emtest/READ_ME new file mode 100644 index 0000000..021ae77 --- /dev/null +++ b/emtest/READ_ME @@ -0,0 +1,136 @@ +This directory contains test programs for EM implementations. +The test programs are all part of the file "tests". +Each individual test program looks like: + + TEST 004: test ... + ... 
; data declarations etc. + MAIN nlocal + ... ; part of the body of MAIN + PROC + ... ; subroutines used by this test + +The PROC part is optional, so the smallest test program looks like: + + TEST 000: null test + MAIN 0 + +The keywords used by "select", like TEST, MAIN, PROC, HOL, OK and ERRLAB, +all consist of upper case letters and start in column one. +A convention for test numbers is to use 3 digit numbers, possibly left +padded with zeros. + +A program, called "select", is provided to combine a range of tests +into a single test program. +"Select" expects a range as argument, like 0-127, or -127, or 0-. +Tests that have a TEST number in that range are included. +"Select" also expects the file from which the tests should +be selected as an argument. +If no argument is given, or only a range argument, select expects +the tests to select from on standard input. + +To prevent name clashes, some rules must be obeyed: + - data label names, procedure names and instruction label numbers + must be unique over all tests. A good habit is to use the + three digit test number as suffix. + - only keywords of "select" may start with uppercase letters in column + one, to allow for expansion in the future. + - because only a single 'hol' pseudo is allowed, "select" must + generate the 'hol' pseudo. An individual test may request + some 'hol' space by a special HOL line, starting in column one + and followed by a single number, the number of bytes needed. + This number must consist of digits only, no constant symbols, + because "select" must compute the maximum, so before the + preprocessor has replaced the constant symbols by their values. + - a similar problem is caused by the number of bytes of local + storage for 'main'. An individual test may specify the number + of bytes it needs as parameter to the MAIN line. + Again, the number must consist of digits only. + +Test programs print a sequence of integers greater than 1. 
+This sequence is terminated by the number 1 as soon as an error is detected. +If all tests are performed correctly the number 0 is printed. + +To allow test programs to print integers without the full machinery of +conversion and i/o routines, the EM instruction 'nop' is used. +Each time this instruction is executed, the current line number as +maintained by the 'lin' instruction must be printed, followed by a +newline, at least during debugging. + +The following abbreviation may be used in test programs: + + OK -> lin n + nop + +Numbers are automatically assigned in order of static appearance. +As soon as an error is detected you must branch to label 1, by instructions +like 'bra *1' and 'zne *1'. +Label 1 is automatically provided in the main routine. +If you jump to label 1 in a subroutine, then that subroutine must +end with ERRLAB, like in: + + PROC + pro $test,0 + ... + bra *1 + ... + ret 0 + ERRLAB + end + +An option to "select" is to generate 'fil' instructions whenever a +new test starts. +This is useful if 'nop' prints the 'fil' string as well as the 'lin' number. +This 'f' option is on by default, off if a '-f' flag is given. + +The EM file generated by "select" includes "test.h". +"test.h" may contain definitions of the following symbols: + W2S: the size of double precision integers, if implemented. + FS: the size of single precision floats, if implemented. + F2S: the size of double precision floats, if implemented. +The value of these symbols, if defined, must be the size of the object involved. + +Two other symbols are used: + EM_PSIZE: pointer size + EM_WSIZE: word size +The machine dependent translation programs, like 8086 and vax2, give +definitions of these symbols while calling the EM encode program. 
+Because these size names occur quite often, they may be abbreviated: + WS -> EM_WSIZE + PS -> EM_PSIZE + +Before running the tests in the file "tests", it is wise to test +the necessary basic functions with some simple tests like + + TEST 000: null + MAIN 0 +and + TEST 001: ok + MAIN 0 + OK +and + TEST 998: error + MAIN 0 + bra *1 +and + TEST 999: test lni + MAIN 0 + lin 1 + lni + loe 0 + loc 2 + bne *1 + OK +The first two of these are part of "tests" as well. The last two are +not included in "tests" intentionally, because they would fail. +The last test fails because it references the ABS block which is +inaccessible after an 'hol' pseudo. +Proceed as follows for each of these basic tests: + - make a file called 'basic' containing the test + - run select: + select basic >basic.e + - compile by + machine basic.e + - and load and run + + where machine should be replaced by the name of the program + used to compile EM programs for the current machine. diff --git a/emtest/ok b/emtest/ok new file mode 100755 index 0000000..1eca33b --- /dev/null +++ b/emtest/ok @@ -0,0 +1,10 @@ +trap "" 1 2 + +while read x +do + case $x in + 0) exit 0;; + bad) exit 1;; + esac +done +exit 1 diff --git a/emtest/select.c b/emtest/select.c new file mode 100644 index 0000000..fec581a --- /dev/null +++ b/emtest/select.c @@ -0,0 +1,256 @@ +/* $Id: select.c,v 1.7 1994/06/24 10:06:45 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ * + */ + /* select: read a collection of EM tests, keep those whose TEST number lies in the range low..high, and write one combined EM program to stdout. */ +#include +#include +#include + /* Size of the line buffer; fgets() in getline() reads at most LINSIZ-1 characters per line. */ +#define LINSIZ 100 + /* Signals for which main() installs stop() as handler; the 0 entry ends the list. */ +int sigs[] = { + SIGHUP, + SIGINT, + SIGQUIT, + SIGTERM, + 0 +}; + /* prog: argv[0]; line: current input line; nlocals and nhol: largest MAIN and HOL sizes seen; oknum: number used for the next OK; fflag: emit fil pseudos when nonzero; low..high: selected test range. */ +char *prog; +char line[LINSIZ]; +int nlocals = 0; +int nhol = 0; +int nerrors = 0; +int oknum = 2; +int fflag = 1; +int low = 0; +int high = 999; + /* Temporary files filled by select(): file1 gets the lines that precede MAIN, file2 the lines after MAIN, file3 the lines after PROC. */ +FILE *file1; +FILE *file2; +FILE *file3; +char name1[] = "/usr/tmp/f1XXXXXX"; +char name2[] = "/usr/tmp/f2XXXXXX"; +char name3[] = "/usr/tmp/f3XXXXXX"; + +char *to3dig(); + /* Remove the three temporary files and exit with nerrors as status; also used as signal handler. */ +stop() { + unlink(name1); + unlink(name2); + unlink(name3); + exit(nerrors); +} + /* Parse the -f and -high options and the optional range and file arguments, let select() split the input over the temporary files, then combine() them onto stdout. Signals that were ignored on entry stay ignored. */ +main(argc,argv) char **argv; { + register *p; + register char *s; + + prog = *argv++; --argc; + mktemp(name1); + mktemp(name2); + mktemp(name3); + for (p = sigs; *p; p++) + if (signal(*p, stop) == SIG_IGN) + signal(*p, SIG_IGN); + while (argc > 0 && argv[0][0] == '-') { + switch (argv[0][1]) { + case 'f': + fflag ^= 1; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + high = atoi(&argv[0][1]); + break; + default: + usage(); + break; + } + argc--; + argv++; + } + if (argc > 0 && argv[0][0] >= '0' && argv[0][0] <= '9') { + s = argv[0]; + do + low = low*10 + *s++ - '0'; + while (*s >= '0' && *s <= '9'); + if (*s == 0) + high = low; + else if (*s++ == '-') { + high = atoi(s); + if (high == 0) + high = 999; + } else + fatal("bad range %s", argv[0]); + argc--; + argv++; + } + if (argc > 1) + usage(); + if (argc == 1 && freopen(argv[0], "r", stdin) == NULL) + fatal("cannot open %s", argv[0]); + if ((file1 = fopen(name1, "w")) == NULL) + fatal("cannot create %s", name1); + if ((file2 = fopen(name2, "w")) == NULL) + fatal("cannot create %s", name2); + if ((file3 = fopen(name3, "w")) == NULL) + fatal("cannot create %s", name3); + if (getline()) + while (select()) + ; + fclose(file1); + fclose(file2); + fclose(file3); + combine(); + stop(); +} + /* Handle the test whose TEST line is in the line buffer: skip it when its number is outside low..high, otherwise distribute its lines over the temporary files. Returns 1 when the next line starting with T has been read, 0 at end of input. */ +select() { + register FILE *f; + int i; + + if (sscanf(line, "TEST %d", &i) != 1) + fatal("bad test identification(%s)", line); + if (i <
low || i > high) { + while (getline()) + if (line[0] == 'T') + return(1); + return(0); + } + fprintf(file2, "; %s\n", line); + if (fflag) { + char *s = to3dig(i); + fprintf(file1, ".%s\n", s); + fprintf(file1, " con \"tst%s\"\n", s); + fprintf(file2, " fil .%s\n", s); + } + f = file1; + while (getline()) { + switch (line[0]) { + case 'T': + return(1); + case 'M': + if (sscanf(line, "MAIN%d", &i) != 1 || i%4 != 0) + break; + if (i > nlocals) + nlocals = i; + f = file2; + continue; + case 'P': + if (strcmp(line, "PROC") != 0) + break; + f = file3; + continue; + case 'H': + if (f != file1 || + sscanf(line, "HOL%d", &i) != 1 || + i%4 != 0) + break; + if (i > nhol) + nhol = i; + continue; + case 'O': + if (strcmp(line, "OK") != 0) + break; + fprintf(f, " lin %d\n nop\n", oknum++); + continue; + case 'E': + if (f != file3 || strcmp(line, "ERRLAB") != 0) + break; + fprintf(f, "1\n lin 1\n nop\n loc 1\n loc 1\n mon\n"); + continue; + default: + putline(f); + continue; + } + fatal("bad line (%s)", line); + } + return(0); +} + /* Write the combined EM program to stdout: fixed prologue, an optional hol of nhol bytes, the contents of file1, procedure _m_a_i_n with nlocals bytes of locals wrapped around file2, and finally file3. */ +combine() { + + printf("#define WS EM_WSIZE\n"); + printf("#define PS EM_PSIZE\n"); + printf("#include \"test.h\"\n"); + printf(" mes 2,WS,PS\n"); + printf(" mes 1\n"); + printf(" mes 4,300\n"); + if (nhol) + printf(" hol %d,0,0\n", nhol); + copy(name1); + printf(" exp $_m_a_i_n\n"); + printf(" pro $_m_a_i_n,%d\n", nlocals); + printf(" loc 123\n"); + printf(" loc -98\n"); + copy(name2); + printf(" loc -98\n"); + printf(" bne *1\n"); + printf(" loc 123\n"); + printf(" bne *1\n"); + printf(" lin 0\n"); + printf(" nop\n"); + printf(" loc 0\n"); + printf(" ret WS\n"); + printf("1\n"); + printf(" lin 1\n"); + printf(" nop\n"); + printf(" loc 1\n"); + printf(" ret WS\n"); + printf(" end\n"); + copy(name3); +} + /* Reopen stdin on the file named s and copy it line by line to stdout; failure to reopen is fatal. */ +copy(s) char *s; { + if (freopen(s, "r", stdin) == NULL) + fatal("cannot reopen %s", s); + while (getline()) + putline(stdout); +} + /* Read the next line from stdin into the line buffer and strip its newline. Returns 1 on success and 0 when fgets() returns NULL; a line whose last character read is not a newline is reported as too long (fatal). */ +getline() { + register len; + + if (fgets(line, LINSIZ, stdin) == NULL) + return(0); + len = strlen(line); + if
(line[len-1] != '\n') + fatal("line too long(%s)", line); + line[len-1] = 0; + return(1); +} + /* Write the current line buffer followed by a newline to stream f. */ +putline(f) FILE *f; { + fprintf(f, "%s\n", line); +} + /* Print the program name, the message formatted from s with a1..a4 as arguments, and the text (fatal) on stderr; then count the error and terminate through stop(). */ +fatal(s, a1, a2, a3, a4) char *s; { + fprintf(stderr, "%s: ", prog); + fprintf(stderr, s, a1, a2, a3, a4); + fprintf(stderr, " (fatal)\n"); + nerrors++; + stop(); +} + /* Print the usage message on stderr, count an error and terminate through stop(). */ +usage() { + fprintf(stderr, "usage: %s -f [[low]-[high]] [testcollection]\n", prog); + nerrors++; + stop(); +} + /* Return the hundreds, tens and units digits of i as a three character string held in a static buffer, which the next call overwrites. */ +char * +to3dig(i) + register int i; +{ + static char buf[4]; + register char *s = buf; + + *s++ = (i % 1000) / 100 + '0'; + *s++ = (i % 100) / 10 + '0'; + *s++ = (i % 10) + '0'; + *s = '\0'; + return buf; +} diff --git a/emtest/test.h b/emtest/test.h new file mode 100644 index 0000000..9e1ea2d --- /dev/null +++ b/emtest/test.h @@ -0,0 +1,8 @@ +/* $Id: test.h,v 1.9 1994/06/24 10:06:48 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/*#define W2S 4 /* double word size */ +/*#define FS 4 /* float size */ +/*#define F2S 8 /* double size */ diff --git a/emtest/tests b/emtest/tests new file mode 100644 index 0000000..68d558a --- /dev/null +++ b/emtest/tests @@ -0,0 +1,4597 @@ +TEST 000: empty +MAIN 0 +TEST 001: OK +MAIN 0 +OK +TEST 002: test loc, bne +MAIN 0 + loc 0 + loc 0 + bne *1 +OK + loc 1 + loc 1 + bne *1 +OK + loc -1 + loc -1 + bne *1 +OK + loc 127 + loc 127 + bne *1 +OK + loc -127 + loc -127 + bne *1 +OK + loc -128 + loc -128 + bne *1 +OK +#if WS > 1 + loc 255 + loc 255 + bne *1 +OK + loc 256 + loc 256 + bne *1 +OK + loc 257 + loc 257 + bne *1 +OK + loc 32767 + loc 32767 + bne *1 +OK + loc -32767 + loc -32767 + bne *1 +OK + loc -32768 + loc -32768 + bne *1 +OK +#endif /* WS > 1 */ +#if WS > 2 + loc 65535 + loc 65535 + bne *1 +OK + loc 65536 + loc 65536 + bne *1 +OK + loc 65537 + loc 65537 + bne *1 +OK + loc 2147483647 + loc 2147483647 + bne *1 +OK + loc -2147483647 + loc -2147483647 + bne *1 +OK + loc
-2147483648 + loc -2147483648 + bne *1 +OK +#endif /* WS > 2 */ +TEST 003: test signed and unsigned loc +MAIN 0 +#if WS == 1 + loc 128 + loc -128 + bne *1 +OK + loc 129 + loc -127 + bne *1 +OK + loc 255 + loc -1 + bne *1 +OK +#endif +#if WS == 2 + loc 32768 + loc -32768 + bne *1 +OK + loc 32769 + loc -32767 + bne *1 +OK + loc 65535 + loc -1 + bne *1 +OK +#endif +#if WS == 4 + loc 2147483648 + loc -2147483648 + bne *1 +OK + loc 2147483649 + loc -2147483647 + bne *1 +OK + loc 4294967295 + loc -1 + bne *1 +OK +#endif +TEST 004: test lol, stl, del, inl, zrl +MAIN 8 + loc 95 + stl -4 + lol -4 + loc 95 + bne *1 +OK + lol 0 + stl -4 + loc 125 + stl -8 + loc 125 + lol -8 + bne *1 +OK + lol 0 + lol -4 + bne *1 +OK + zrl -4 + lol -4 + loc 0 + bne *1 +OK + inl -4 + lol -4 + loc 1 + bne *1 + loc -1 + stl -4 + inl -4 + lol -4 + loc 0 + bne *1 +OK + del -4 + lol -4 + loc -1 + bne *1 + loc 1 + stl -4 + del -4 + lol -4 + loc 0 + bne *1 +OK +TEST 005: test loe, ste, zre, ine, dee +HOL 8 +MAIN 0 + loc 95 + ste WS + loe WS + loc 95 + bne *1 +OK + loc 125 + ste 0 + loc 125 + loe 0 + bne *1 +OK + loc 95 + ste 0 + loe WS + loe 0 + bne *1 +OK + zre WS + loe WS + loc 0 + bne *1 +OK + ine WS + loe WS + loc 1 + bne *1 + loc -1 + ste WS + ine WS + loe WS + loc 0 + bne *1 +OK + dee WS + loe WS + loc -1 + bne *1 + loc 1 + ste WS + dee WS + loe WS + loc 0 + bne *1 +OK +TEST 006: test named global data: con, rom, bss +a006 + bss 4,0,0 +b006 + con 35 +c006 + bss 4,0,1 +d006 + bss 4,35,1 +e006 + rom 57 +MAIN 0 + loc 0 + loe c006 + bne *1 +OK + loe b006 + ste a006 + loe d006 + loe a006 + bne *1 +OK + loe e006 + ste b006 + loc 57 + loe b006 + bne *1 +OK +TEST 007: test lal, lil +MAIN 12 + lal -8 +#if WS == PS + stl -4 +#endif +#if WS*2 == PS + sdl -4 +#endif + loc 117 + stl -8 + lil -4 + loc 117 + bne *1 +OK +TEST 008: test sil +MAIN 12 + loc 29 + stl -8 + lal -8 +#if WS == PS + stl -4 +#endif +#if WS*2 == PS + sdl -4 +#endif + loc 110 + sil -4 + lol -8 + loc 110 + bne *1 +OK +TEST 009: test lof 
+MAIN 20 + lal -16 + loc 120 + stl -12 + lof 4 + loc 120 + bne *1 +OK +TEST 010: test stf +MAIN 20 + loc 180 + lal -16 + stf 8 + lol -8 + loc 180 + bne *1 +OK +TEST 011: test loi WS +MAIN 20 + loc 140 + stl -12 + lal -12 + loi WS + loc 140 + bne *1 +OK +TEST 012: test loi 1, sti 1 +MAIN 20 +#if WS > 1 + loc 519 + lal -10 + sti 1 + lal -10 + loi 1 + loc 7 + bne *1 + loc 129 + lal -10 + sti 1 + lal -10 + loi 1 + loc 9 + ble *1 +OK +#endif +TEST 013: test loi 2*WS +MAIN 20 + loc 77 + stl -12 + loc 119 + stl -12+WS + lal -12 + loi 2*WS + loc 77 + bne *1 + loc 119 + bne *1 +OK +TEST 014: test loi 4*WS +MAIN 20 + loc 150 + stl -16+(3*WS) + loc 152 + stl -16+(2*WS) + loc 154 + stl -16+WS + loc 156 + stl -16 + lal -16 + loi 4*WS + loc 156 + bne *1 + loc 154 + bne *1 + loc 152 + bne *1 + loc 150 + bne *1 +OK +TEST 015: test los WS with 1 +MAIN 20 + loc 7 + lal -10 + sti 1 + lal -10 + loc 1 + los WS + loc 7 + bne *1 +OK +TEST 016: test los WS with 2 +MAIN 20 +#if WS == 1 + loc 77 + loc 78 + stl -12 + stl -11 + lal -12 + loc 2 + los WS + loc 78 + bne *1 + loc 77 + bne *1 +#endif +#if WS > 1 + loc 777 + lal -12 + sti 2 + lal -12 + loc 2 + los WS + loc 777 + bne *1 +#endif +OK +TEST 017: test los WS with 4 +MAIN 20 +#if WS == 1 + loc 14 + loc 15 + loc 16 + loc 17 + lal -12 + sti 4 + lal -12 + loc 4 + los WS + loc 17 + bne *1 + loc 16 + bne *1 + loc 15 + bne *1 + loc 14 + bne *1 +#endif +#if WS == 2 + loc 3001 + loc 3002 + stl -12 + stl -10 + lal -12 + loc 4 + los WS + loc 3002 + bne *1 + loc 3001 + bne *1 +#endif +#if WS > 2 + loc 123001 + stl -12 + lal -12 + loc 4 + los WS + loc 123001 + bne *1 +#endif +OK +TEST 018: test ldl +MAIN 20 + loc 77 + stl -12 + loc 123 + stl -12+WS + ldl -12 + loc 77 + bne *1 + loc 123 + bne *1 +OK +TEST 019: test lde +HOL 20 +MAIN 0 + loc 70 + ste 12 + loc 71 + ste 12+WS + lde 12 + loc 70 + bne *1 + loc 71 + bne *1 +OK +TEST 020: test ldf +MAIN 20 + loc 123 + loc 77 + sdl -8 + lal -13 + ldf 5 + loc 77 + bne *1 + loc 123 + bne *1 +OK +TEST 021: test 
ldf +MAIN 20 + loc 75 + stl -12 + loc 77 + stl -12+WS + lal -20 + ldf 8 + loc 75 + bne *1 + loc 77 + bne *1 +OK +TEST 022: test sdl +MAIN 20 + loc 30 + loc 31 + sdl -12 + lol -12+WS + loc 30 + bne *1 + lol -12 + loc 31 + bne *1 +OK +TEST 023: test sde +HOL 24 +MAIN 0 + loc 40 + loc 41 + sde 16 + loe 16 + loc 41 + bne *1 + loe 16+WS + loc 40 + bne *1 +OK +TEST 024: test sdf +HOL 24 +MAIN 0 + loc 51 + loc 50 + lae 6 + sdf 10 + loe 16 + loc 50 + bne *1 + loc 51 + loe 16+WS + bne *1 +OK +TEST 025: test sti 1 +MAIN 20 + loc 4136 + lal -11 + sti 1 + loc 1034 + lal -10 + sti 1 + lal -10 + loi 1 + loc 10 + bne *1 + lal -11 + loi 1 + loc 40 + bne *1 +OK +TEST 026: test sti 1 and lol +MAIN 20 + loc 257 + stl -12+WS + loc 514 + stl -12-WS + loc 50 + lal -12 + sti 1 +#if WS > 1 + loc -50 + lal -11 + sti 1 +#endif +#if WS > 2 + loc 49 + lal -10 + sti 1 + loc -49 + lal -9 + sti 1 +#endif + loc 257 + lol -12+WS + bne *1 + loc 514 + lol -12-WS + bne *1 +OK +TEST 027: test sti 2 +MAIN 20 +#if WS == 1 + ldc 90 +#else + loc 90 +#endif + lal -12 + sti 2 +#if WS != 2 + lal -12 + loi 2 +#else + lol -12 +#endif +#if WS == 1 + ldc 90 +#else + loc 90 +#endif + bne *1 +OK +TEST 028: test sti 4 +MAIN 20 +#if WS == 1 + loc 100 + loc 101 +#endif +#if WS <= 2 + loc 102 +#endif + loc 103 + lal -16 + sti 4 + lol -16 + loc 103 + bne *1 +OK +TEST 029: test sts WS with 1 +MAIN 20 + loc 57 + lal -13 + sti 1 + loc 123 + lal -11 + sti 1 + loc 20 + lal -12 + loc 1 + sts WS + lal -12 + loi 1 + loc 20 + bne *1 + lal -11 + loi 1 + loc 123 + bne *1 + lal -13 + loi 1 + loc 57 + bne *1 +OK +TEST 030: test sts WS with WS +MAIN 20 + loc 210 + lal -12 + loc WS + sts WS + lol -12 + loc 210 + bne *1 +OK +TEST 031: test sts WS with 4 +MAIN 20 +#if WS == 1 + loc 100 + loc 101 +#endif +#if WS <= 2 + loc 102 +#endif + loc 103 + lal -16 + loc 4 + sts WS + lol -16 + loc 103 + bne *1 +OK +TEST 032: test adi WS +MAIN 0 + loc 1 + loc 1 + adi WS + loc 2 + bne *1 +OK + loc 5 + loc -6 + adi WS + loc -1 + bne *1 +OK +#if WS >= 
2 + loc -1007 + loc +999 + adi WS + loc -8 + bne *1 +OK + loc -1300 + loc +1405 + adi WS + loc 105 + bne *1 +OK + loc -30000 + loc -20 + adi WS + loc -30020 + bne *1 +OK +#endif +#if WS >= 4 + loc -100007 + loc +99999 + adi WS + loc -8 + bne *1 +OK + loc -1300 + loc +140567 + adi WS + loc 139267 + bne *1 +OK + loc -30000000 + loc -20 + adi WS + loc -30000020 + bne *1 +OK +#endif +TEST 033: test sbi WS +MAIN 0 + loc 1 + loc 1 + sbi WS + loc 0 + bne *1 +OK + loc 5 + loc -6 + sbi WS + loc +11 + bne *1 +OK +#if WS >= 2 + loc -1007 + loc +999 + sbi WS + loc -2006 + bne *1 +OK + loc -1300 + loc +1405 + sbi WS + loc -2705 + bne *1 +OK + loc -30000 + loc -20 + sbi WS + loc -29980 + bne *1 +OK +#endif +#if WS >= 4 + loc -100007 + loc +99999 + sbi WS + loc -200006 + bne *1 +OK + loc -1300 + loc +140567 + sbi WS + loc -141867 + bne *1 +OK + loc -30000000 + loc -20 + sbi WS + loc -29999980 + bne *1 +OK +#endif +TEST 034: test mli WS +MAIN 0 + loc 5 + loc 9 + mli WS + loc 45 + bne *1 +OK + loc 0 + loc 10 + mli WS + loc 0 + bne *1 +OK +#if WS > 1 + loc -99 + loc 7 + mli WS + loc -693 + bne *1 +OK + loc -100 + loc -90 + mli WS + loc 9000 + bne *1 +OK +#endif +#if WS > 2 + loc 123456 + loc 200 + mli WS + loc 24691200 + bne *1 +OK +#endif +TEST 035: test dvi WS +MAIN 0 + loc 15 + loc 5 + dvi WS + loc 3 + bne *1 +OK + loc 100 + loc -7 + dvi WS + loc -14 + bne *1 +OK + loc -100 + loc 7 + dvi WS + loc -14 + bne *1 +OK +#if WS >= 2 + loc -1000 + loc -201 + dvi WS + loc 4 + bne *1 +OK +#endif +#if WS >= 4 + loc 1234567 + loc -100 + dvi WS + loc -12345 + bne *1 +OK +#endif +TEST 036: test rmi WS +MAIN 0 + loc 100 + loc 7 + rmi WS + loc 2 + bne *1 +OK + loc -100 + loc 7 + rmi WS + loc -2 + bne *1 +OK + loc 100 + loc -7 + rmi WS + loc 2 + bne *1 +OK + loc -100 + loc -7 + rmi WS + loc -2 + bne *1 +OK +#if WS >= 2 + loc -1000 + loc -201 + rmi WS + loc -196 + bne *1 +OK +#endif +#if WS >= 4 + loc 1234567 + loc -100 + rmi WS + loc 67 + bne *1 +OK +#endif +TEST 037: test ngi WS +MAIN 0 + loc 
100 + ngi WS + loc -100 + bne *1 +OK + loc -100 + ngi WS + loc 100 + bne *1 +OK +TEST 038: test sli,slu WS +MAIN 0 + loc 5 + loc 3 + sli WS + loc 40 + bne *1 +OK + loc -2 + loc 4 + sli WS + loc -32 + bne *1 +OK + loc 5 + loc 3 + slu WS + loc 40 + bne *1 +OK + loc -2 + loc 4 + slu WS + loc -32 + bne *1 +OK +TEST 039: test sri,sru WS +MAIN 0 + loc 64 + loc 5 + sri WS + loc 2 + bne *1 +OK + loc -3 + loc 2 + sri WS + loc -1 + bne *1 +OK + loc -16 + loc 3 + sri WS + loc -2 + bne *1 +OK + loc 64 + loc 5 + sru WS + loc 2 + bne *1 +OK + loc -3 + loc 2 + sru WS +#if WS == 1 + loc 63 +#endif +#if WS == 2 + loc 16383 +#endif +#if WS == 4 + loc 1073741823 +#endif + bne *1 +OK +TEST 040: test rol WS +MAIN 0 + loc 1 + loc 3 + rol WS + loc 8 + bne *1 +OK +#if WS > 1 + loc 16384 + loc WS * 8 - 1 + rol WS + loc 8192 + bne *1 +OK + loc -2 + loc WS * 8 - 1 + rol WS +#if WS == 2 + loc 32767 +#endif +#if WS == 4 + loc 2147483647 +#endif + bne *1 +OK + loc -28671 + loc 0 + rol WS + loc -28671 + bne *1 +OK +#endif +TEST 041: test ror WS +MAIN 0 + loc 4 + loc 5 + ror WS +#if WS == 1 + loc 32 +#endif +#if WS == 2 + loc 8192 +#endif +#if WS == 4 + loc 536870912 +#endif + bne *1 +OK +#if WS == 2 + loc 32767 + loc 15 + ror WS + loc -2 + bne *1 +OK +#endif + loc -28 + loc 0 + ror WS + loc -28 + bne *1 +OK +TEST 042: test inc +MAIN 0 + loc 20 + inc + loc 21 + bne *1 +OK + loc -50 + inc + loc -49 + bne *1 +OK +TEST 043: test dec +MAIN 0 + loc 66 + dec + loc 65 + bne *1 +OK + loc -44 + dec + loc -45 + bne *1 +OK +TEST 044: test adp +MAIN 20 + lal -10 + adp -2 + lal -12 + cmp + zne *1 +OK + loc 519 + lal -11 + sti 1 + lal -12 + adp 1 + loi 1 + loc 7 + bne *1 +OK +TEST 045: test inn WS +MAIN 0 + loc 8 + loc 3 + inn WS + loc 1 + bne *1 +OK + loc 8 + loc 4 + inn WS + loc 0 + bne *1 +OK + loc 2 + loc -5 + inn WS + loc 0 + bne *1 +OK + loc -5 + loc 3 + inn WS + loc 1 + bne *1 +OK +TEST 046: test inn ? +MAIN 0 + loc 8 + loc 3 + loc WS + inn ? + loc 1 + bne *1 +OK + loc 8 + loc 4 + loc WS + inn ? 
+ loc 0 + bne *1 +OK + loc 2 + loc -5 + loc WS + inn ? + loc 0 + bne *1 +OK + loc -5 + loc 3 + loc WS + inn ? + loc 1 + bne *1 +OK +TEST 047: test set +MAIN 0 + loc 3 + set WS + loc 8 + bne *1 +OK + loc 6 + set 2*WS + loc 64 + bne *1 + loc 0 + bne *1 +OK + loc 0 + set 4*WS + loc 1 + bne *1 + loc 0 + bne *1 + loc 0 + bne *1 + loc 0 + bne *1 +OK +TEST 048: test set ? +MAIN 0 + loc 3 + loc WS + set ? + loc 8 + bne *1 +OK + loc 6 + loc 2*WS + set ? + loc 64 + bne *1 + loc 0 + bne *1 +OK + loc 0 + loc 4*WS + set ? + loc 1 + bne *1 + loc 0 + bne *1 + loc 0 + bne *1 + loc 0 + bne *1 +OK +TEST 049: test aar +a049 + rom 5,2,4 +b049 + con 5,2,1 +MAIN 20 + lal -12 + loc 5 + lae a049 + aar WS + lal -12 + cmp + zne *1 +OK + lal -11 + loc 7 + lae b049 + aar WS + lal -9 + cmp + zne *1 +OK +TEST 050: test aar ? +a050 + rom 5,2,4 +b050 + con 5,2,1 +MAIN 20 + lal -12 + loc 5 + lae a050 + loc WS + aar ? + lal -12 + cmp + zne *1 +OK + lal -11 + loc 7 + lae b050 + loc WS + aar ? + lal -9 + cmp + zne *1 +OK +TEST 051: test lar +a051 + rom 5,2,2*WS +b051 + con 5,2,1 +MAIN 20 + loc 40 + loc 41 + sdl -16+(2*WS) + lal -16 + loc 6 + lae a051 + lar WS + loc 41 + bne *1 + loc 40 + bne *1 +OK + loc 42 + lal -13 + sti 1 + lal -15 + loc 7 + lae b051 + lar WS + lal -13 + loi 1 + bne *1 +OK +TEST 052: test lar ? +a052 + rom 5,2,2*WS +b052 + con 5,2,1 +MAIN 20 + loc 40 + loc 41 + sdl -16+(2*WS) + lal -16 + loc 6 + lae a052 + loc WS + lar ? + loc 41 + bne *1 + loc 40 + bne *1 +OK + loc 42 + lal -13 + sti 1 + lal -15 + loc 7 + lae b052 + loc WS + lar ? + lal -13 + loi 1 + bne *1 +OK +TEST 053: test sar +a053 + rom 5,2,3*WS +b053 + con 5,2,1 +MAIN 28 : assert WS <= 8 + loc 39 + loc 40 + loc 41 + lal -28 + loc 6 + lae a053 + sar WS + lal -28+(3*WS) + loi 3*WS + loc 41 + bne *1 + loc 40 + bne *1 + loc 39 + bne *1 +OK + loc -2 + lal -11 + loc 7 + lae b053 + sar WS + lal -9 + loi 1 + loc 254 + bne *1 +OK +TEST 054: test sar ? 
+a054 + rom 5,2,3*WS +b054 + con 5,2,1 +MAIN 28 + loc 39 + loc 40 + loc 41 + lal -28 + loc 6 + lae a054 + loc WS + sar ? + lal -28+(3*WS) + loi 3*WS + loc 41 + bne *1 + loc 40 + bne *1 + loc 39 + bne *1 +OK + loc -2 + lal -11 + loc 7 + lae b054 + loc WS + sar ? + lal -9 + loi 1 + loc 254 + bne *1 +OK +TEST 055: test tlt +MAIN 0 + loc 1 + tlt + loc 0 + bne *1 +OK + loc 0 + tlt + loc 0 + bne *1 +OK + loc -3 + tlt + loc 1 + bne *1 +OK +TEST 056: test tle +MAIN 0 + loc 8 + tle + loc 0 + bne *1 +OK + loc 0 + tle + loc 1 + bne *1 +OK + loc -19 + tle + loc 1 + bne *1 +OK +TEST 057: test teq +MAIN 0 + loc 17 + teq + loc 0 + bne *1 +OK + loc 0 + teq + loc 1 + bne *1 +OK + loc -100 + teq + loc 0 + bne *1 +OK +TEST 058: test tne +MAIN 0 + loc 76 + tne + loc 1 + bne *1 +OK + loc 0 + tne + loc 0 + bne *1 +OK + loc -99 + tne + loc 1 + bne *1 +OK +TEST 059: test tge +MAIN 0 + loc 14 + tge + loc 1 + bne *1 +OK + loc 0 + tge + loc 1 + bne *1 +OK + loc -76 + tge + loc 0 + bne *1 +OK +TEST 060: test tgt +MAIN 0 + loc 20 + tgt + loc 1 + bne *1 +OK + loc 0 + tgt + loc 0 + bne *1 +OK + loc -66 + tgt + loc 0 + bne *1 +OK +TEST 061: test cmi WS +MAIN 0 + loc 10 + loc 20 + cmi WS + tlt + loc 1 + bne *1 +OK + loc 20 + loc 10 + cmi WS + tgt + loc 1 + bne *1 +OK + loc 44 + loc 44 + cmi WS + loc 0 + bne *1 +OK +TEST 062: test cmp, adp, cmu +MAIN 20 + lal -5 + lal -2 + cmp + tlt + loc 1 + bne *1 +OK + lal -2 + lal -5 + cmp + tle + loc 0 + bne *1 +OK + lal -2 + lal -2 + cmp + loc 0 + bne *1 +OK + lal -5 + adp 3 + lal -2 + cmp + loc 0 + bne *1 +OK + lal -5 + adp 4 + lal -2 + cmp + tge + loc 1 + bne *1 +OK + loc 6 + loc 6 + cmu WS + loc 0 + bne *1 +OK + loc 17 + loc 27 + cmu WS + tlt + loc 1 + bne *1 +OK +#ifdef W2S + loc 17 + loc 6 + loc 27 + loc 6 + cmu W2S + tlt + loc 1 + bne *1 +OK + loc 6 + loc 27 + loc 6 + loc 17 + cmu W2S + tgt + loc 1 + bne *1 +OK +#endif +TEST 063: test cms, zer +MAIN 0 + loc 6 + loc -10 + loc -125 + loc 30 + loc 6 + loc -10 + loc -125 + loc 30 + loc 4*WS + cms + teq + 
loc 1 + bne *1 +OK + loc 6 + loc -10 + loc -125 + loc 30 + loc 6 + loc -10 + loc -125 + loc 30 + cms 4*WS + teq + loc 1 + bne *1 +OK + loc 6 + loc -10 + loc -125 + loc 30 + loc 6 + loc -10 + loc -126 + loc 30 + loc 4*WS + cms + tne + loc 1 + bne *1 +OK + loc 6 + loc -10 + loc -125 + loc 30 + loc 6 + loc -10 + loc -126 + loc 30 + cms 4*WS + tne + loc 1 + bne *1 +OK + loc 0 + loc 0 + loc 0 + loc 0 + zer 4*WS + cms 4*WS + teq + loc 1 + bne *1 +OK +TEST 064: test blt +MAIN 0 + loc 3 + loc 4 + blt *1064 + bra *1 +1064 + loc 4 + loc 3 + blt *1 + loc -5 + loc -4 + blt *2064 + bra *1 +2064 + loc -4 + loc -5 + blt *1 + loc 4 + loc 4 + blt *1 +OK +TEST 065: test ble +MAIN 0 + loc 3 + loc 4 + ble *1065 + bra *1 +1065 + loc 4 + loc 3 + ble *1 + loc -99 + loc -5 + ble *2065 + bra *1 +2065 + loc -99 + loc -99 + ble *3065 + bra *1 +3065 + loc 100 + loc -100 + ble *1 +OK +TEST 066: test beq +MAIN 0 + loc 3 + loc 3 + beq *1066 + bra *1 +1066 + loc 3 + loc 4 + beq *1 + loc -18 + loc -17 + beq *1 + loc 0 + loc 0 + beq *2066 + bra *1 +2066 +OK +TEST 067: test bne +MAIN 0 + loc 50 + loc 50 + bne *1 + loc 50 + loc 51 + bne *1067 + bra *1 +1067 + loc 0 + loc 0 + bne *1 +OK +TEST 068: test bge +MAIN 0 + loc 100 + loc 101 + bge *1 + loc 101 + loc 100 + bge *1068 + bra *1 +1068 + loc -100 + loc -99 + bge *1 + loc -100 + loc 100 + bge *1 + loc 0 + loc 0 + bge *2068 + bra *1 +2068 + loc 50 + loc 50 + bge *3068 + bra *1 +3068 +OK +TEST 069: test bgt +MAIN 0 + loc 3 + loc 10 + bgt *1 + loc 10 + loc 3 + bgt *1069 + bra *1 +1069 + loc -100 + loc -50 + bgt *1 + loc -100 + loc 50 + bgt *1 + loc 5 + loc 5 + bgt *1 +OK +TEST 070: test zlt +MAIN 0 + loc 4 + zlt *1 + loc -4 + zlt *1070 + bra *1 +1070 + loc 0 + zlt *1 +OK +TEST 071: test zle +MAIN 0 + loc 4 + zle *1 + loc -4 + zle *1071 + bra *1 +1071 + loc 0 + zle *2071 + bra *1 +2071 +OK +TEST 072: test zeq +MAIN 0 + loc 4 + zeq *1 + loc -4 + zeq *1 + loc 0 + zeq *1072 + bra *1 +1072 +OK +TEST 073: test zne +MAIN 0 + loc 4 + zne *1073 + bra *1 +1073 + 
loc -4 + zne *2073 + bra *1 +2073 + loc 0 + zne *1 +OK +TEST 074: test zge +MAIN 0 + loc 4 + zge *1074 + bra *1 +1074 + loc -4 + zge *1 + loc 0 + zge *2074 + bra *1 +2074 +OK +TEST 075: test zgt +MAIN 0 + loc 4 + zgt *1075 + bra *1 +1075 + loc -4 + zgt *1 + loc 0 + zgt *1 +OK +TEST 076: test asp +MAIN 12 + loc 104 + loc 4 + loc 5 + loc 6 + asp 2*WS + stl -4 + loc 104 + bne *1 +OK +TEST 077: test cal +HOL 8 +MAIN 0 + cal $p077 + loe 4 + loc 34 + bne *1 +OK +PROC + pro $p077,0 + loc 34 + ste 4 + ret 0 + end +TEST 078: test cai +HOL 8 +MAIN 0 + lpi $p078 + cai + loe 4 + loc 34 + bne *1 +OK +PROC + pro $p078,0 + loc 34 + ste 4 + ret 0 + end +TEST 079: test ads WS +MAIN 20 + lal -6 + loc -2 + ads WS + lal -8 + cmp + zne *1 +OK + lal -6 + loc 2 + ads WS + lal -4 + cmp + zne *1 +OK +; test adp + lal -5 + adp 1 + lal -4 + cmp + zne *1 +OK +TEST 080: test sbs +HOL 12 +MAIN 20 + lal -4 + lal -6 + sbs WS + loc 2 + bne *1 +OK + lae 7 + lae 10 + sbs WS + loc -3 + bne *1 +OK +TEST 081: test lor +#define LB 0 +#define SP 1 +MAIN 20 + lor LB + lxl 0 + cmp + zne *1 +OK + loc 31 + lor SP + loi WS + bne *1 +OK + lor SP + lal -12 + sti PS + asp -4 + lor SP + lal -8 + sti PS + asp 4 + lal -12 + loi PS + lal -8 + loi PS + sbs WS + loc 4 + bne *1 +OK +TEST 082: test ass WS +#define SP 1 +MAIN 20 + loc 104 + loc 50 + loc 60 + loc 2*WS + ass WS + loc 104 + bne *1 +OK + lor SP + lal -8 + sti PS + loc -24 + ass WS + lor SP + lal -4 + sti PS + lal -8 + loi PS + lal -4 + loi PS + sbs WS + loc 24 + bne *1 + loc 24 + ass WS +OK + lor SP + lal -8 + loi PS + cmp + zne *1 +OK +TEST 083: test blm +HOL 28 +MAIN 32 + loc 61 + ste 12 + loc 0 + stl -4 + lae 12 + lal -4 + blm WS + lol -4 + loc 61 + bne *1 +OK + loc 44 + loc 43 + loc 42 + loc 41 + lal -20 + sti 4*WS + lal -20 + lae 8 + blm 4*WS + loe 8 + loc 41 + bne *1 + loe 8+WS + loc 42 + bne *1 + loe 8+(2*WS) + loc 43 + bne *1 + loe 8+(3*WS) + loc 44 + bne *1 +OK +TEST 084: test bls WS +HOL 28 +MAIN 32 + loc 20 + stl -8 + lal -8 + lae 20 + loc WS + 
bls WS + loe 20 + loc 20 + bne *1 +OK + loc 55 + stl -8+WS + loc 56 + stl -8 + lal -8 + lae 20 + loc 2*WS + bls WS + loe 20 + loc 56 + bne *1 + loe 20+WS + loc 55 + bne *1 +OK +TEST 085: test dup +MAIN 0 + loc 199 + dup WS + bne *1 +OK + loc 130 + loc 150 + dup 2*WS + loc 150 + bne *1 + loc 130 + bne *1 + loc 150 + bne *1 + loc 130 + bne *1 +OK +; test dus WS + loc 400 + loc 399 + loc 2*WS + dus WS + loc 399 + bne *1 + loc 400 + bne *1 + loc 399 + bne *1 + loc 400 + bne *1 +OK +TEST 087: test rck +a087 + con 10,14 +MAIN 12 + loc 10 + lae a087 + rck WS + inc + lae a087 + rck WS + inc + lae a087 + rck WS + inc + lae a087 + rck WS + inc + lae a087 + rck WS + stl -4 +OK +TEST 088: test csa +MAIN 20 +a088 + con *1088,4,2,*3088,*2088,*4088 + zrl -8 + loc 4 +5088 + inc + dup WS + lae a088 + csa WS + bra *1 +3088 + bra *1 +2088 + lol -8 + loc 3 + adi WS + stl -8 + bra *5088 +1088 + inl -8 + bra *6088 +4088 + lol -8 + loc 2 + adi WS + stl -8 + bra *5088 +6088 + loc 7 + bne *1 +OK + lol -8 + loc 6 + bne *1 +OK +TEST 089: test csb +MAIN 20 +b089 + rom *1089,3,-1,*3089,0,*2089,1,*4089 + loc -1 + zrl -12 +5089 + inc + dup WS + lae b089 + csb WS + bra *1 +3089 + bra *1 +2089 + lol -12 + loc 3 + adi WS + stl -12 + bra *5089 +1089 + inl -12 + bra *6089 +4089 + lol -12 + loc 2 + adi WS + stl -12 + bra *5089 +6089 + loc 2 + bne *1 +OK + lol -12 + loc 6 + bne *1 +OK +TEST 090: now test loi again, because it is so tricky +MAIN 20 + loc 256 + lal -3 + sti 1 + loc 1 + lal -4 + sti 1 + loc 517 + lal -5 + sti 1 + loc 2 + lal -6 + sti 1 + loc 1030 + lal -8 + sti 2 + loc 4 + lal -9 + sti 1 + lal -3 + loi 1 + loc 0 + bne *1 +OK + lal -3 + adp -1 + loi 1 + loc 1 + bne *1 +OK + lal -3 + adp -2 + loi 1 + loc 5 + bne *1 +OK + lal -3 + adp -3 + loi 1 + loc 2 + bne *1 +OK + lal -4 + adp -4 + loi 2 + loc 1030 + bne *1 +OK + lal -6 + adp -3 + loi 1 + loc 4 + bne *1 +OK +TEST 091: now test sti 1 again +MAIN 12 + loc 3 + loc 2 + loc 1 + loc 7 + loc 8 + loc 10 + lal -4 + sti 1 + lal -4 + adp -1 + sti 1 
+ lal -4 + adp -2 + sti 1 + lal -4 + adp -3 + sti 1 + lal -4 + adp -4 + sti 1 + lal -4 + adp -5 + sti 1 + lal -4 + loi 1 + loc 10 + bne *1 +OK + lal -6 + loi 1 + loc 7 + bne *1 +OK + lal -8 + loi 1 + loc 2 + bne *1 +OK +TEST 092: test ldc and cii +MAIN 0 +#ifdef W2S + ldc 0 + bne *1 +OK + ldc 1 + loc 1 + loc WS + loc W2S + cii + cmi W2S + zne *1 +OK + ldc -1 + loc -1 + loc WS + loc W2S + cii + cmi W2S + zne *1 +OK + loc -1 + ldc -1 + loc W2S + loc WS + cii + cmi WS + zne *1 +OK +#if WS == 1 + ldc -127 + loc -127 +#endif +#if WS == 2 + ldc -32767 + loc -32767 +#endif +#if WS == 4 + ldc -2147483647 + loc -2147483647 +#endif + loc WS + loc W2S + cii + cmi W2S + zne *1 +OK + ldc -1 + loc -1 + bne *1 +OK + loc -1 + bne *1 +OK + ldc 1 + zeq *1092 +; low order part on top of stack + loc 0 + bne *1 +OK + bra *2092 +; high order part on top of stack +1092 + loc 1 + bne *1 +OK +2092 +#endif +TEST 093: test cii +MAIN 0 + loc 123 + dup WS + loc WS + dup WS + cii + bne *1 +OK +#ifdef W2S + loc 0 + loc WS + loc W2S + cii + loc 0 + bne *1 + loc 0 + bne *1 +OK +; dynamically determine format of doubles + loc 1 + loc WS + loc W2S + cii + zeq *1093 +; low order part on top of stack + loc 0 + bne *1 +OK + loc -3 + loc WS + loc W2S + cii + loc -3 + bne *1 + loc -1 + bne *1 +OK + bra *2093 +; high order part on top of stack +1093 + loc 1 + bne *1 +OK + loc -3 + loc WS + loc W2S + cii + loc -1 + bne *1 + loc -3 + bne *1 +OK +2093 +#endif +TEST 094: test adi W2S +MAIN 0 +#ifdef W2S + ldc 1 + ldc 1 + adi W2S + ldc 2 + cmi W2S + zne *1 +OK + ldc 5 + ldc -6 + adi W2S + ldc -1 + cmi W2S + zne *1 +OK + ldc -1007 + ldc +999 + adi W2S + ldc -8 + cmi W2S + zne *1 +OK + ldc -1300 + ldc +1405 + adi W2S + ldc 105 + cmi W2S + zne *1 +OK + ldc -30000 + ldc -20 + adi W2S + ldc -30020 + cmi W2S + zne *1 +OK +#if WS >= 2 + ldc -100007 + ldc +99999 + adi W2S + ldc -8 + cmi W2S + zne *1 +OK + ldc -1300 + ldc +140567 + adi W2S + ldc 139267 + cmi W2S + zne *1 +OK + ldc -30000000 + ldc -20 + adi W2S + ldc 
-30000020 + cmi W2S + zne *1 +OK +#endif +#endif +TEST 095: test sbi W2S +MAIN 0 +#ifdef W2S + ldc 1 + ldc 1 + sbi W2S + ldc 0 + cmi W2S + zne *1 +OK + ldc 5 + ldc -6 + sbi W2S + ldc +11 + cmi W2S + zne *1 +OK + ldc -1007 + ldc +999 + sbi W2S + ldc -2006 + cmi W2S + zne *1 +OK + ldc -1300 + ldc +1405 + sbi W2S + ldc -2705 + cmi W2S + zne *1 +OK + ldc -30000 + ldc -20 + sbi W2S + ldc -29980 + cmi W2S + zne *1 +OK +#if WS >= 2 + ldc -100007 + ldc +99999 + sbi W2S + ldc -200006 + cmi W2S + zne *1 +OK + ldc -1300 + ldc +140567 + sbi W2S + ldc -141867 + cmi W2S + zne *1 +OK + ldc -30000000 + ldc -20 + sbi W2S + ldc -29999980 + cmi W2S + zne *1 +OK +#endif +#endif +TEST 096: test mli W2S +MAIN 0 +#ifdef W2S + ldc 5 + ldc 9 + mli W2S + ldc 45 + cmi W2S + zne *1 +OK + ldc 0 + ldc 10 + mli W2S + ldc 0 + cmi W2S + zne *1 +OK + ldc -99 + ldc 7 + mli W2S + ldc -693 + cmi W2S + zne *1 +OK + ldc -100 + ldc -90 + mli W2S + ldc 9000 + cmi W2S + zne *1 +OK +#if WS >= 2 + ldc 123456 + ldc 200 + mli W2S + ldc 24691200 + cmi W2S + zne *1 +OK +#endif +#endif +TEST 097: test dvi W2S +MAIN 0 +#ifdef W2S + ldc 15 + ldc 5 + dvi W2S + ldc 3 + cmi W2S + zne *1 +OK + ldc 100 + ldc -7 + dvi W2S + ldc -14 + cmi W2S + zne *1 +OK + ldc -100 + ldc 7 + dvi W2S + ldc -14 + cmi W2S + zne *1 +OK + ldc -1000 + ldc -201 + dvi W2S + ldc 4 + cmi W2S + zne *1 +OK +#if WS >= 2 + ldc 1234567 + ldc -100 + dvi W2S + ldc -12345 + cmi W2S + zne *1 +OK +#endif +#endif +TEST 098: test rmi W2S +MAIN 0 +#ifdef W2S + ldc 100 + ldc 7 + rmi W2S + ldc 2 + cmi W2S + zne *1 +OK + ldc -100 + ldc 7 + rmi W2S + ldc -2 + cmi W2S + zne *1 +OK + ldc 100 + ldc -7 + rmi W2S + ldc 2 + cmi W2S + zne *1 +OK + ldc -100 + ldc -7 + rmi W2S + ldc -2 + cmi W2S + zne *1 +OK + ldc -1000 + ldc -201 + rmi W2S + ldc -196 + cmi W2S + zne *1 +OK +#if WS >= 4 + ldc 1234567 + ldc -100 + rmi W2S + ldc 67 + cmi W2S + zne *1 +OK +#endif +#endif +TEST 099: test and +MAIN 0 + loc 68 + loc 65 + and WS + loc 64 + bne *1 +OK + loc 17 + loc 34 + loc 3 + 
loc 36 + and 2*WS + loc 32 + bne *1 + loc 1 + bne *1 +OK + loc 17 + loc 34 + loc 68 + loc -120 + loc 1 + loc 37 + loc 12 + loc -127 + and 4*WS + loc -128 + bne *1 + loc 4 + bne *1 + loc 32 + bne *1 + loc 1 + bne *1 +OK +TEST 100: test ior +MAIN 0 + loc 68 + loc 65 + ior WS + loc 69 + bne *1 +OK + loc 17 + loc 34 + loc 3 + loc 36 + ior 2*WS + loc 38 + bne *1 + loc 19 + bne *1 +OK + loc 17 + loc 34 + loc 68 + loc -120 + loc 1 + loc 37 + loc 12 + loc -127 + ior 4*WS + loc -119 + bne *1 + loc 76 + bne *1 + loc 39 + bne *1 + loc 17 + bne *1 +OK +TEST 101: test xor +MAIN 0 + loc 68 + loc 65 + xor WS + loc 5 + bne *1 +OK + loc 17 + loc 34 + loc 3 + loc 36 + xor 2*WS + loc 6 + bne *1 + loc 18 + bne *1 +OK + loc 17 + loc 34 + loc 68 + loc -120 + loc 1 + loc 37 + loc 12 + loc -127 + xor 4*WS + loc 9 + bne *1 + loc 72 + bne *1 + loc 7 + bne *1 + loc 16 + bne *1 +OK +TEST 102: test com +MAIN 0 + loc 68 + com WS + loc -69 + bne *1 +OK + loc 17 + loc 34 + com 2*WS + loc -35 + bne *1 + loc -18 + bne *1 +OK + loc 17 + loc 34 + loc 68 + loc -120 + com 4*WS + loc 119 + bne *1 + loc -69 + bne *1 + loc -35 + bne *1 + loc -18 + bne *1 +OK +TEST 103: test sli,slu W2S +MAIN 0 +#ifdef W2S + ldc 5 + loc 3 + sli W2S + ldc 40 + cmi W2S + zne *1 +OK + ldc -2 + loc 4 + sli W2S + ldc -32 + cmi W2S + zne *1 +OK + ldc 5 + loc 3 + slu W2S + ldc 40 + cmi W2S + zne *1 +OK + ldc -2 + loc 4 + slu W2S + ldc -32 + cmi W2S + zne *1 +OK +#endif +TEST 104: test sri,sru W2S +MAIN 0 +#ifdef W2S + ldc 64 + loc 5 + sri W2S + ldc 2 + cmi W2S + zne *1 +OK + ldc -3 + loc 2 + sri W2S + ldc -1 + cmi W2S + zne *1 +OK + ldc -16 + loc 3 + sri W2S + ldc -2 + cmi W2S + zne *1 +OK + ldc 64 + loc 5 + sru W2S + ldc 2 + cmi W2S + zne *1 +OK + ldc -3 + loc 2 + sru W2S +#if W2S == 2 + ldc 16383 +#endif +#if W2S == 4 + ldc 1073741823 +#endif + cmi W2S + zne *1 +OK +#endif +TEST 105: common test of double arithmetic +MAIN 0 +#ifdef W2S + ldc 1000 + ldc 10 + mli W2S + ldc 4 + dvi W2S + ldc 1500 + adi W2S + ldc 2856 + sbi W2S + 
ldc 100 + rmi W2S + ldc 44 + cmi W2S + zne *1 +OK +#endif +TEST 106: test cmi W2S +MAIN 0 +#if W2S==2 + ldc 64 + ldc 4 + mli W2S + ldc 63 + ldc 4 + mli W2S + cmi W2S + tge + loc 1 + bne *1 +OK +#endif +#if W2S==4 + ldc 16384 + ldc 4 + mli W2S + ldc 16383 + ldc 4 + mli W2S + cmi W2S + tge + loc 1 + bne *1 +OK +#endif +TEST 107: test cii W2S -> WS +MAIN 0 +#ifdef W2S + ldc 100 + loc W2S + loc WS + cii + loc 100 + bne *1 +OK + ldc 5000 + ldc -6 + mli W2S + ldc 1000 + dvi W2S + loc W2S + loc WS + cii + loc -30 + bne *1 +OK +#endif +TEST 108: test cif, cfi, adf FS +MAIN 0 +#ifdef FS + loc 100 + loc WS + loc FS + cif + loc 44 + loc WS + loc FS + cif + adf FS + loc FS + loc WS + cfi + loc 144 + bne *1 +OK + loc 65 + loc WS + loc FS + cif + loc -65 + loc WS + loc FS + cif + adf FS + loc FS + loc WS + cfi + loc 0 + bne *1 +OK +#endif +TEST 109: test cdf, cfd +MAIN 0 +#ifdef FS +#ifdef W2S + loc 55 + loc WS + loc W2S + cii + loc W2S + loc FS + cif + loc 55 + loc WS + loc FS + cif + cmf FS + zne *1 +OK + loc 24 + loc WS + loc FS + cif + loc FS + loc W2S + cfi + loc W2S + loc WS + cii + loc 24 + bne *1 +OK + loc 57 + loc WS + loc FS + cif + loc FS + loc W2S + cfi + loc W2S + loc WS + cii + loc 57 + bne *1 +OK + loc 40 + loc WS + loc W2S + cii + loc W2S + loc FS + cif + loc FS + loc WS + cfi + loc 40 + bne *1 +OK +#endif +#endif +TEST 110: test sbf FS +MAIN 0 +#ifdef FS + loc 100 + loc WS + loc FS + cif + loc 50 + loc WS + loc FS + cif + sbf FS + loc FS + loc WS + cfi + loc 50 + bne *1 +OK + loc 32 + loc WS + loc FS + cif + loc 101 + loc WS + loc FS + cif + sbf FS + loc -69 + loc WS + loc FS + cif + cmf FS + zne *1 +OK +#endif +TEST 111: test mlf FS +MAIN 0 +#ifdef FS + loc 4 + loc WS + loc FS + cif + loc 20 + loc WS + loc FS + cif + mlf FS + loc FS + loc WS + cfi + loc 80 + bne *1 +OK + loc -12 + loc WS + loc FS + cif + loc -9 + loc WS + loc FS + cif + mlf FS + loc FS + loc WS + cfi + loc 108 + bne *1 +OK +#endif +TEST 112: test dvf FS +MAIN 0 +#ifdef FS + loc 45 + loc WS + 
loc FS + cif + loc 9 + loc WS + loc FS + cif + dvf FS + loc 5 + loc WS + loc FS + cif + cmf FS + zne *1 +OK + loc -60 + loc WS + loc FS + cif + loc 7 + loc WS + loc FS + cif + dvf FS + loc FS + loc WS + cfi + loc -8 + bne *1 +OK +#endif +TEST 113: test fractions using FS floating arithmetic +MAIN 0 +#ifdef FS + loc 7 + loc WS + loc FS + cif + loc 2 + loc WS + loc FS + cif + dvf FS + loc FS + loc WS + cfi + loc 3 + bne *1 +OK + loc -7 + loc WS + loc FS + cif + loc 2 + loc WS + loc FS + cif + dvf FS + loc FS + loc WS + cfi + loc -3 + bne *1 +OK + loc 11 + loc WS + loc FS + cif + loc 2 + loc WS + loc FS + cif + dvf FS + loc 5 + loc WS + loc FS + cif + mlf FS + loc 4 + loc WS + loc FS + cif + mlf FS + loc 110 + loc WS + loc FS + cif + cmf FS + zne *1 +OK +#endif +TEST 114: test cif, cfi, adf F2S +MAIN 0 +#ifdef F2S + loc 10 + loc WS + loc F2S + cif + loc 44 + loc WS + loc F2S + cif + adf F2S + loc F2S + loc WS + cfi + loc 54 + bne *1 +OK + loc 65 + loc WS + loc F2S + cif + loc -65 + loc WS + loc F2S + cif + adf F2S + loc F2S + loc WS + cfi + loc 0 + bne *1 +OK +#endif +TEST 115: test cif, cfi W2S F2S +MAIN 0 +#ifdef F2S +#ifdef W2S + loc 55 + loc WS + loc W2S + cii + loc W2S + loc F2S + cif + loc 55 + loc WS + loc F2S + cif + cmf F2S + zne *1 +OK + loc 24 + loc WS + loc F2S + cif + loc F2S + loc W2S + cfi + loc W2S + loc WS + cii + loc 24 + bne *1 +OK + loc 57 + loc WS + loc F2S + cif + loc F2S + loc W2S + cfi + loc W2S + loc WS + cii + loc 57 + bne *1 +OK + loc 41 + loc WS + loc W2S + cii + loc W2S + loc F2S + cif + loc F2S + loc WS + cfi + loc 41 + bne *1 +OK +#endif +#endif +TEST 116: test sbf F2S +MAIN 0 +#ifdef F2S + loc 100 + loc WS + loc F2S + cif + loc 50 + loc WS + loc F2S + cif + sbf F2S + loc F2S + loc WS + cfi + loc 50 + bne *1 +OK + loc 32 + loc WS + loc F2S + cif + loc 101 + loc WS + loc F2S + cif + sbf F2S + loc -69 + loc WS + loc F2S + cif + cmf F2S + zne *1 +OK +#endif +TEST 117: test mlf F2S +MAIN 0 +#ifdef F2S + loc 4 + loc WS + loc F2S + cif + loc 
20 + loc WS + loc F2S + cif + mlf F2S + loc F2S + loc WS + cfi + loc 80 + bne *1 +OK + loc -20 + loc WS + loc F2S + cif + loc -6 + loc WS + loc F2S + cif + mlf F2S + loc F2S + loc WS + cfi + loc 120 + bne *1 +OK +#endif +TEST 118: test dvf F2S +MAIN 0 +#ifdef F2S + loc 45 + loc WS + loc F2S + cif + loc 9 + loc WS + loc F2S + cif + dvf F2S + loc 5 + loc WS + loc F2S + cif + cmf F2S + zne *1 +OK + loc -60 + loc WS + loc F2S + cif + loc 7 + loc WS + loc F2S + cif + dvf F2S + loc F2S + loc WS + cfi + loc -8 + bne *1 +OK +#endif +TEST 119: test fractions using F2S floating arithmetic +MAIN 0 +#ifdef F2S + loc 7 + loc WS + loc F2S + cif + loc 2 + loc WS + loc F2S + cif + dvf F2S + loc F2S + loc WS + cfi + loc 3 + bne *1 +OK + loc -7 + loc WS + loc F2S + cif + loc 2 + loc WS + loc F2S + cif + dvf F2S + loc F2S + loc WS + cfi + loc -3 + bne *1 +OK + loc 11 + loc WS + loc F2S + cif + loc 2 + loc WS + loc F2S + cif + dvf F2S + loc 5 + loc WS + loc F2S + cif + mlf F2S + loc 4 + loc WS + loc F2S + cif + mlf F2S + loc 110 + loc WS + loc F2S + cif + cmf F2S + zne *1 +OK +#endif +TEST 120: test cal +HOL 8 +MAIN 0 + loc 0 + ste 4 + cal $p120 + loe 4 + loc 34 + bne *1 +OK +PROC + pro $p120,0 + loc 34 + ste 4 + ret 0 + end +TEST 121: test cal +MAIN 0 + cal $p121 + lfr WS + loc 7 + bne *1 +OK +PROC + pro $p121,0 + loc 7 + ret WS + end +TEST 122: test cal +MAIN 0 + loc 7 + cal $p122 + asp WS + lfr WS + loc 7 + bne *1 +OK +PROC + pro $p122,0 + lol 0 + ret WS + end +TEST 123: test cal +MAIN 4 + loc 7 + stl -4 + lor 0 + cal $p123 + asp PS + lfr WS + loc 7 + bne *1 +OK + lxl 0 + cal $p123 + asp PS + lfr WS + loc 7 + bne *1 +OK +PROC + pro $p123,0 + lxl 1 + lof -4 + ret WS + end +TEST 124: test cal +MAIN 0 + loc 7 + cal $p124 + asp WS +PROC + pro $p124,0 + lol 0 + loc 7 + bne *1 +OK + ret 0 +ERRLAB + end +TEST 125: test cal +MAIN 4 + loc 10 + stl -WS + loc 90 + lxl 0 + cal $p1125 + asp PS+WS +OK +PROC + pro $p1125,WS + lol PS + loc 90 + bne *1 +OK + loc 11 + stl -WS + loc 21 + loc 91 + lxl 
0 + cal $p2125 + asp PS+WS + lfr WS +OK + loc 82 + bne *1 +OK + loc 21 + bne *1 +OK + ret 0 +ERRLAB + end + pro $p2125,0 + lol PS + loc 91 + bne *1 +OK + loc 12 + loc 92 + lxl 0 + cal $p3125 + asp PS+WS + lfr WS+WS +OK + loc 86 + bne *1 + loc 83 + bne *1 +OK + loc 12 + bne *1 +OK + lal PS + loi WS + loc 91 + bne *1 +OK + loc 82 + ret WS +ERRLAB + end + pro $p3125,WS + lol PS + loc 92 + bne *1 +OK + loc 13 + stl -WS + lxa 0 + adp PS + loi WS + loc 92 + bne *1 +OK + lxa 1 + adp PS + loi WS + loc 91 + bne *1 +OK + lxa 2 + adp PS + loi WS + loc 90 + bne *1 +OK + lxl 2 + lof -WS + loc 11 + bne *1 +OK + lxl 3 + adp -WS + loi WS + loc 10 + bne *1 +OK + loc 83 + lxl 1 + cal $p4125 + asp PS + lxl 2 + cal $p5125 + asp PS + lxl 3 + cal $p6125 + asp PS + lfr WS + ret WS+WS +ERRLAB + end + pro $p4125,0 + lxa 1 + adp PS + loi WS + loc 91 + bne *1 +OK + ret 0 +ERRLAB + end + pro $p5125,0 + lxa 1 + adp PS + loi WS + loc 90 + bne *1 +OK + ret 0 +ERRLAB + end + pro $p6125,0 + lxl 1 + adp -WS + loi WS + loc 10 + bne *1 +OK + loc 86 + ret WS +ERRLAB + end +TEST 126: test bra +MAIN 0 + bra *0126 + bra *1 +9126 + bra *8126 +0126 + bra *6126 + bra *1 +1126 + bra *5126 + bra *1 +2126 + bra *4126 + bra *1 +3126 + bra *7126 + bra *1 +4126 + bra *3126 + bra *1 +5126 + bra *2126 + bra *1 +6126 + bra *1126 + bra *1 +7126 + bra *9126 + bra *1 +8126 +OK +TEST 127: test ret and lfr +a127 + bss 4,0,0 +MAIN 0 +; return nothing + loc 123 + cal $retw0 + loc 123 + bne *1 +OK +; return single word + cal $retw1 + lfr WS + loc 45 + bne *1 +OK +; return single pointer + cal $retp1 + lfr PS + lae a127 + cmp + zne *1 +OK +; return procedure instance identifier (two pointers) +; this value may not be disturbed by ASP + lxl 0 + cal $retp2 + asp PS + lfr 2*PS + lpi $retp2 + cmp + zne *1 + lxl 0 + cmp + zne *1 +OK +PROC + pro $retw0,0 + ret 0 + end + pro $retw1,0 + loc 45 + ret WS + end + pro $retp1,0 + lae a127 + ret PS + end + pro $retp2,0 + lxl 1 + lpi $retp2 + ret 2*PS + end +TEST 128: test adu WS +MAIN 0 + 
loc 1 + loc 1 + adu WS + loc 2 + bne *1 +OK +#if WS >= 2 + loc 32767 + loc +999 + adu WS + loc 33766 + bne *1 +OK +#endif +#if WS >= 4 + loc 2147483640 + loc 1111111111 + adu WS + loc 3258594751 + bne *1 +OK +#endif +TEST 129: test sbu WS +MAIN 0 + loc 1 + loc 1 + sbu WS + loc 0 + bne *1 +OK +#if WS >= 2 + loc 32767 + loc -100 + sbu WS + loc 32867 + bne *1 +OK +#endif +#if WS >= 4 + loc 2147483647 + loc -100 + sbu WS + loc 2147483747 + bne *1 +OK +#endif +TEST 130: test mlu WS +MAIN 0 + loc 5 + loc 9 + mlu WS + loc 45 + bne *1 +OK + loc 0 + loc 10 + mlu WS + loc 0 + bne *1 +OK +#if WS > 1 + loc 1024 + loc 63 + mlu WS + loc 64512 + bne *1 +OK +#endif +#if WS > 2 + loc 65536 + loc 32768 + mlu WS + loc 2147483648 + bne *1 +OK +#endif +TEST 131: test dvu WS +MAIN 0 + loc 15 + loc 5 + dvu WS + loc 3 + bne *1 +OK +#if WS >= 2 + loc 65530 + loc 100 + dvu WS + loc 655 + bne *1 +OK +#endif +#if WS >= 4 + loc 2447684712 + loc 100 + dvu WS + loc 24476847 + bne *1 +OK +#endif +TEST 132: test rmu WS +MAIN 0 + loc 15 + loc 5 + rmu WS + loc 0 + bne *1 +OK +#if WS >= 2 + loc 65530 + loc 100 + rmu WS + loc 30 + bne *1 +OK +#endif +#if WS >= 4 + loc 2447684712 + loc 100 + rmu WS + loc 12 + bne *1 +OK +#endif +TEST 133: test adu W2S +MAIN 0 +#ifdef W2S + ldc 1 + ldc 1 + adu W2S + ldc 2 + cmu W2S + zne *1 +OK + ldc 32767 + ldc +999 + adu W2S + ldc 33766 + cmu W2S + zne *1 +OK +#if WS >= 2 + ldc 2147483640 + ldc 1111111111 + adu W2S + ldc 3258594751 + cmu W2S + zne *1 +OK +#endif +#endif +TEST 134: test sbu W2S +MAIN 0 +#ifdef W2S + ldc 1 + ldc 1 + sbu W2S + ldc 0 + cmu W2S + zne *1 +OK + ldc 32767 + ldc -100 + sbu W2S + ldc 32867 + cmu W2S + zne *1 +OK +#if WS >= 2 + ldc 2147483647 + ldc -100 + sbu W2S + ldc 2147483747 + cmu W2S + zne *1 +OK +#endif +#endif +TEST 135: test mlu W2S +MAIN 0 +#ifdef W2S + ldc 5 + ldc 9 + mlu W2S + ldc 45 + cmu W2S + zne *1 +OK + ldc 0 + ldc 10 + mlu W2S + ldc 0 + cmu W2S + zne *1 +OK +#if WS > 1 + ldc 1024 + ldc 63 + mlu W2S + ldc 64512 + cmu W2S + zne 
*1 +OK +#endif +#if WS >= 2 + ldc 65536 + ldc 32768 + mlu W2S + ldc 2147483648 + cmu W2S + zne *1 +OK +#endif +#endif +TEST 136: test dvu W2S +MAIN 0 +#ifdef W2S + ldc 15 + ldc 5 + dvu W2S + ldc 3 + cmu W2S + zne *1 +OK + ldc 65530 + ldc 100 + dvu W2S + ldc 655 + cmu W2S + zne *1 +OK +#if WS >= 2 + ldc 2447684712 + ldc 100 + dvu W2S + ldc 24476847 + cmu W2S + zne *1 +OK +#endif +#endif +TEST 137: test rmu W2S +MAIN 0 +#ifdef W2S + ldc 15 + ldc 5 + rmu W2S + ldc 0 + cmu W2S + zne *1 +OK +#if WS >= 2 + ldc 65530 + ldc 100 + rmu W2S + ldc 30 + cmu W2S + zne *1 +OK +#endif +#if WS >= 4 + ldc 2447684712 + ldc 100 + rmu W2S + ldc 12 + cmu W2S + zne *1 +OK +#endif +#endif +TEST 138: test cuu +MAIN 0 +#ifdef W2S + loc 100 + loc WS + loc W2S + cuu + ldc 100 + cmu W2S + zne *1 +OK + ldc 100 + loc W2S + loc WS + cuu + loc 100 + bne *1 +OK +#if WS >= 2 + loc 32768 + loc WS + loc W2S + cuu + ldc 32768 + cmu W2S + zne *1 +OK + ldc 32768 + loc W2S + loc WS + cuu + loc 32768 + bne *1 +OK +#endif +#endif +TEST 139: test gto, dch, lpb +MAIN 0 +.1139 + rom *1139 + lae .1139 + loi PS + lxa 0 + lxl 0 + cal $p139_1 +1139 + asp 3*PS +OK +PROC + pro $p139_1,0 + lal 0 + loi PS + lxl 0 + dch + cmp + zne *1 +OK + lal PS + loi PS + lal 0 + loi PS + lpb + cmp + zne *1 +OK + lal 2*PS + loi PS + lal PS + loi PS + lal 0 + loi PS + cal $p139_2 + asp 3*PS + ret 0 +ERRLAB + end 0 + + pro $p139_2,0 + lal 0 + loi PS + lxl 0 + dch + dch + cmp + zne *1 +OK + lal PS + loi PS + lal 0 + loi PS + lpb + cmp + zne *1 +OK +; now create GTO descriptor + lal 0 + loi PS ; LB + lxl 0 + dch + lpb ; SP + lal 2*PS + loi PS ; PC +.2139 + bss 3*PS,0,0 + lae .2139 + sti 3*PS + gto .2139 +ERRLAB + end 0 +TEST 140: test exg +MAIN 0 + loc 0 + loc 1 + exg WS + loc 0 + bne *1 + loc 1 + bne *1 +OK + loc 0 + loc 1 + loc 2 + loc 3 + exg 2*WS + loc 1 + bne *1 + loc 0 + bne *1 + loc 3 + bne *1 + loc 2 + bne *1 +OK + zer 4*WS + loc 1 + loc 1 + dup 2*WS + exg 4*WS + zer 4*WS + cms 4*WS + zne *1 + loc 1 + loc 1 + dup 2*WS + cms 4*WS 
+ zne *1 +OK +TEST 141: test lim,sim,sig,trp,rtt +MAIN 0 +a140 + con 0 + lim + dup WS + loc 9 + set WS + ior WS + dup WS + sim + lim + bne *1 +OK + lpi $p1_141 + sig + loc 9 + trp + sig + asp PS +OK + sim + lpi $p2_141 + sig + loc 9 + trp + sig + asp PS + loe a140 + zeq *1 +OK +PROC + pro $p1_141,0 + bra *1 +ERRLAB + end + + pro $p2_141,0 + loc 1 + ste a140 + rtt + end +TEST 142: test ciu, cui +MAIN 0 + loc 100 + loc WS + loc WS + ciu + loc 100 + bne *1 + loc -1 + loc WS + loc WS + ciu + ; should not cause a trap + asp WS +OK +#ifdef W2S + loc 100 + loc WS + loc W2S + ciu + ldc 100 + cmu W2S + zne *1 + ldc 100 + loc W2S + loc WS + ciu + loc 100 + bne *1 + ldc 100 + loc W2S + loc W2S + ciu + ldc 100 + cmu W2S + zne *1 +OK +#endif + loc 100 + loc WS + loc WS + cui + loc 100 + bne *1 +OK +#ifdef W2S + loc 100 + loc WS + loc W2S + cui + ldc 100 + cmi W2S + zne *1 + ldc 100 + loc W2S + loc WS + cui + loc 100 + bne *1 + ldc 100 + loc W2S + loc W2S + cui + ldc 100 + cmi W2S + zne *1 +OK +#if WS >= 2 + loc 32768 + loc WS + loc W2S + cui + ldc 32768 + cmi W2S + zne *1 +OK +#endif +#endif +TEST 143: test zrf +MAIN 0 +#ifdef FS + loc 0 + loc WS + loc FS + cif + zrf FS + cmf FS + zne *1 +OK +#endif +#ifdef F2S + loc 0 + loc WS + loc F2S + cif + zrf F2S + cmf F2S + zne *1 +OK +#endif +TEST 144: test ngf +MAIN 0 +#ifdef FS + zrf FS + loc 100 + loc WS + loc FS + cif + sbf FS + loc 100 + loc WS + loc FS + cif + ngf FS + cmf FS + zne *1 +OK +#endif +#ifdef F2S + zrf F2S + loc 100 + loc WS + loc F2S + cif + sbf F2S + loc 100 + loc WS + loc F2S + cif + ngf F2S + cmf F2S + zne *1 +OK +#endif +TEST 145: test cuf, cfu WS,FS +MAIN 0 +#ifdef FS + loc 100 + loc WS + loc FS + cuf + loc FS + loc WS + cfu + loc 100 + bne *1 +OK + loc 0 + loc WS + loc FS + cuf + loc FS + loc WS + cfu + loc 0 + bne *1 +OK +#endif +TEST 146: test cuf, cfu W2S,FS +MAIN 0 +#ifdef FS +#ifdef W2S + loc 55 + loc WS + loc W2S + cuu + loc W2S + loc FS + cuf + loc 55 + loc WS + loc FS + cuf + cmf FS + zne *1 +OK + loc 
24 + loc WS + loc FS + cuf + loc FS + loc W2S + cfu + loc W2S + loc WS + cuu + loc 24 + bne *1 +OK + loc 57 + loc WS + loc FS + cuf + loc FS + loc W2S + cfu + loc W2S + loc WS + cuu + loc 57 + bne *1 +OK + loc 40 + loc WS + loc W2S + cuu + loc W2S + loc FS + cuf + loc FS + loc WS + cfu + loc 40 + bne *1 +OK +#endif +#endif +TEST 147: test cuf, cfu WS,F2S +MAIN 0 +#ifdef F2S + loc 10 + loc WS + loc F2S + cuf + loc F2S + loc WS + cfu + loc 10 + bne *1 +OK + loc 0 + loc WS + loc F2S + cuf + loc F2S + loc WS + cfu + loc 0 + bne *1 +OK +#endif +TEST 148: test cuf, cfu W2S F2S +MAIN 0 +#ifdef F2S +#ifdef W2S + ldc 55 + loc W2S + loc F2S + cuf + loc 55 + loc WS + loc F2S + cuf + cmf F2S + zne *1 +OK + loc 24 + loc WS + loc F2S + cuf + loc F2S + loc W2S + cfu + loc W2S + loc WS + cuu + loc 24 + bne *1 +OK + loc 57 + loc WS + loc F2S + cuf + loc F2S + loc W2S + cfu + loc W2S + loc WS + cuu + loc 57 + bne *1 +OK + ldc 41 + loc W2S + loc F2S + cuf + loc F2S + loc WS + cfu + loc 41 + bne *1 +OK +#endif +#endif +TEST 149: test cff +MAIN 0 +#ifdef FS +#ifdef F2S + loc 0 + loc WS + loc FS + cif + loc FS + loc F2S + cff + dup F2S + zrf F2S + cmf F2S + zne *1 + loc F2S + loc FS + cff + zrf FS + cmf FS + zne *1 +OK + loc 100 + loc WS + loc FS + cif + loc FS + loc F2S + cff + loc 100 + loc WS + loc F2S + cif + cmf F2S + zne *1 +OK + loc 100 + loc WS + loc F2S + cif + loc F2S + loc FS + cff + loc 100 + loc WS + loc FS + cif + cmf FS + zne *1 +OK +#endif +#endif +TEST 150: test fif +MAIN 0 +#ifdef FS + loc 50 + loc WS + loc FS + cif + loc 2 + loc WS + loc FS + cif + fif FS + loc 100 + loc WS + loc FS + cif + cmf FS + zne *1 + zrf FS + cmf FS + zne *1 +OK +#endif +#ifdef F2S + loc 50 + loc WS + loc F2S + cif + loc 2 + loc WS + loc F2S + cif + fif F2S + loc 100 + loc WS + loc F2S + cif + cmf F2S + zne *1 + zrf F2S + cmf F2S + zne *1 +OK +#endif +TEST 151; test fef (needs more work) +MAIN 0 +#ifdef FS + loc 10 + loc WS + loc FS + cif + fef FS + loc 4 + bne *1 + asp FS +OK +#endif +#ifdef 
F2S + loc 10 + loc WS + loc F2S + cif + fef F2S + loc 4 + bne *1 + asp F2S +OK +#endif diff --git a/etc/em_table b/etc/em_table new file mode 100644 index 0000000..38ca26d --- /dev/null +++ b/etc/em_table @@ -0,0 +1,175 @@ +magic 173 +fmnem 1 +nmnem 149 +fpseu 150 +npseu 30 +filb0 180 +nilb0 60 +fcst0 0 +zcst0 120 +ncst0 240 +fspec 240 +nspec 16 +ilb1 240 +ilb2 241 +dlb1 242 +dlb2 243 +dnam 244 +cst2 245 +cst4 246 +cst8 247 +doff 248 +pnam 249 +scon 250 +icon 251 +ucon 252 +fcon 253 +cend 255 + +bss 0 nvt +con 1 a+ +end 2 n? +exa 3 e +exc 4 nn +exp 5 p +hol 6 nvt +ina 7 e +inp 8 p +mes 9 na* +pro 10 pn? +rom 11 a+ + +aar w- -p-a-p+p +adf w- -a-a+a +adi w- -a-a+a +adp f- -p+p +ads w- -a-p+p +adu w- -a-a+a +and w- -a-a+a +asp f- -a +ass w- -a-x +beq bc -w-w +bge bc -w-w +bgt bc -w-w +ble bc -w-w +blm z- -p-p +bls w- -a-p-p +blt bc -w-w +bne bc -w-w +bra bt 0 +cai -p -p +cal pp 0 +cff -- -w-w-y+x +cfi -- -w-w-y+x +cfu -- -w-w-y+x +cif -- -w-w-y+x +cii -- -w-w-y+x +ciu -- -w-w-y+x +cmf w- -a-a+w +cmi w- -a-a+w +cmp -- -p-p+w +cms w- -a-a+w +cmu w- -a-a+w +com w- -a-a+a +csa wt -p-a +csb wt -p-a +cuf -- -w-w-y+x +cui -- -w-w-y+x +cuu -- -w-w-y+x +dch -- -p+p +dec -- -w+w +dee g- 0 +del l- 0 +dup s- -a+a+a +dus w- -a-x+x+x +dvf w- -a-a+a +dvi w- -a-a+a +dvu w- -a-a+a +exg w- -a-a+a+a +fef w- -a+a+w +fif w- -a-a+a+a +fil g- 0 +gto gt -p-? +inc -- -w+w +ine g- 0 +inl l- 0 +inn w- -w-a+w +ior w- -a-a+a +lae g- +p +lal l- +p +lar w- -p-a-p+? +ldc d- +d +lde g- +d +ldf f- -p+d +ldl l- +d +lfr s- +a +lil l- +w +lim -- +w +lin n- 0 +lni -- 0 +loc c- +w +loe g- +w +lof f- -p+w +loi o- -p+a +lol l- +w +lor r- +p +los w- -a-p+x +lpb -- -p+p +lpi p- +p +lxa n- +p +lxl n- +p +mlf w- -a-a+a +mli w- -a-a+a +mlu w- -a-a+a +mon -- -?+? +ngf w- -a+a +ngi w- -a+a +nop -- 0 +rck w- -p-a+a +ret zt -a-? +rmi w- -a-a+a +rmu w- -a-a+a +rol w- -w-a+a +ror w- -w-a+a +rtt -t -? +sar w- -p-a-p-? 
+sbf w- -a-a+a +sbi w- -a-a+a +sbs w- -p-p+a +sbu w- -a-a+a +sde g- -d +sdf f- -p-d +sdl l- -d +set w- -w+a +sig -- -p-p+p+p +sil l- -w +sim -- -w +sli w- -w-a+a +slu w- -w-a+a +sri w- -w-a+a +sru w- -w-a+a +ste g- -w +stf f- -p-w +sti o- -p-a +stl l- -w +str r- -p +sts w- -a-p-x +teq -- -w+w +tge -- -w+w +tgt -- -w+w +tle -- -w+w +tlt -- -w+w +tne -- -w+w +trp -p -w+? +xor w- -a-a+a +zeq bc -w +zer w- +a +zge bc -w +zgt bc -w +zle bc -w +zlt bc -w +zne bc -w +zre g- 0 +zrf w- +a +zrl l- 0 + diff --git a/etc/ip_spec.t b/etc/ip_spec.t new file mode 100644 index 0000000..54ca88f --- /dev/null +++ b/etc/ip_spec.t @@ -0,0 +1,354 @@ +aar mwPo 1 34 +adf sP 1 35 +adi mwPo 2 36 +adp 2 38 +adp mPo 2 39 +adp sP 1 41 +adp sN 1 42 +ads mwPo 1 43 +and mwPo 1 44 +asp mwPo 5 45 +asp swP 1 50 +beq 2 51 +beq sP 1 52 +bge sP 1 53 +bgt sP 1 54 +ble sP 1 55 +blm sP 1 56 +blt sP 1 57 +bne sP 1 58 +bra 2 59 +bra sN 2 60 +bra sP 2 62 +cal mPo 28 64 +cal sP 1 92 +cff - 93 +cif - 94 +cii - 95 +cmf sP 1 96 +cmi mwPo 2 97 +cmp - 99 +cms sP 1 100 +csa mwPo 1 101 +csb mwPo 1 102 +dec - 103 +dee sw 1 104 +del swN 1 105 +dup mwPo 1 106 +dvf sP 1 107 +dvi mwPo 1 108 +fil u 109 +inc - 110 +ine w2 111 +ine sw 1 112 +inl mwN 3 113 +inl swN 1 116 +inn sP 1 117 +ior mwPo 1 118 +ior sP 1 119 +lae u 120 +lae sw 7 121 +lal P2 128 +lal N2 129 +lal mP 1 130 +lal mN 1 131 +lal swP 1 132 +lal swN 2 133 +lar mwPo 1 135 +ldc mP 1 136 +lde w2 137 +lde sw 1 138 +ldl mP 1 139 +ldl swN 1 140 +lfr mwPo 2 141 +lfr sP 1 143 +lil swN 1 144 +lil swP 1 145 +lil mwP 2 146 +lin 2 148 +lin sP 1 149 +lni - 150 +loc 2 151 +loc mP 34 0 +loc mN 1 152 +loc sP 1 153 +loc sN 1 154 +loe w2 155 +loe sw 5 156 +lof 2 161 +lof mwPo 4 162 +lof sP 1 166 +loi 2 167 +loi mPo 1 168 +loi mwPo 4 169 +loi sP 1 173 +lol wP2 174 +lol wN2 175 +lol mwP 4 176 +lol mwN 8 180 +lol swP 1 188 +lol swN 1 189 +lxa mPo 1 190 +lxl mPo 2 191 +mlf sP 1 193 +mli mwPo 2 194 +rck mwPo 1 196 +ret mwP 2 197 +ret sP 1 199 +rmi mwPo 1 200 +sar mwPo 1 201 +sbf sP 1 
202 +sbi mwPo 2 203 +sdl swN 1 205 +set sP 1 206 +sil swN 1 207 +sil swP 1 208 +sli mwPo 1 209 +ste w2 210 +ste sw 3 211 +stf 2 214 +stf mwPo 2 215 +stf sP 1 217 +sti mPo 1 218 +sti mwPo 4 219 +sti sP 1 223 +stl wP2 224 +stl wN2 225 +stl mwP 2 226 +stl mwN 5 228 +stl swN 1 233 +teq - 234 +tgt - 235 +tlt - 236 +tne - 237 +zeq 2 238 +zeq sP 2 239 +zer sP 1 241 +zge sP 1 242 +zgt sP 1 243 +zle sP 1 244 +zlt sP 1 245 +zne sP 1 246 +zne sN 1 247 +zre w2 248 +zre sw 1 249 +zrl mwN 2 250 +zrl swN 1 252 +zrl wN2 253 +aar e2 0 +aar e- 1 +adf e2 2 +adf e- 3 +adi e2 4 +adi e- 5 +ads e2 6 +ads e- 7 +adu e2 8 +adu e- 9 +and e2 10 +and e- 11 +asp ew2 12 +ass e2 13 +ass e- 14 +bge e2 15 +bgt e2 16 +ble e2 17 +blm e2 18 +bls e2 19 +bls e- 20 +blt e2 21 +bne e2 22 +cai e- 23 +cal e2 24 +cfi e- 25 +cfu e- 26 +ciu e- 27 +cmf e2 28 +cmf e- 29 +cmi e2 30 +cmi e- 31 +cms e2 32 +cms e- 33 +cmu e2 34 +cmu e- 35 +com e2 36 +com e- 37 +csa e2 38 +csa e- 39 +csb e2 40 +csb e- 41 +cuf e- 42 +cui e- 43 +cuu e- 44 +dee ew2 45 +del ewP2 46 +del ewN2 47 +dup e2 48 +dus e2 49 +dus e- 50 +dvf e2 51 +dvf e- 52 +dvi e2 53 +dvi e- 54 +dvu e2 55 +dvu e- 56 +fef e2 57 +fef e- 58 +fif e2 59 +fif e- 60 +inl ewP2 61 +inl ewN2 62 +inn e2 63 +inn e- 64 +ior e2 65 +ior e- 66 +lar e2 67 +lar e- 68 +ldc e2 69 +ldf e2 70 +ldl ewP2 71 +ldl ewN2 72 +lfr e2 73 +lil ewP2 74 +lil ewN2 75 +lim e- 76 +los e2 77 +los e- 78 +lor esP 1 79 +lpi e2 80 +lxa e2 81 +lxl e2 82 +mlf e2 83 +mlf e- 84 +mli e2 85 +mli e- 86 +mlu e2 87 +mlu e- 88 +mon e- 89 +ngf e2 90 +ngf e- 91 +ngi e2 92 +ngi e- 93 +nop e- 94 +rck e2 95 +rck e- 96 +ret e2 97 +rmi e2 98 +rmi e- 99 +rmu e2 100 +rmu e- 101 +rol e2 102 +rol e- 103 +ror e2 104 +ror e- 105 +rtt e- 106 +sar e2 107 +sar e- 108 +sbf e2 109 +sbf e- 110 +sbi e2 111 +sbi e- 112 +sbs e2 113 +sbs e- 114 +sbu e2 115 +sbu e- 116 +sde eu 117 +sdf e2 118 +sdl ewP2 119 +sdl ewN2 120 +set e2 121 +set e- 122 +sig e- 123 +sil ewP2 124 +sil ewN2 125 +sim e- 126 +sli e2 127 +sli e- 128 +slu e2 129 +slu 
e- 130 +sri e2 131 +sri e- 132 +sru e2 133 +sru e- 134 +sti e2 135 +sts e2 136 +sts e- 137 +str esP 1 138 +tge e- 139 +tle e- 140 +trp e- 141 +xor e2 142 +xor e- 143 +zer e2 144 +zer e- 145 +zge e2 146 +zgt e2 147 +zle e2 148 +zlt e2 149 +zne e2 150 +zrf e2 151 +zrf e- 152 +zrl ewP2 153 +dch e- 154 +exg esP 1 155 +exg e2 156 +exg e- 157 +lpb e- 158 +gto eu 159 +ldc 4 0 +lae 4 1 +lal P4 2 +lal N4 3 +lde w4 4 +ldf 4 5 +ldl wP4 6 +ldl wN4 7 +lil wP4 8 +lil wN4 9 +loc 4 10 +loe w4 11 +lof 4 12 +lol wP4 13 +lol wN4 14 +lpi 4 15 +adp 4 16 +asp w4 17 +beq 4 18 +bge 4 19 +bgt 4 20 +ble 4 21 +blm 4 22 +blt 4 23 +bne 4 24 +bra 4 25 +cal 4 26 +dee w4 27 +del wP4 28 +del wN4 29 +fil 4 30 +gto 4 31 +ine w4 32 +inl wP4 33 +inl wN4 34 +lin 4 35 +sde 4 36 +sdf 4 37 +sdl wP4 38 +sdl wN4 39 +sil wP4 40 +sil wN4 41 +ste w4 42 +stf 4 43 +stl wP4 44 +stl wN4 45 +zeq 4 46 +zge 4 47 +zgt 4 48 +zle 4 49 +zlt 4 50 +zne 4 51 +zre w4 52 +zrl wP4 53 +zrl wN4 54 +loi 4 55 +sti 4 56 diff --git a/etc/new_table b/etc/new_table new file mode 100755 index 0000000..accc7d1 --- /dev/null +++ b/etc/new_table @@ -0,0 +1,72 @@ +h=${1-.} +d=${2-.} + +set `grep fpseu em_table` +p=$2 # value on the fpseu line of em_table +set `grep fmnem em_table` +m=$2 # value on the fmnem line of em_table + +ed - em_table <<'A' > X +1,/^$/g/ /s// /gp +A + +ed - em_table <<'A' | awk '{$2=$2+'$p'; print}' > Y +1,/^$/d +1,/^$/g/ /s// /gp +A + +ed - em_table <<'A' | awk '{print $0,'$m'+i++}' > Z +1,/^$/d +1,/^$/d +1,/^$/g/ /s// /gp +A + +i=`wc -l <Y` +echo 'lpseu' `expr $i + $p - 1` >>X +i=`wc -l <Z` +echo 'lmnem' `expr $i + $m - 1` >>X + +ed - X <<'A' > $h/em_spec.h +g/^/s//#define sp_/p +A + +ed - Y <<'A' > $h/em_pseu.h +g/ \(.*\) .*/s// \1/ +g/\(.*\) \(.*\)/s//#define ps_\1 \2/p +A + +ed - Z <<'A' > $h/em_mnem.h +g/ .* /s// / +g/\(.*\) \(.*\)/s//#define op_\1 \2/p +A + +( +echo 'char em_pseu[][4] = {' +ed - Y <<'A' +g/\(...\).*/s// "\1",/p +A +echo '};' +) > $d/em_pseu.c + +( +echo 'char em_mnem[][4] = {' +ed - Z <<'A' +g/\(...\).*/s// "\1",/p +A +echo '};' +) > $d/em_mnem.c + +( +echo '#include <em_flag.h> +char em_flag[] = {' +ed - Z <<'A' | tr abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ +g/^... /s/// +g/ .*/s/// +g/\(.\)\(.\)/s//PAR_\1 | FLO_\2/ +g/-/s//NO/g +g/.*/s// &,/p +A +echo '};' +) > $d/em_flag.c + +rm X Y Z diff --git a/etc/new_table_done b/etc/new_table_done new file mode 100644 index 0000000..e69de29 diff --git a/etc/pop_push b/etc/pop_push new file mode 100644 index 0000000..478f967 --- /dev/null +++ b/etc/pop_push @@ -0,0 +1,15 @@ +description of third column of em_table: + + -: pop item indicated by next character + +: push item indicated by next character + 0: no effect on the stack + +characters describing items: + + w: target machine word (1, 2 or 4) + d: double target machine word (2, 4 or 8) + p: target machine address + a: item with size specified in argument + x: item with size specified by top item of stack + y: item with size specified by second item on stack + ?: one or more items of unknown size diff --git a/etc/proto.make b/etc/proto.make new file mode 100644 index 0000000..acde1a8 --- /dev/null +++ b/etc/proto.make @@ -0,0 +1,31 @@ +# $Id: proto.make,v 1.2 1994/06/24 10:07:02 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +h=$(TARGET_HOME)/h +c=$(SRC_HOME)/util/data + +FILES= \ +$h/em_spec.h \ +$h/em_pseu.h \ +$h/em_mnem.h \ +$c/em_flag.c \ +$c/em_pseu.c \ +$c/em_mnem.c + +$(FILES): $(SRC_DIR)/etc/em_table + cd $(SRC_DIR)/etc ; new_table $h $c + +install: $(FILES) + +distr: $(FILES) + +opr: + make pr ^ opr +pr: + @cd $(SRC_DIR) ; pr proto.make em_table new_table pop_push traps + +clean: + -rm -f *.old + +cmp : # do nothing diff --git a/etc/traps b/etc/traps new file mode 100644 index 0000000..858cf61 --- /dev/null +++ b/etc/traps @@ -0,0 +1,28 @@ +~ Array bound error +~ Range bound error +~ Set bound error +~ Integer overflow +~ Floating overflow +~ Floating underflow +~ Divide by 0 +~ Divide by 0.0 +~ Integer undefined +~ Floating undefined +~ Conversion error + + + + + +* Stack overflow +* Heap overflow +* Illegal instruction +* Illegal odd or zero argument +* Case error +* Addressing non existent memory +* Bad pointer used +* Program counter out of range +* Bad argument of LAE +* Bad monitor call +* Argument of LIN too high +* Bad GTO descriptor diff --git a/fast/Action b/fast/Action new file mode 100644 index 0000000..451eb02 --- /dev/null +++ b/fast/Action @@ -0,0 +1,15 @@ +name "Fast C compiler" +dir f_c +end +name "Fast ANSI C compiler" +dir f_c.ansi +end +name "Fast Pascal compiler" +dir f_pc +end +name "Fast Modula-2 compiler" +dir f_m2 +end +name "Driver for fast compilers" +dir driver +end diff --git a/fast/driver/afcc.1 b/fast/driver/afcc.1 new file mode 100644 index 0000000..a95b5b5 --- /dev/null +++ b/fast/driver/afcc.1 @@ -0,0 +1,158 @@ +.TH AFCC 1 +.SH NAME +afcc \- fast ACK compatible C compiler +.SH SYNOPSIS +.B afcc +[ +.B \-c +] +[ +.B \-v +] +[ +.B \-ansi +] +[ +.B \-R +] +[ +.B \-vn +] +[ \fB\-D\fIname\fR ] +[ \fB\-D\fIname\fB=\fIdef\fR ] +[ +.BI \-I pathname +] +[ +.B \-w +] +[ +.B \-g +] +[ +.B \-o +.I outfile +] +[ +.BI \-U name +] +[ +.BI -M compiler +] +.I sourcefile ... +.SH DESCRIPTION +.LP +.I Afcc +is a fast +.B C +compiler. 
It translates +.B C +programs +into ack(1)-compatible relocatable object modules, and does so in one pass. +Then, if the \fB\-c\fP flag is not given, +.I afcc +offers the object modules to a link-editor, +to create an executable binary. +.LP +.I Afcc +accepts several types of filename arguments. Files with +names ending in +.B .c +are taken to be +.B C +source programs. +They are compiled, and the resulting object module is placed in the current +directory. +The object module is named after its source file, the suffix +.B .o +replacing +.BR .c +in the name of the object. +.LP +Other arguments refer to loader options, +object modules, or object libraries. +Unless the +.B \-c +flag is given, these modules and libraries, together with the results of any +specified compilations, are passed (in the order given) to the +link-editor to produce +an output file named +.IR a.out . +You can specify a name for the executable by using the +.B \-o +option. +.SH OPTIONS +.LP +.IP \fB\-ansi\fP +Use the ANSI C compiler instead of the K&R one. This flag must be first, +and must also be used when linking through +.I afcc +or +.I ack. +.IP \fB\-R\fP +test for more compatibility with Kernighan & Ritchie C [1]. +.IP \fB\-c\fP +.br +Suppress the loading phase of the compilation, and force an object module to +be produced, even if only one program is compiled. +A single object module can be named explicitly using the +.B \-o +option. +.IP \fB\-D\fIname\fR\fB=\fIdef\fR +Define a symbol +.I name +to the +preprocessor, as if by "#define". +.IP \fB\-D\fIname\fR +.br +same as \fB\-D\fIname\fB=1\fR. +.IP \fB\-I\fIpathname\fR +.br +Add +.I pathname +to the list of directories in which to search for +.B #include +files with filenames not beginning with slash. +The compiler first searches for +.B #include +files in the directory containing +.I sourcefile, +then in directories in +.B \-I +options, then in the ACK include directory, +and finally, in +.I /usr/include. 
+.IP "\fB\-o \fIoutput\fR" +Name the final output file +.I output. +.IP \fB\-U\fIname\fR +.br +Remove any initial definition of +.I name. +.IP \fB\-g\fP +.br +Produce symbolic debugging information for grind(1). +.IP \fB\-v\fP +.br +Verbose. Print the commands as they are executed. +.IP \fB\-vn\fP +.br +Verbose, no execute. Only print the commands, do not execute them. +.IP \fB\-w\fP +suppress warning messages. +.IP \fB\-M\fIcompiler\fR +.br +use \fIcompiler\fR as C compiler instead of the default. +.LP +Object modules produced by ack(1) and +.I afcc +can be freely mixed. +.SH "SEE ALSO" +.IP [1] +B.W. Kernighan, D. Ritchie, "\fIThe C Programming Language\fP", Prentice-Hall Inc., 1978 +.IP [2] +ack(1) manual page. +.IP [3] +grind(1) manual page. +.SH DIAGNOSTICS +Diagnostics are intended to be self-explanatory. diff --git a/fast/driver/afm2.1 b/fast/driver/afm2.1 new file mode 100644 index 0000000..431f0f0 --- /dev/null +++ b/fast/driver/afm2.1 @@ -0,0 +1,209 @@ +.TH AFM2 1 +.SH NAME +afm2 \- fast ACK compatible Modula-2 compiler +.SH SYNOPSIS +.B afm2 +[ +.B \-c +] +[ +.B \-v +] +[ +.B \-vn +] +[ \fB\-D\fIname\fR ] +[ \fB\-D\fIname\fB=\fIdef\fR ] +[ +.BI \-I pathname +] +[ +.BI \-w classes +] +[ +.BI \-W classes +] +[ +.B \-L +] +[ +.B \-o +.I outfile +] +[ +.B \-R +] +[ +.B \-A +] +[ +.B \-3 +] +[ +.B \-_ +] +[ +.B \-g +] +[ +.BI \-U name +] +[ +.BI -M compiler +] +.I sourcefile ... +.SH DESCRIPTION +.LP +.I afm2 +is a fast +.B Modula-2 +compiler. It translates +.B Modula-2 +programs +into ack(1)-compatible relocatable object modules, and does so in one pass. +Then, if the \fB\-c\fP flag is not given, +.I afm2 +offers the object modules to a link-editor, +to create an executable binary. +.LP +.I Afm2 +accepts several types of filename arguments. Files with +names ending in +.B .mod +are taken to be +.B Modula-2 +source programs. +They are compiled, and the resulting object module is placed in the current +directory.
+The object module is named after its source file, the suffix +.B .o +replacing +.BR .mod +in the name of the object. +A file with suffix +.B .mod +is passed through the C preprocessor if it begins with a '#'. +.PP +Definition modules are not separately compiled. The compiler reads them when +it needs them. +Definition modules are expected to reside in files with names ending +in +.BR .def . +The name of the file in which a definition module is stored must be the same as +the module-name, apart from the extension. +Also, in most Unix systems filenames are only 14 characters long. +So, given an IMPORT declaration for a module called "LongModulName", +the compiler will try to open a file called "LongModulN.def". +The requirement does not hold for implementation or program modules, +but is certainly recommended. +.LP +Other arguments refer to loader options, +object modules, or object libraries. +Unless the +.B \-c +flag is given, these modules and libraries, together with the results of any +specified compilations, are passed (in the order given) to the +link-editor to produce +an output file named +.IR a.out . +You can specify a name for the executable by using the +.B \-o +option. +.SH OPTIONS +.LP +.IP \fB\-c\fP +.br +Suppress the loading phase of the compilation, and force an object module to +be produced, even if only one program is compiled. +A single object module can be named explicitly using the +.B \-o +option. +.IP \fB\-D\fIname\fR\fB=\fIdef\fR +Define a symbol +.I name +to the +preprocessor, as if by "#define". +.IP \fB\-D\fIname\fR +.br +same as \fB\-D\fIname\fB=1\fR. +.IP \fB\-I\fIpathname\fR +.br +Add +.I pathname +to the list of directories in which to search for +.B #include +files with filenames not beginning with slash. +The preprocessor first searches for +.B #include +files in the directory containing +.I sourcefile, +then in directories in +.B \-I +options, then in the ACK include directory, +and finally, in +.I /usr/include. 
+This flag is also passed to the compiler. When the compiler needs a definition +module, it is first searched for in the current directory, then in the +directories given to it by the \fB\-I\fP flag, and then in a default directory, +.I $TARGET_HOME/lib/m2. +.I afm2 +This default directory contains all definition modules of +the runtime system. +.IP "\fB\-o \fIoutput\fR" +Name the final output file +.I output. +.IP \fB\-U\fIname\fR +.br +Remove any initial definition of +.I name. +.IP \fB\-v\fP +.br +Verbose. Print the commands as they are executed. +.IP \fB\-vn\fP +.br +Verbose, no execute. Only print the commands, do not execute them. +.IP \fB\-L\fR +do not generate code to keep track of +the current location in the source code. +.IP \fB\-g\fP +.br +Produce symbolic debugging information for grind(1). +.IP \fB\-w\fR\fIclasses\fR +suppress warning messages whose class is a member of \fIclasses\fR. +Currently, there are three classes: \fBO\fR, indicating old-fashioned use, +\fBW\fR, indicating "ordinary" warnings, and \fBR\fR, indicating +restricted Modula-2. +If no \fIclasses\fR are given, all warnings are suppressed. +By default, warnings in class \fBO\fR and \fBW\fR are given. +.IP \fB\-W\fR\fIclasses\fR +allow for warning messages whose class is a member of \fIclasses\fR. +.IP \fB\-R\fP +.br +disable all range-checks. +.IP \fB\-A\fP +.br +enable extra array bound checks. Unfortunately, the back-end used for this +compiler is a bit sloppy, so extra array bound checks are needed if you want +detection of array bound errors. +.IP \fB\-3\fP +.br +Only accept Modula-2 programs that strictly conform to the 3rd Edition of +[1]. +.IP \fB\-_\fP +.br +allow for underscores within identifiers. Identifiers may not start or end +with an underscore, even if this flag is given. +.IP \fB\-M\fIcompiler\fR +.br +use \fIcompiler\fR as Modula-2 compiler instead of the default. +.SH "SEE ALSO" +.IP [1] +N. Wirth, \fIProgramming in Modula-2\fP, 3rd edition, Springer Verlag.
+.IP [2] +C.J.H. Jacobs, \fIThe ACK Modula-2 Compiler\fP. +.IP [3] +ack(1) unix manual page. +.IP [4] +grind(1) unix manual page. +.SH DIAGNOSTICS +Diagnostics are intended to be self-explanatory. diff --git a/fast/driver/afpc.1 b/fast/driver/afpc.1 new file mode 100644 index 0000000..f96d2e3 --- /dev/null +++ b/fast/driver/afpc.1 @@ -0,0 +1,227 @@ +.TH AFPC 1 +.SH NAME +afpc \- fast ACK compatible Pascal compiler +.SH SYNOPSIS +.B afpc +[ +.B \-c +] +[ +.B \-v +] +[ +.B \-vn +] +[ \fB\-D\fIname\fR ] +[ \fB\-D\fIname\fB=\fIdef\fR ] +[ +.BI \-I pathname +] +[ +.B \-w +] +[ +.B \-g +] +[ +.B \-L +] +[ +.B \-o +.I outfile +] +[ +.B \-R +] +[ +.B \-A +] +[ +.B \-a +] +[ +.B \-d +] +[ +.BI \-i num +] +[ +.B \-t +] +[ +.B \-C +] +[ +.B \-U+ +] +[ +.B \-u+ +] +[ +.B \-s+ +] +[ +.B \-c+ +] +[ +.BI \-U name +] +[ +.BI -M compiler +] +.I sourcefile ... +.SH DESCRIPTION +.LP +.I afpc +is a fast +.B Pascal +compiler. It translates +.B Pascal +programs +into ack(1)-compatible relocatable object modules, and does so in one pass. +Then, if the \fB\-c\fP flag is not given, +.I afpc +offers the object modules to a link-editor, +to create an executable binary. +.LP +.I Afpc +accepts several types of filename arguments. Files with +names ending in +.B .p +are taken to be +.B Pascal +source programs. +They are compiled, and the resulting object module is placed in the current +directory. +The object module is named after its source file, the suffix +.B .o +replacing +.BR .p +in the name of the object. +A file with suffix +.B .p +is passed through the C preprocessor if it begins with a '#'. +.LP +Other arguments refer to loader options, +object modules, or object libraries. +Unless the +.B \-c +flag is given, these modules and libraries, together with the results of any +specified compilations, are passed (in the order given) to the +link-editor to produce +an output file named +.IR a.out . +You can specify a name for the executable by using the +.B \-o +option.
+.LP +If a single +.B Pascal +program is compiled and loaded all at once, the object module +file is deleted. +.SH OPTIONS +.LP +.IP \fB\-c\fP +.br +Suppress the loading phase of the compilation, and force an object module to +be produced, even if only one program is compiled. +A single object module can be named explicitly using the +.B \-o +option. +.IP \fB\-D\fIname\fR\fB=\fIdef\fR +Define a symbol +.I name +to the +preprocessor, as if by "#define". +.IP \fB\-D\fIname\fR +.br +same as \fB\-D\fIname\fB=1\fR. +.IP \fB\-I\fIpathname\fR +.br +Add +.I pathname +to the list of directories in which to search for +.B #include +files with filenames not beginning with slash. +The preprocessor first searches for +.B #include +files in the directory containing +.I sourcefile, +then in directories in +.B \-I +options, and finally, in +.I /usr/include. +.IP "\fB\-o \fIoutput\fR" +Name the final output file +.I output. +.IP \fB\-U\fIname\fR +.br +Remove any initial definition of +.I name. +.IP \fB\-v\fP +.br +Verbose. Print the commands as they are executed. +.IP \fB\-vn\fP +.br +Verbose, no execute. Only print the commands, do not execute them. +.IP \fB\-L\fP +.br +do not generate code to keep track of +the current location in the source code. +.IP \fB\-g\fP +.br +Produce symbolic debugging information for grind(1). +.IP \fB\-w\fP +.br +suppress warning messages. +.IP \fB\-d\fP +.br +allow for "long"s. +.IP \fB\-i\fInum\fR +.br +set size for integer sets. By default, the set size is the word size. +.IP \fB\-C\fP +.br +distinguish between lower case and upper case. Normally, upper case letters +are considered equal to their lower case counterpart. +.IP \fB\-t\fP +.br +trace calls and exits of procedures and functions. +.IP \fB\-R\fP +.br +disable all range-checks. +.IP \fB\-A\fP +.br +enable extra array bound checks. Unfortunately, the back-end used for this +compiler is a bit sloppy, so extra array bound checks are needed if you want +detection of array bound errors. 
+.IP \fB\-a\fP +.br +disable assertions. Assertions are skipped instead of evaluated. +.IP "\fB\-U+\fP, \fB\-u+\fP" +.br +allow for underscores within identifiers. Identifiers may not start +with an underscore, even if this flag is given. +.IP \fB-s+\fP +.br +allow only standard +.BR Pascal . +This disables the \fB\-c+\fP, \fB\-d\fR, \fB\-u+\fR, +\fB\-U+\fR and \fB\-C\fR options. +Furthermore, assertions are not recognized at all. +.IP \fB-c+\fP +.br +allow C-like strings. This option is mainly intended for usage with +C-functions. This option will cause the type 'string' to be known. +.IP \fB\-M\fIcompiler\fR +.br +use \fIcompiler\fR as Pascal compiler instead of the default. +.SH "SEE ALSO" +.IP [1] +J.W. Stevenson, H. v. Eck, \fIAmsterdam Compiler Kit-Pascal reference manual\fP. +.IP [2] +ack(1) unix manual page. +.IP [3] +grind(1) unix manual page. +.SH DIAGNOSTICS +Diagnostics are intended to be self-explanatory. diff --git a/fast/driver/driver.c b/fast/driver/driver.c new file mode 100644 index 0000000..3f43889 --- /dev/null +++ b/fast/driver/driver.c @@ -0,0 +1,860 @@ +/* fcc/fm2/fpc + Driver for fast ACK compilers. + + Derived from the C compiler driver from Minix. + + Compile this file with + cc -O -I/config -DF?? driver.c + where F?? is either FCC, FPC, or FM2. + Install the resulting binaries in the EM bin directory. + Suggested names: afcc, afm2, and afpc. +*/ + +#if FM2+FPC+FCC > 1 +Something wrong here! Only one of FM2, FPC, or FCC must be defined +#endif + +#ifdef sun3 +#define MACHNAME "m68020" +#define SYSNAME "sun3" +#endif + +#ifdef vax4 +#define MACHNAME "vax4" +#define SYSNAME "vax4" +#endif + +#ifdef i386 +#define MACHNAME "i386" +#define SYSNAME "i386" +#endif + +#include +#include +#include +#include +#if __STDC__ +#include +#else +#include +#endif + +/* + Version producing ACK .o files in one pass.
+*/ +#define MAXARGC 256 /* maximum number of arguments allowed in a list */ +#define USTR_SIZE 128 /* maximum length of string variable */ + +typedef char USTRING[USTR_SIZE]; + +struct arglist { + int al_argc; + char *al_argv[MAXARGC]; +}; + +#define CPP_NAME "$H/lib.bin/cpp" +#define LD_NAME "$H/lib.bin/em_led" +#define CV_NAME "$H/lib.bin/$S/cv" +#define SHELL "/bin/sh" + +char *CPP; +char *COMP; +char *cc = "cc"; + +int kids = -1; +int ecount = 0; + +struct arglist CPP_FLAGS = { +#ifdef FCC + 7, +#else + 13, +#endif + { + "-D__unix", + "-D_EM_WSIZE=4", + "-D_EM_PSIZE=4", + "-D_EM_SSIZE=2", + "-D_EM_LSIZE=4", + "-D_EM_FSIZE=4", + "-D_EM_DSIZE=8", +#ifndef FCC + "-DEM_WSIZE=4", + "-DEM_PSIZE=4", + "-DEM_SSIZE=2", + "-DEM_LSIZE=4", + "-DEM_FSIZE=4", + "-DEM_DSIZE=8", +#endif + } +}; + +struct arglist LD_HEAD = { + 2, + { + "$H/lib/$S/head_em", +#ifdef FCC + "$H/lib/$S/head_$A" +#endif +#ifdef FM2 + "$H/lib/$S/head_m2" +#endif +#ifdef FPC + "$H/lib/$S/head_pc" +#endif + } +}; + +struct arglist LD_TAIL = { +#if defined(sun3) || defined(i386) + 5, +#else + 4, +#endif + { +#ifdef FCC + "$H/lib/$S/tail_$A", +#endif +#ifdef FM2 + "$H/lib/$S/tail_m2", +#endif +#ifdef FPC + "$H/lib/$S/tail_pc", +#endif +#if defined(sun3) || defined(i386) + "$H/lib/$M/tail_fp", +#endif + "$H/lib/$M/tail_em", + "$H/lib/$S/tail_mon", + "$H/lib/$M/end_em" + } +}; + +struct arglist align = { + 5, { +#ifdef sun3 + "-a0:4", + "-a1:4", + "-a2:0x20000", + "-a3:4", + "-b0:0x2020" +#endif +#ifdef vax4 + "-a0:4", + "-a1:4", + "-a2:0x400", + "-a3:4", + "-b0:0" +#endif +#ifdef i386 + "-a0:4", + "-a1:4", + "-a2:4", + "-a3:4", + "-b1:0x1880000" +#endif + } +}; + +struct arglist COMP_FLAGS; + +char *o_FILE = "a.out"; /* default name for executable file */ + +#define remove(str) ((noexec || unlink(str)), (str)[0] = '\0') +#define cleanup(str) (str && str[0] && remove(str)) +#define init(al) ((al)->al_argc = 1) + +char ProgCall[128]; + +struct arglist SRCFILES; +struct arglist LDFILES; + +int RET_CODE = 0; 
+ +struct arglist LD_FLAGS; + +struct arglist CALL_VEC; + +int o_flag = 0; +int c_flag = 0; +int g_flag = 0; +int v_flag = 0; +int O_flag = 0; +int ansi_c = 0; + +#if __STDC__ +char *mkstr(char *, ...); +#else +char *mkstr(); +#endif +char *malloc(); +char *alloc(); +char *extension(); +char *expand_string(); + +USTRING ofile; +USTRING BASE; +USTRING tmp_file; + +int noexec = 0; + +extern char *strcat(), *strcpy(), *mktemp(), *strchr(); + +trapcc(sig) + int sig; +{ + signal(sig, SIG_IGN); + if (kids != -1) kill(kids, sig); + cleanup(ofile); + cleanup(tmp_file); + exit(1); +} + +#ifdef FCC +#define lang_suffix() "c" +#define comp_name() "$H/lib.bin/c_ce" +#define ansi_c_name() "$H/lib.bin/c_ce.ansi" +#endif /* FCC */ + +#ifdef FM2 +#define lang_suffix() "mod" +#define comp_name() "$H/lib.bin/m2_ce" +#endif /* FM2 */ + +#ifdef FPC +#define lang_suffix() "p" +#define comp_name() "$H/lib.bin/pc_ce" +#endif /* FPC */ + + +#ifdef FCC +int +lang_opt(str) + char *str; +{ + switch(str[1]) { + case 'R': + if (! ansi_c) { + append(&COMP_FLAGS, str); + return 1; + } + break; + case '-': /* debug options */ + append(&COMP_FLAGS, str); + return 1; + case 'a': /* -ansi flag */ + if (! strcmp(str, "-ansi")) { + ansi_c = 1; + COMP = expand_string(ansi_c_name()); + return 1; + } + break; + case 'w': /* disable warnings */ + if (! ansi_c) { + append(&COMP_FLAGS, str); + return 1; + } + if (str[2]) { + str[1] = '-'; + append(&COMP_FLAGS, &str[1]); + } + else append(&COMP_FLAGS, "-a"); + return 1; + } + return 0; +} +#endif /* FCC */ + +#ifdef FM2 +int +lang_opt(str) + char *str; +{ + switch(str[1]) { + case '-': /* debug options */ + case 'w': /* disable warnings */ + case 'R': /* no runtime checks */ + case 'W': /* add warnings */ + case 'L': /* no line numbers */ + case 'A': /* extra array bound checks */ + case '3': /* only accept 3rd edition Modula-2 */ + append(&COMP_FLAGS, str); + return 1; + case 'I': + append(&COMP_FLAGS, str); + break; /* !!! 
*/ + case 'U': /* underscores in identifiers allowed */ + if (str[2] == '\0') { + append(&COMP_FLAGS, str); + return 1; + } + break; + case 'e': /* local extension for Modula-2 compiler: + procedure constants + */ + str[1] = 'l'; + append(&COMP_FLAGS, str); + return 1; + } + return 0; +} +#endif /* FM2 */ + +#ifdef FPC +int +lang_opt(str) + char *str; +{ + switch(str[1]) { + case '-': /* debug options */ + case 'a': /* enable assertions */ + case 'd': /* allow doubles (longs) */ + case 'i': /* set size of integer sets */ + case 't': /* tracing */ + case 'w': /* disable warnings */ + case 'A': /* extra array bound checks */ + case 'C': /* distinguish between lower case and upper case */ + case 'L': /* no FIL and LIN instructions */ + case 'R': /* no runtime checks */ + append(&COMP_FLAGS, str); + return 1; + case 'u': + case 'U': + /* underscores in identifiers */ + case 's': + /* only compile standard pascal */ + case 'c': + /* C type strings */ + if (str[2] == '+' && str[3] == '\0') { + str[2] = 0; + append(&COMP_FLAGS, str); + return 1; + } + } + return 0; +} +#endif /* FPC */ + +main(argc, argv) + char *argv[]; +{ + char *str; + char **argvec; + int count; + char *ext; + register struct arglist *call = &CALL_VEC; + char *file; + char *ldfile; + char *INCLUDE = 0; + int compile_cnt = 0; + + setbuf(stdout, (char *) 0); + basename(*argv++,ProgCall); + + COMP = expand_string(comp_name()); + CPP = expand_string(CPP_NAME); + +#ifdef vax4 + append(&CPP_FLAGS, "-D__vax"); +#endif +#ifdef sun3 + append(&CPP_FLAGS, "-D__sun"); +#endif +#ifdef m68020 + append(&CPP_FLAGS, "-D__mc68020"); + append(&CPP_FLAGS, "-D__mc68000"); +#endif + + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, trapcc); + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, trapcc); + if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) + signal(SIGQUIT, trapcc); + while (--argc > 0) { + if (*(str = *argv++) != '-') { + append(&SRCFILES, str); + continue; + } + + if (lang_opt(str)) { + } + else switch 
(str[1]) { + + case 'c': /* stop after producing .o files */ + c_flag = 1; + break; + case 'D': /* preprocessor #define */ + case 'U': /* preprocessor #undef */ + append(&CPP_FLAGS, str); + break; + case 'I': /* include directory */ + append(&CPP_FLAGS, str); + break; + case 'g': /* debugger support */ + append(&COMP_FLAGS, str); + g_flag = 1; + break; + case 'a': /* -ansi flag */ + if (! strcmp(str, "-ansi")) { + ansi_c = 1; + return 1; + } + break; + case 'o': /* target file */ + if (argc-- >= 0) { + o_flag = 1; + o_FILE = *argv++; + ext = extension(o_FILE); + if (ext != o_FILE && ! strcmp(ext, lang_suffix()) + ) { + error("-o would overwrite %s", o_FILE); + } + } + break; + case 'u': /* mark identifier as undefined */ + append(&LD_FLAGS, str); + if (argc-- >= 0) + append(&LD_FLAGS, *argv++); + break; + case 'O': /* use built in peephole optimizer */ + O_flag = 1; + break; + case 'v': /* verbose */ + v_flag++; + if (str[2] == 'n') + noexec = 1; + break; + case 'l': /* library file */ + append(&SRCFILES, str); + break; + case 'M': /* use other compiler (for testing) */ + strcpy(COMP, str+2); + break; + case 's': /* strip */ + if (str[2] == '\0') { + append(&LD_FLAGS, str); + break; + } + /* fall through */ + default: + warning("%s flag ignored", str); + break; + } + } + + if (ecount) exit(1); + + count = SRCFILES.al_argc; + argvec = &(SRCFILES.al_argv[0]); + while (count-- > 0) { + ext = extension(*argvec); + if (*argvec[0] != '-' && + ext != *argvec++ && (! strcmp(ext, lang_suffix()) + )) { + compile_cnt++; + } + } + + if (compile_cnt > 1 && c_flag && o_flag) { + warning("-o flag ignored"); + o_flag = 0; + } + +#ifdef FM2 + INCLUDE = expand_string("-I$H/lib/m2"); +#endif /* FM2 */ +#ifdef FCC + INCLUDE = expand_string(ansi_c ? 
"-I$H/include/tail_ac" : "-I$H/include/_tail_cc"); + append(&COMP_FLAGS, "-L"); +#endif /* FCC */ + count = SRCFILES.al_argc; + argvec = &(SRCFILES.al_argv[0]); + while (count-- > 0) { + register char *f; + basename(file = *argvec++, BASE); + + ext = extension(file); + + if (file[0] != '-' && + ext != file && (!strcmp(ext, lang_suffix()) + )) { + if (compile_cnt > 1) printf("%s\n", file); + + ldfile = c_flag ? ofile : alloc((unsigned)strlen(BASE)+3); + if ( +#ifdef FCC + !strcmp(ext, "s") && +#endif + needsprep(file)) { + strcpy(tmp_file, TMP_DIR); + strcat(tmp_file, "/F_XXXXXX"); + mktemp(tmp_file); + init(call); + append(call, CPP); + concat(call, &CPP_FLAGS); + append(call, INCLUDE); + append(call, file); + if (runvec(call, tmp_file)) { + file = tmp_file; + } + else { + remove(tmp_file); + tmp_file[0] = '\0'; + continue; + } + } + init(call); + if (o_flag && c_flag) { + f = o_FILE; + } + else f = mkstr(ldfile, BASE, ".", "o", (char *)0); + append(call, COMP); +#ifdef FCC + concat(call, &CPP_FLAGS); +#endif + concat(call, &COMP_FLAGS); +#if FM2 || FCC + append(call, INCLUDE); +#endif + append(call, file); + append(call, f); + if (runvec(call, (char *) 0)) { + file = f; + } + else { + remove(f); + continue; + } + cleanup(tmp_file); + tmp_file[0] = '\0'; + } + + else if (file[0] != '-' && + strcmp(ext, "o") && strcmp(ext, "a")) { + warning("file with unknown suffix (%s) passed to the loader", ext); + } + + if (c_flag) + continue; + + append(&LDFILES, file); + } + + /* *.s to a.out */ + if (RET_CODE == 0 && LDFILES.al_argc > 0) { + init(call); + expand(&LD_HEAD); + cc = "cc.2g"; + expand(&LD_TAIL); + append(call, expand_string(LD_NAME)); + concat(call, &align); + append(call, "-o"); + strcpy(tmp_file, TMP_DIR); + strcat(tmp_file, "/F_XXXXXX"); + mktemp(tmp_file); + append(call, tmp_file); + concat(call, &LD_HEAD); + concat(call, &LD_FLAGS); + concat(call, &LDFILES); + if (g_flag) append(call, expand_string("$H/lib/$M/tail_db")); +#ifdef FCC + if (! 
ansi_c) append(call, expand_string("$H/lib/$S/tail_cc.1s")); +#endif + concat(call, &LD_TAIL); + if (! runvec(call, (char *) 0)) { + cleanup(tmp_file); + exit(RET_CODE); + } + init(call); + append(call, expand_string(CV_NAME)); + append(call, tmp_file); + append(call, o_FILE); + runvec(call, (char *) 0); + cleanup(tmp_file); + } + exit(RET_CODE); +} + +needsprep(name) + char *name; +{ + int file; + char fc; + + file = open(name,0); + if (file < 0) return 0; + if (read(file, &fc, 1) != 1) fc = 0; + close(file); + return fc == '#'; +} + +char * +alloc(u) + unsigned u; +{ + char *p = malloc(u); + + if (p == 0) + panic("no space"); + return p; +} + +char * +expand_string(s) + char *s; +{ + char buf[1024]; + register char *p = s; + register char *q = &buf[0]; + int expanded = 0; + + if (!p) return p; + while (*p) { + if (*p == '$') { + p++; + expanded = 1; + switch(*p++) { + case 'A': + if (ansi_c) strcpy(q, "ac"); + else strcpy(q, cc); + break; + case 'H': + strcpy(q, EM_DIR); + break; + case 'M': + strcpy(q, MACHNAME); + break; + case 'S': + strcpy(q, SYSNAME); + break; + default: + panic("internal error"); + break; + } + while (*q) q++; + } + else *q++ = *p++; + } + if (! 
expanded) return s; + *q++ = '\0'; + p = alloc((unsigned int) (q - buf)); + return strcpy(p, buf); +} + +append(al, arg) + register struct arglist *al; + char *arg; +{ + if (!arg || !*arg) return; + if (al->al_argc >= MAXARGC) + panic("argument list overflow"); + al->al_argv[(al->al_argc)++] = arg; +} + +expand(al) + register struct arglist *al; +{ + register int i = al->al_argc; + register char **p = &(al->al_argv[0]); + + while (i-- > 0) { + *p = expand_string(*p); + p++; + } +} + +concat(al1, al2) + struct arglist *al1, *al2; +{ + register i = al2->al_argc; + register char **p = &(al1->al_argv[al1->al_argc]); + register char **q = &(al2->al_argv[0]); + + if ((al1->al_argc += i) >= MAXARGC) + panic("argument list overflow"); + while (i-- > 0) { + *p++ = *q++; + } +} +#if __STDC__ +/*VARARGS*/ +char * +mkstr(char *dst, ...) +{ + va_list ap; + + va_start(ap, dst); + { + register char *p; + register char *q; + + q = dst; + p = va_arg(ap, char *); + + while (p) { + while (*q++ = *p++); + q--; + p = va_arg(ap, char *); + } + } + va_end(ap); + + return dst; +} +#else +/*VARARGS*/ +char * +mkstr(va_alist) + va_dcl +{ + va_list ap; + char *dst; + + va_start(ap); + { + register char *p; + register char *q; + + dst = q = va_arg(ap, char *); + p = va_arg(ap, char *); + + while (p) { + while (*q++ = *p++); + q--; + p = va_arg(ap, char *); + } + } + va_end(ap); + + return dst; +} +#endif +basename(str, dst) + char *str; + register char *dst; +{ + register char *p1 = str; + register char *p2 = p1; + + while (*p1) + if (*p1++ == '/') + p2 = p1; + p1--; + while (*p1 != '.' && p1 >= p2) p1--; + if (p1 >= p2) { + *p1 = '\0'; + while (*dst++ = *p2++); + *p1 = '.'; + } + else + while (*dst++ = *p2++); +} + +char * +extension(fn) + char *fn; +{ + register char *c = fn; + + while (*c++) ; + while (*--c != '.' 
&& c >= fn) { } + if (c++ < fn || !*c) return fn; + return c; +} + +runvec(vec, outp) + struct arglist *vec; + char *outp; +{ + int pid, status; + + if (v_flag) { + pr_vec(vec); + putc('\n', stderr); + } + if ((pid = fork()) == 0) { /* start up the process */ + if (outp) { /* redirect standard output */ + close(1); + if (creat(outp, 0666) != 1) + panic("cannot create output file"); + } + ex_vec(vec); + } + if (pid == -1) + panic("no more processes"); + kids = pid; + wait(&status); + if (status) switch(status & 0177) { + case SIGHUP: + case SIGINT: + case SIGQUIT: + case SIGTERM: + case 0: + break; + default: + error("%s died with signal %d\n", vec->al_argv[1], status&0177); + } + kids = -1; + return status ? ((RET_CODE = 1), 0) : 1; +} + +/*VARARGS1*/ +error(str, s1, s2) + char *str, *s1, *s2; +{ + fprintf(stderr, "%s: ", ProgCall); + fprintf(stderr, str, s1, s2); + putc('\n', stderr); + ecount++; +} + +/*VARARGS1*/ +warning(str, s1, s2) + char *str, *s1, *s2; +{ + fprintf(stderr, "%s: (warning) ", ProgCall); + fprintf(stderr, str, s1, s2); + putc('\n', stderr); +} + +panic(str) + char *str; +{ + error(str); + trapcc(SIGINT); +} + +pr_vec(vec) + register struct arglist *vec; +{ + register char **ap = &vec->al_argv[1]; + + vec->al_argv[vec->al_argc] = 0; + fprintf(stderr, "%s", *ap); + while (*++ap) { + fprintf(stderr, " %s", *ap); + } +} + +extern int errno; + +ex_vec(vec) + register struct arglist *vec; +{ + if (noexec) + exit(0); + vec->al_argv[vec->al_argc] = 0; + execv(vec->al_argv[1], &(vec->al_argv[1])); + if (errno == ENOEXEC) { /* not an a.out, try it with the SHELL */ + vec->al_argv[0] = SHELL; + execv(SHELL, &(vec->al_argv[0])); + } + if (access(vec->al_argv[1], 1) == 0) { + /* File is executable. 
*/ + error("cannot execute %s", vec->al_argv[1]); + } else { + error("%s is not executable", vec->al_argv[1]); + } + exit(1); +} diff --git a/fast/driver/proto.make b/fast/driver/proto.make new file mode 100644 index 0000000..b9fafb6 --- /dev/null +++ b/fast/driver/proto.make @@ -0,0 +1,57 @@ +# $Id: proto.make,v 1.5 1994/06/24 11:01:58 ceriel Exp $ + +#PARAMS do not remove this line! + +SRC_DIR = $(SRC_HOME)/fast/driver +INCLUDES = -I$(TARGET_HOME)/config +CFLAGS = $(COPTIONS) $(INCLUDES) +LINTFLAGS = $(LINTOPTIONS) $(INCLUDES) +LDFLAGS = $(LDOPTIONS) + +all: afcc afm2 afpc + +install: all + cp afcc afm2 afpc $(TARGET_HOME)/bin + if [ $(DO_MACHINE_INDEP) = y ] ; \ + then mk_manpage $(SRC_DIR)/afcc.1 $(TARGET_HOME) ; \ + mk_manpage $(SRC_DIR)/afpc.1 $(TARGET_HOME) ; \ + mk_manpage $(SRC_DIR)/afm2.1 $(TARGET_HOME) ; \ + fi + +cmp: all + -cmp afcc $(TARGET_HOME)/bin/afcc + -cmp afm2 $(TARGET_HOME)/bin/afm2 + -cmp afpc $(TARGET_HOME)/bin/afpc + +pr: + @pr $(SRC_DIR)/proto.make $(SRC_DIR)/driver.c + +opr: + make pr | opr + +clean: + rm -f *.$(SUF) afcc afm2 afpc Out + +lint: + $(LINT) $(LINTFLAGS) -DFCC -D`ack_sys` $(SRC_DIR)/driver.c + +afcc.$(SUF): $(SRC_DIR)/driver.c $(TARGET_HOME)/config/em_path.h + $(CC) $(CFLAGS) -c -DFCC -D$(MACH) -D`ack_sys` $(SRC_DIR)/driver.c + mv driver.$(SUF) afcc.$(SUF) + +afpc.$(SUF): $(SRC_DIR)/driver.c $(TARGET_HOME)/config/em_path.h + $(CC) $(CFLAGS) -c -DFPC -D$(MACH) -D`ack_sys` $(SRC_DIR)/driver.c + mv driver.$(SUF) afpc.$(SUF) + +afm2.$(SUF): $(SRC_DIR)/driver.c $(TARGET_HOME)/config/em_path.h + $(CC) $(CFLAGS) -c -DFM2 -D$(MACH) -D`ack_sys` $(SRC_DIR)/driver.c + mv driver.$(SUF) afm2.$(SUF) + +afcc: afcc.$(SUF) + $(CC) $(LDFLAGS) -o afcc afcc.$(SUF) + +afm2: afm2.$(SUF) + $(CC) $(LDFLAGS) -o afm2 afm2.$(SUF) + +afpc: afpc.$(SUF) + $(CC) $(LDFLAGS) -o afpc afpc.$(SUF) diff --git a/fast/f_c.ansi/Parameters b/fast/f_c.ansi/Parameters new file mode 100644 index 0000000..239e82c --- /dev/null +++ b/fast/f_c.ansi/Parameters @@ -0,0 
+1,143 @@ +!File: lint.h +/*#define LINT 1 /* if defined, 'lint' is produced */ + + +!File: pathlength.h +#define PATHLENGTH 1024 /* max. length of path to file */ + + +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define ERR_SHADOW 5 /* a syntax error overshadows error messages + until ERR_SHADOW symbols have been + accepted without syntax error */ + + +!File: idfsize.h +#define IDFSIZE 64 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: nparams.h +#define NPARAMS 32 /* maximum number of parameters */ +#define STDC_NPARAMS 31 /* ANSI limit on number of parameters */ + + +!File: ifdepth.h +#define IFDEPTH 256 /* maximum number of nested if-constructions */ + + +!File: density.h +#define DENSITY 3 /* see switch.[ch] for an explanation */ + + +!File: macbuf.h +#define LAPBUF 128 /* initial size of macro replacement buffer */ +#define ARGBUF 128 /* initial size of macro parameter buffer(s) */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 16 /* step size in enlarging the memory for + the storage of a string */ + + +!File: trgt_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR 1 +#define SZ_SHORT 2 +#define SZ_WORD 4 +#define SZ_INT 4 +#define SZ_LONG 4 +#define SZ_FLOAT 4 +#define SZ_DOUBLE 8 +#define SZ_LNGDBL 8 /* for now */ +#define SZ_POINTER 4 + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_SHORT SZ_SHORT +#define AL_WORD SZ_WORD +#define AL_INT SZ_WORD +#define AL_LONG SZ_WORD +#define AL_FLOAT SZ_WORD +#define AL_DOUBLE SZ_WORD +#define AL_LNGDBL SZ_WORD +#define AL_POINTER SZ_WORD +#define AL_STRUCT 1 +#define AL_UNION 1 + + +!File: botch_free.h +/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ + + +!File: dataflow.h +/*#define DATAFLOW 
1 /* produce some compile-time xref */ + + +!File: debug.h +/*#define DEBUG 1 /* perform various self-tests */ +#define NDEBUG 1 /* disable assertions */ + + +!File: use_tmp.h +#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands + and if USE_TMP is defined let them + precede the rest of the generated + compact code */ +/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism + to generate EM-code in the order needed + for the code-generators. If not defined, + the old-style peephole optimizer is + needed. */ + + +!File: parbufsize.h +#define PARBUFSIZE 1024 + + +!File: textsize.h +#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ + + +!File: inputtype.h +#define INP_READ_IN_ONE 1 /* read input file in one */ + + +!File: nopp.h +/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ + + +!File: nobitfield.h +/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ + + +!File: spec_arith.h +/* describes internal compiler arithmetics */ +/*#define SPECIAL_ARITHMETICS /* something different from native long */ +#define UNSIGNED_ARITH unsigned arith /* when it is supported */ + + +!File: static.h +#define GSTATIC /* for large global "static" arrays */ + + +!File: nocross.h +#define NOCROSS 1 /* if NOT defined, cross compiler */ + + +!File: regcount.h +/*#define REGCOUNT 1 /* count occurrences for register messages */ + + +!File: dbsymtab.h +#define DBSYMTAB 1 /* ability to produce symbol table for debugger */ + + diff --git a/fast/f_c.ansi/proto.main b/fast/f_c.ansi/proto.main new file mode 100644 index 0000000..2600521 --- /dev/null +++ b/fast/f_c.ansi/proto.main @@ -0,0 +1,65 @@ +# $Id: proto.main,v 1.2 1994/06/24 11:02:11 ceriel Exp $ + +# C compilation part. Not to be called directly. +# Instead, it is to be called by the Makefile. +# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, +# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. + +#PARAMS do not remove this line! 
+ +MDIR = $(TARGET_HOME)/modules +LIBDIR = $(MDIR)/lib +LINTLIBDIR = $(UTIL_HOME)/modules/lib + +MALLOC = $(LIBDIR)/malloc.$(SUF) + +EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ + $(LIBDIR)/libCEopt.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ + $(LIBDIR)/libobject.$(LIBSUF) + +MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ + $(LIBDIR)/libassert.$(LIBSUF) \ + $(LIBDIR)/liballoc.$(LIBSUF) \ + $(MALLOC) \ + $(LIBDIR)/libflt.$(LIBSUF) \ + $(LIBDIR)/libprint.$(LIBSUF) \ + $(LIBDIR)/libstring.$(LIBSUF) \ + $(LIBDIR)/libsystem.$(LIBSUF) + +LIBS = $(EMLIB) $(MODLIB) + +LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) + +PROFILE = +INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg +CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) +LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) +LDFLAGS = $(PROFILE) $(LDOPTIONS) + +# C_SRC and OBJ should be set here. +#LISTS do not remove this line! + +all: main + +clean: + rm -f *.$(SUF) main + +lint: + $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) + +main: $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main + +# do not remove the next line; it is used for generating dependencies +#DEPENDENCIES diff --git a/fast/f_c.ansi/proto.make b/fast/f_c.ansi/proto.make new file mode 100644 index 0000000..d247d8c --- /dev/null +++ b/fast/f_c.ansi/proto.make @@ -0,0 +1,268 @@ +# $Id: proto.make,v 1.6 1994/06/24 11:02:14 ceriel Exp $ + +# make ANSI C compiler + +#PARAMS do not remove this line! 
+ +UTIL_BIN = \ + $(UTIL_HOME)/bin +SRC_DIR = \ + $(SRC_HOME)/lang/cem/cemcom.ansi +FSRC_DIR = \ + $(SRC_HOME)/fast/f_c.ansi + +TABGEN= $(UTIL_BIN)/tabgen +LLGEN = $(UTIL_BIN)/LLgen +LLGENOPTIONS = \ + -v + +SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ + $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g $(SRC_DIR)/ival.g +GEN_G = tokenfile.g +GFILES= $(GEN_G) $(SRC_G) + +SRC_C = \ + $(SRC_DIR)/Version.c \ + $(SRC_DIR)/LLlex.c \ + $(SRC_DIR)/LLmessage.c \ + $(SRC_DIR)/arith.c \ + $(SRC_DIR)/blocks.c \ + $(SRC_DIR)/ch3.c \ + $(SRC_DIR)/ch3bin.c \ + $(SRC_DIR)/ch3mon.c \ + $(SRC_DIR)/code.c \ + $(SRC_DIR)/conversion.c \ + $(SRC_DIR)/cstoper.c \ + $(SRC_DIR)/dataflow.c \ + $(SRC_DIR)/declarator.c \ + $(SRC_DIR)/decspecs.c \ + $(SRC_DIR)/domacro.c \ + $(SRC_DIR)/dumpidf.c \ + $(SRC_DIR)/error.c \ + $(SRC_DIR)/eval.c \ + $(SRC_DIR)/expr.c \ + $(SRC_DIR)/field.c \ + $(SRC_DIR)/fltcstoper.c \ + $(SRC_DIR)/idf.c \ + $(SRC_DIR)/init.c \ + $(SRC_DIR)/input.c \ + $(SRC_DIR)/l_comment.c \ + $(SRC_DIR)/l_ev_ord.c \ + $(SRC_DIR)/l_lint.c \ + $(SRC_DIR)/l_misc.c \ + $(SRC_DIR)/l_outdef.c \ + $(SRC_DIR)/l_states.c \ + $(SRC_DIR)/label.c \ + $(SRC_DIR)/main.c \ + $(SRC_DIR)/options.c \ + $(SRC_DIR)/pragma.c \ + $(SRC_DIR)/proto.c \ + $(SRC_DIR)/replace.c \ + $(SRC_DIR)/skip.c \ + $(SRC_DIR)/stab.c \ + $(SRC_DIR)/stack.c \ + $(SRC_DIR)/struct.c \ + $(SRC_DIR)/switch.c \ + $(SRC_DIR)/tokenname.c \ + $(SRC_DIR)/type.c \ + $(SRC_DIR)/util.c +GEN_C = tokenfile.c program.c declar.c expression.c statement.c ival.c \ + symbol2str.c char.c Lpars.c next.c +CFILES= $(SRC_C) $(GEN_C) + +SRC_H = \ + $(SRC_DIR)/LLlex.h \ + $(SRC_DIR)/align.h \ + $(SRC_DIR)/arith.h \ + $(SRC_DIR)/assert.h \ + $(SRC_DIR)/atw.h \ + $(SRC_DIR)/class.h \ + $(SRC_DIR)/decspecs.h \ + $(SRC_DIR)/file_info.h \ + $(SRC_DIR)/input.h \ + $(SRC_DIR)/interface.h \ + $(SRC_DIR)/l_class.h \ + $(SRC_DIR)/l_comment.h \ + $(SRC_DIR)/l_em.h \ + $(SRC_DIR)/l_lint.h \ + $(SRC_DIR)/label.h \ + $(SRC_DIR)/level.h \ + $(SRC_DIR)/mes.h 
\ + $(SRC_DIR)/sizes.h \ + $(SRC_DIR)/specials.h \ + $(SRC_DIR)/tokenname.h + +GEN_H = botch_free.h dataflow.h debug.h density.h errout.h \ + idfsize.h ifdepth.h inputtype.h macbuf.h lint.h \ + nobitfield.h nopp.h nocross.h \ + nparams.h numsize.h parbufsize.h pathlength.h Lpars.h \ + strsize.h trgt_sizes.h textsize.h use_tmp.h spec_arith.h static.h \ + regcount.h dbsymtab.h \ + code.h declar.h def.h expr.h field.h estack.h util.h proto.h replace.h \ + idf.h macro.h stmt.h struct.h switch.h type.h l_brace.h l_state.h \ + l_outdef.h stack.h + +HFILES= $(GEN_H) $(SRC_H) + +NEXTFILES = \ + $(SRC_DIR)/code.str \ + $(SRC_DIR)/declar.str \ + $(SRC_DIR)/def.str \ + $(SRC_DIR)/expr.str \ + $(SRC_DIR)/field.str \ + $(SRC_DIR)/estack.str \ + $(SRC_DIR)/util.str \ + $(SRC_DIR)/proto.str \ + $(SRC_DIR)/replace.str \ + $(SRC_DIR)/idf.str \ + $(SRC_DIR)/macro.str \ + $(SRC_DIR)/stack.str \ + $(SRC_DIR)/stmt.str \ + $(SRC_DIR)/struct.str \ + $(SRC_DIR)/switch.str \ + $(SRC_DIR)/type.str \ + $(SRC_DIR)/l_brace.str \ + $(SRC_DIR)/l_state.str \ + $(SRC_DIR)/l_outdef.str + +all: make.main + make -f make.main main + +install: all + cp main $(TARGET_HOME)/lib.bin/c_ce.ansi + +cmp: all + -cmp main $(TARGET_HOME)/lib.bin/c_ce.ansi + +opr: + make pr | opr + +pr: + @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ + $(FSRC_DIR)/Parameters + +lint: make.main + make -f make.main lint + +Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile + echo $(CFILES) | tr ' ' '\012' > Cfiles + echo $(HFILES) | tr ' ' '\012' >> Cfiles + +resolved: Cfiles + CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new + -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi + touch resolved + +# there is no file called "dependencies"; we want dependencies checked +# every time. This means that make.main is made every time. Oh well ... +# it does not take much time. 
+dependencies: resolved + do_deps `grep '.c$$' Cfiles` + +make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main + rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main + cat *.dep >> make.main + +make_macros: Makefile + echo 'SRC_DIR=$(SRC_DIR)' > make_macros + echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros + echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros + echo 'CC=$(CC)' >> make_macros + echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros + echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros + echo 'LINT=$(LINT)' >> make_macros + echo 'LINTSUF=$(LINTSUF)' >> make_macros + echo 'LINTPREF=$(LINTPREF)' >> make_macros + echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros + echo 'SUF=$(SUF)' >> make_macros + echo 'LIBSUF=$(LIBSUF)' >> make_macros + echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros + echo 'MACH=$(MACH)' >> make_macros + +lists: Cfiles + echo "C_SRC = \\" > lists + echo $(CFILES) >> lists + echo "OBJ = \\" >> lists + echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists + +clean: + -make -f make.main clean + rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output + rm -f resolved *.dep lists make.main make_macros + +LLfiles: $(GFILES) + $(LLGEN) $(LLGENOPTIONS) $(GFILES) + @touch LLfiles + +hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles + $(SRC_DIR)/make.hfiles $(FSRC_DIR)/Parameters + touch hfiles + +tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile + $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g + +symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase + $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c + +code.h: $(SRC_DIR)/code.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/code.str > code.h + +declar.h: $(SRC_DIR)/declar.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/declar.str > declar.h + +def.h: $(SRC_DIR)/def.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.str > 
def.h + +expr.h: $(SRC_DIR)/expr.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/expr.str > expr.h + +field.h: $(SRC_DIR)/field.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/field.str > field.h + +estack.h: $(SRC_DIR)/estack.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/estack.str > estack.h + +util.h: $(SRC_DIR)/util.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/util.str > util.h + +proto.h: $(SRC_DIR)/proto.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/proto.str > proto.h + +replace.h: $(SRC_DIR)/replace.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/replace.str > replace.h + +idf.h: $(SRC_DIR)/idf.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/idf.str > idf.h + +macro.h: $(SRC_DIR)/macro.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/macro.str > macro.h + +stack.h: $(SRC_DIR)/stack.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/stack.str > stack.h + +stmt.h: $(SRC_DIR)/stmt.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/stmt.str > stmt.h + +struct.h: $(SRC_DIR)/struct.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/struct.str > struct.h + +switch.h: $(SRC_DIR)/switch.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/switch.str > switch.h + +type.h: $(SRC_DIR)/type.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.str > type.h + +l_brace.h: $(SRC_DIR)/l_brace.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_brace.str > l_brace.h + +l_state.h: $(SRC_DIR)/l_state.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_state.str > l_state.h + +l_outdef.h: $(SRC_DIR)/l_outdef.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_outdef.str > l_outdef.h + +next.c: $(NEXTFILES) $(SRC_DIR)/make.next + $(SRC_DIR)/make.next $(NEXTFILES) > next.c + +char.c: $(SRC_DIR)/char.tab + $(TABGEN) 
-f$(SRC_DIR)/char.tab >char.c diff --git a/fast/f_c/Parameters b/fast/f_c/Parameters new file mode 100644 index 0000000..34a4f45 --- /dev/null +++ b/fast/f_c/Parameters @@ -0,0 +1,148 @@ +!File: lint.h +/*#define LINT 1 /* if defined, 'lint' is produced */ + + +!File: pathlength.h +#define PATHLENGTH 1024 /* max. length of path to file */ + + +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define ERR_SHADOW 5 /* a syntax error overshadows error messages + until ERR_SHADOW symbols have been + accepted without syntax error */ + + +!File: idfsize.h +#define IDFSIZE 64 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: nparams.h +#define NPARAMS 32 /* maximum number of parameters of macros */ + + +!File: ifdepth.h +#define IFDEPTH 256 /* maximum number of nested if-constructions */ + + +!File: density.h +#define DENSITY 3 /* see switch.[ch] for an explanation */ + + +!File: lapbuf.h +#define LAPBUF 4096 /* size of macro actual parameter buffer */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 16 /* step size in enlarging the memory for + the storage of a string */ + + +!File: target_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR (arith)1 +#define SZ_SHORT (arith)2 +#define SZ_WORD (arith)4 +#define SZ_INT (arith)4 +#define SZ_LONG (arith)4 +#define SZ_FLOAT (arith)4 +#define SZ_DOUBLE (arith)8 +#define SZ_POINTER (arith)4 + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_SHORT SZ_SHORT +#define AL_WORD SZ_WORD +#define AL_INT SZ_WORD +#define AL_LONG SZ_WORD +#define AL_FLOAT SZ_WORD +#define AL_DOUBLE SZ_WORD +#define AL_POINTER SZ_WORD +#define AL_STRUCT 1 +#define AL_UNION 1 + + +!File: botch_free.h +/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a 
check */ + + +!File: dataflow.h +/*#define DATAFLOW 1 /* produce some compile-time xref */ + + +!File: debug.h +/*#define DEBUG 1 /* perform various self-tests */ +#define NDEBUG 1 /* disable assertions */ + + +!File: use_tmp.h +#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands + and if USE_TMP is defined let them + precede the rest of the generated + compact code */ +/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism + to generate EM-code in the order needed + for the code-generators. If not defined, + the old-style peephole optimizer is + needed. */ + + +!File: parbufsize.h +#define PARBUFSIZE 1024 + + +!File: textsize.h +#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ +#define RTEXTSIZE 16 /* stepsize for enlarging repl.text */ + + +!File: inputtype.h +#define INP_READ_IN_ONE 1 /* read input file in one */ + + +!File: nopp.h +/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ + + +!File: nobitfield.h +/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ + + +!File: spec_arith.h +/* describes internal compiler arithmetics */ +/*#define SPECIAL_ARITHMETICS /* something different from native long */ + + +!File: static.h +#define GSTATIC /* for large global "static" arrays */ + + +!File: nofloat.h +/*#define NOFLOAT 1 /* if NOT defined, floats are implemented */ + + +!File: noRoption.h +/*#define NOROPTION 1 /* if NOT defined, R option is implemented */ + + +!File: nocross.h +#define NOCROSS 1 /* if NOT defined, cross compiler */ + + +!File: regcount.h +/*#define REGCOUNT 1 /* count occurrences for register messages */ + + +!File: dbsymtab.h +#define DBSYMTAB 1 /* ability to produce symbol table for debugger +*/ + + diff --git a/fast/f_c/proto.main b/fast/f_c/proto.main new file mode 100644 index 0000000..dd2f70a --- /dev/null +++ b/fast/f_c/proto.main @@ -0,0 +1,65 @@ +# $Id: proto.main,v 1.2 1994/06/24 11:02:27 ceriel Exp $ + +# C compilation part. Not to be called directly. 
+# Instead, it is to be called by the Makefile. +# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, +# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. + +#PARAMS do not remove this line! + +MDIR = $(TARGET_HOME)/modules +LIBDIR = $(MDIR)/lib +LINTLIBDIR = $(UTIL_HOME)/modules/lib + +MALLOC = $(LIBDIR)/malloc.$(SUF) + +EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ + $(LIBDIR)/libCEopt.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ + $(LIBDIR)/libobject.$(LIBSUF) + +MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ + $(LIBDIR)/libassert.$(LIBSUF) \ + $(LIBDIR)/liballoc.$(LIBSUF) \ + $(MALLOC) \ + $(LIBDIR)/libflt.$(LIBSUF) \ + $(LIBDIR)/libprint.$(LIBSUF) \ + $(LIBDIR)/libstring.$(LIBSUF) \ + $(LIBDIR)/libsystem.$(LIBSUF) + +LIBS = $(EMLIB) $(MODLIB) + +LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) + +PROFILE = +INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg +CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) +LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) +LDFLAGS = $(PROFILE) $(LDOPTIONS) + +# C_SRC and OBJ should be set here. +#LISTS do not remove this line! 
+ +all: main + +clean: + rm -f *.$(SUF) main + +lint: + $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) + +main: $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main + +# do not remove the next line; it is used for generating dependencies +#DEPENDENCIES diff --git a/fast/f_c/proto.make b/fast/f_c/proto.make new file mode 100644 index 0000000..c289774 --- /dev/null +++ b/fast/f_c/proto.make @@ -0,0 +1,264 @@ +# $Id: proto.make,v 1.4 1994/06/24 11:02:30 ceriel Exp $ + +# make C compiler + +#PARAMS do not remove this line! + +UTIL_BIN = \ + $(UTIL_HOME)/bin +SRC_DIR = \ + $(SRC_HOME)/lang/cem/cemcom +FSRC_DIR = \ + $(SRC_HOME)/fast/f_c + +TABGEN= $(UTIL_BIN)/tabgen +LLGEN = $(UTIL_BIN)/LLgen +LLGENOPTIONS = \ + -v + +SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ + $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g $(SRC_DIR)/ival.g +GEN_G = tokenfile.g +GFILES= $(GEN_G) $(SRC_G) + +SRC_C = \ + $(SRC_DIR)/Version.c \ + $(SRC_DIR)/LLlex.c \ + $(SRC_DIR)/LLmessage.c \ + $(SRC_DIR)/arith.c \ + $(SRC_DIR)/asm.c \ + $(SRC_DIR)/blocks.c \ + $(SRC_DIR)/ch7.c \ + $(SRC_DIR)/ch7bin.c \ + $(SRC_DIR)/ch7mon.c \ + $(SRC_DIR)/code.c \ + $(SRC_DIR)/conversion.c \ + $(SRC_DIR)/cstoper.c \ + $(SRC_DIR)/dataflow.c \ + $(SRC_DIR)/declarator.c \ + $(SRC_DIR)/decspecs.c \ + $(SRC_DIR)/domacro.c \ + $(SRC_DIR)/dumpidf.c \ + $(SRC_DIR)/error.c \ + $(SRC_DIR)/eval.c \ + $(SRC_DIR)/expr.c \ + $(SRC_DIR)/field.c \ + $(SRC_DIR)/idf.c \ + $(SRC_DIR)/init.c \ + $(SRC_DIR)/input.c \ + $(SRC_DIR)/l_comment.c \ + $(SRC_DIR)/l_ev_ord.c \ + $(SRC_DIR)/l_lint.c \ + $(SRC_DIR)/l_misc.c \ + $(SRC_DIR)/l_outdef.c \ + $(SRC_DIR)/l_states.c \ + $(SRC_DIR)/label.c \ + $(SRC_DIR)/main.c \ + $(SRC_DIR)/options.c \ + $(SRC_DIR)/replace.c \ + $(SRC_DIR)/scan.c \ + $(SRC_DIR)/skip.c \ + $(SRC_DIR)/stack.c \ + $(SRC_DIR)/struct.c \ + $(SRC_DIR)/switch.c \ + $(SRC_DIR)/tokenname.c \ + $(SRC_DIR)/type.c \ + $(SRC_DIR)/util.c \ + $(SRC_DIR)/stab.c + +GEN_C = tokenfile.c program.c declar.c expression.c statement.c ival.c \ + 
symbol2str.c char.c Lpars.c next.c +CFILES= $(SRC_C) $(GEN_C) + +SRC_H = \ + $(SRC_DIR)/LLlex.h \ + $(SRC_DIR)/align.h \ + $(SRC_DIR)/arith.h \ + $(SRC_DIR)/assert.h \ + $(SRC_DIR)/atw.h \ + $(SRC_DIR)/class.h \ + $(SRC_DIR)/decspecs.h \ + $(SRC_DIR)/file_info.h \ + $(SRC_DIR)/input.h \ + $(SRC_DIR)/interface.h \ + $(SRC_DIR)/l_class.h \ + $(SRC_DIR)/l_comment.h \ + $(SRC_DIR)/l_em.h \ + $(SRC_DIR)/l_lint.h \ + $(SRC_DIR)/label.h \ + $(SRC_DIR)/level.h \ + $(SRC_DIR)/mes.h \ + $(SRC_DIR)/sizes.h \ + $(SRC_DIR)/specials.h \ + $(SRC_DIR)/tokenname.h + +GEN_H = botch_free.h dataflow.h debug.h density.h errout.h \ + idfsize.h ifdepth.h inputtype.h lint.h \ + nobitfield.h nopp.h nocross.h \ + nparams.h numsize.h parbufsize.h pathlength.h Lpars.h \ + strsize.h target_sizes.h textsize.h use_tmp.h spec_arith.h static.h \ + regcount.h \ + code.h declar.h decspecs.h def.h expr.h field.h estack.h util.h \ + idf.h macro.h stmt.h struct.h switch.h type.h l_brace.h l_state.h \ + l_outdef.h stack.h lapbuf.h noRoption.h nofloat.h dbsymtab.h + +HFILES= $(GEN_H) $(SRC_H) + +NEXTFILES = \ + $(SRC_DIR)/code.str \ + $(SRC_DIR)/declar.str \ + $(SRC_DIR)/decspecs.str \ + $(SRC_DIR)/def.str \ + $(SRC_DIR)/expr.str \ + $(SRC_DIR)/field.str \ + $(SRC_DIR)/estack.str \ + $(SRC_DIR)/util.str \ + $(SRC_DIR)/idf.str \ + $(SRC_DIR)/macro.str \ + $(SRC_DIR)/stack.str \ + $(SRC_DIR)/stmt.str \ + $(SRC_DIR)/struct.str \ + $(SRC_DIR)/switch.str \ + $(SRC_DIR)/type.str \ + $(SRC_DIR)/l_brace.str \ + $(SRC_DIR)/l_state.str \ + $(SRC_DIR)/l_outdef.str + +all: make.main + make -f make.main main + +install: all + cp main $(TARGET_HOME)/lib.bin/c_ce + +cmp: all + -cmp main $(TARGET_HOME)/lib.bin/c_ce + +opr: + make pr | opr + +pr: + @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ + $(FSRC_DIR)/Parameters + +lint: make.main + make -f make.main lint + +Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile + echo $(CFILES) | tr ' ' '\012' > Cfiles + echo $(HFILES) | tr ' ' '\012' >> Cfiles + +resolved: 
Cfiles + CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new + -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi + touch resolved + +# there is no file called "dependencies"; we want dependencies checked +# every time. This means that make.main is made every time. Oh well ... +# it does not take much time. +dependencies: resolved + do_deps `grep '.c$$' Cfiles` + +make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main + rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main + cat *.dep >> make.main + +make_macros: Makefile + echo 'SRC_DIR=$(SRC_DIR)' > make_macros + echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros + echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros + echo 'CC=$(CC)' >> make_macros + echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros + echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros + echo 'LINT=$(LINT)' >> make_macros + echo 'LINTSUF=$(LINTSUF)' >> make_macros + echo 'LINTPREF=$(LINTPREF)' >> make_macros + echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros + echo 'SUF=$(SUF)' >> make_macros + echo 'LIBSUF=$(LIBSUF)' >> make_macros + echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros + echo 'MACH=$(MACH)' >> make_macros + +lists: Cfiles + echo "C_SRC = \\" > lists + echo $(CFILES) >> lists + echo "OBJ = \\" >> lists + echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists + +clean: + -make -f make.main clean + rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output + rm -f resolved *.dep lists make.main make_macros + +LLfiles: $(GFILES) + $(LLGEN) $(LLGENOPTIONS) $(GFILES) + @touch LLfiles + +hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles + $(SRC_DIR)/make.hfiles $(FSRC_DIR)/Parameters + touch hfiles + +tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile + $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g + +symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase + $(SRC_DIR)/make.tokcase 
<$(SRC_DIR)/tokenname.c >symbol2str.c + +code.h: $(SRC_DIR)/code.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/code.str > code.h + +declar.h: $(SRC_DIR)/declar.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/declar.str > declar.h + +def.h: $(SRC_DIR)/def.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.str > def.h + +expr.h: $(SRC_DIR)/expr.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/expr.str > expr.h + +field.h: $(SRC_DIR)/field.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/field.str > field.h + +estack.h: $(SRC_DIR)/estack.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/estack.str > estack.h + +util.h: $(SRC_DIR)/util.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/util.str > util.h + +decspecs.h: $(SRC_DIR)/decspecs.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/decspecs.str > decspecs.h + +idf.h: $(SRC_DIR)/idf.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/idf.str > idf.h + +macro.h: $(SRC_DIR)/macro.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/macro.str > macro.h + +stack.h: $(SRC_DIR)/stack.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/stack.str > stack.h + +stmt.h: $(SRC_DIR)/stmt.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/stmt.str > stmt.h + +struct.h: $(SRC_DIR)/struct.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/struct.str > struct.h + +switch.h: $(SRC_DIR)/switch.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/switch.str > switch.h + +type.h: $(SRC_DIR)/type.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.str > type.h + +l_brace.h: $(SRC_DIR)/l_brace.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_brace.str > l_brace.h + +l_state.h: $(SRC_DIR)/l_state.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_state.str > l_state.h + +l_outdef.h: 
$(SRC_DIR)/l_outdef.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_outdef.str > l_outdef.h + +next.c: $(NEXTFILES) $(SRC_DIR)/make.next + $(SRC_DIR)/make.next $(NEXTFILES) > next.c + +char.c: $(SRC_DIR)/char.tab + $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fast/f_m2/Parameters b/fast/f_m2/Parameters new file mode 100644 index 0000000..211ca94 --- /dev/null +++ b/fast/f_m2/Parameters @@ -0,0 +1,101 @@ +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define ERR_SHADOW 5 /* a syntax error overshadows error messages + until ERR_SHADOW symbols have been + accepted without syntax error */ + + +!File: idfsize.h +#define IDFSIZE 128 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 8 /* step size in enlarging the memory for + the storage of a string */ + + +!File: target_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR ((arith)1) +#define SZ_SHORT ((arith)2) +#define SZ_WORD ((arith)4) +#define SZ_INT ((arith)4) +#define SZ_LONG ((arith)4) +#define SZ_FLOAT ((arith)4) +#define SZ_DOUBLE ((arith)8) +#define SZ_POINTER ((arith)4) + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_SHORT ((int)SZ_SHORT) +#define AL_WORD ((int)SZ_WORD) +#define AL_INT ((int)SZ_WORD) +#define AL_LONG ((int)SZ_WORD) +#define AL_FLOAT ((int)SZ_WORD) +#define AL_DOUBLE ((int)SZ_WORD) +#define AL_POINTER ((int)SZ_WORD) +#define AL_STRUCT ((int)SZ_WORD) + + +!File: debugcst.h +/*#define DEBUG 1 /* perform various self-tests */ +#define NDEBUG 1 /* disable assertions */ + +!File: inputtype.h +#define INP_READ_IN_ONE 1 /* read input file in one */ + + +!File: density.h +#define DENSITY 3 /* see casestat.C for an explanation */ + + +!File: 
squeeze.h +/*#define SQUEEZE 1 /* define on "small" machines */ + + +!File: strict3rd.h +/*#define STRICT_3RD_ED 1 /* define on "small" machines, and if you want + a compiler that only implements "3rd edition" + Modula-2 + */ + + +!File: nocross.h +#define NOCROSS 1 /* define when cross-compiler not needed */ + + +!File: nostrict.h +/*#define NOSTRICT 1 /* define when STRICT warnings disabled + (yet another squeezing method) + */ + + +!File: bigresult.h +#define BIG_RESULT_ON_STACK 1 /* define when function results must be + put on the stack; in this case, caller + reserves space for it. When not defined, + callee puts result in global data area and + returns a pointer to it + */ + + +!File: dbsymtab.h +#define DBSYMTAB 1 /* ability to produce symbol table for debugger */ + + +!File: use_insert.h +/*#define USE_INSERT 1 /* use C_insertpart mechanism */ + + +!File: uns_arith.h +#define UNSIGNED_ARITH unsigned arith + + diff --git a/fast/f_m2/proto.main b/fast/f_m2/proto.main new file mode 100644 index 0000000..aee27f7 --- /dev/null +++ b/fast/f_m2/proto.main @@ -0,0 +1,65 @@ +# $Id: proto.main,v 1.2 1994/06/24 11:02:43 ceriel Exp $ + +# C compilation part. Not to be called directly. +# Instead, it is to be called by the Makefile. +# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, +# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. + +#PARAMS do not remove this line! 
+ +MDIR = $(TARGET_HOME)/modules +LIBDIR = $(MDIR)/lib +LINTLIBDIR = $(UTIL_HOME)/modules/lib + +MALLOC = $(LIBDIR)/malloc.$(SUF) + +EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ + $(LIBDIR)/libCEopt.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ + $(LIBDIR)/libobject.$(LIBSUF) + +MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ + $(LIBDIR)/libassert.$(LIBSUF) \ + $(LIBDIR)/liballoc.$(LIBSUF) \ + $(MALLOC) \ + $(LIBDIR)/libflt.$(LIBSUF) \ + $(LIBDIR)/libprint.$(LIBSUF) \ + $(LIBDIR)/libstring.$(LIBSUF) \ + $(LIBDIR)/libsystem.$(LIBSUF) + +LIBS = $(EMLIB) $(MODLIB) + +LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) + +PROFILE = +INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg +CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) +LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) +LDFLAGS = $(PROFILE) $(LDOPTIONS) + +# C_SRC and OBJ should be set here. +#LISTS do not remove this line! + +all: main + +clean: + rm -f *.$(SUF) main + +lint: + $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) + +main: $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main + +# do not remove the next line; it is used for generating dependencies +#DEPENDENCIES diff --git a/fast/f_m2/proto.make b/fast/f_m2/proto.make new file mode 100644 index 0000000..a13ecc2 --- /dev/null +++ b/fast/f_m2/proto.make @@ -0,0 +1,156 @@ +# $Id: proto.make,v 1.4 1994/06/24 11:02:46 ceriel Exp $ + +# make Modula-2 compiler + +#PARAMS do not remove this line! 
+ +UTIL_BIN = \ + $(UTIL_HOME)/bin +SRC_DIR = \ + $(SRC_HOME)/lang/m2/comp +FSRC_DIR = \ + $(SRC_HOME)/fast/f_m2 + +TABGEN= $(UTIL_BIN)/tabgen +LLGEN = $(UTIL_BIN)/LLgen +LLGENOPTIONS = \ + -v + +SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ + $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g +GEN_G = tokenfile.g +GFILES= $(GEN_G) $(SRC_G) + +SRC_C = $(SRC_DIR)/LLlex.c $(SRC_DIR)/LLmessage.c $(SRC_DIR)/error.c \ + $(SRC_DIR)/main.c $(SRC_DIR)/tokenname.c $(SRC_DIR)/idf.c \ + $(SRC_DIR)/input.c $(SRC_DIR)/type.c $(SRC_DIR)/def.c \ + $(SRC_DIR)/misc.c $(SRC_DIR)/enter.c $(SRC_DIR)/defmodule.c \ + $(SRC_DIR)/typequiv.c $(SRC_DIR)/node.c $(SRC_DIR)/cstoper.c \ + $(SRC_DIR)/chk_expr.c $(SRC_DIR)/options.c $(SRC_DIR)/walk.c \ + $(SRC_DIR)/desig.c $(SRC_DIR)/code.c $(SRC_DIR)/lookup.c \ + $(SRC_DIR)/Version.c $(SRC_DIR)/stab.c +GEN_C = tokenfile.c program.c declar.c expression.c statement.c \ + symbol2str.c char.c Lpars.c casestat.c tmpvar.c scope.c next.c +CFILES= $(SRC_C) $(GEN_C) + +SRC_H = $(SRC_DIR)/LLlex.h $(SRC_DIR)/chk_expr.h $(SRC_DIR)/class.h \ + $(SRC_DIR)/debug.h $(SRC_DIR)/desig.h $(SRC_DIR)/f_info.h \ + $(SRC_DIR)/idf.h $(SRC_DIR)/input.h $(SRC_DIR)/main.h \ + $(SRC_DIR)/misc.h $(SRC_DIR)/scope.h $(SRC_DIR)/standards.h \ + $(SRC_DIR)/tokenname.h $(SRC_DIR)/walk.h $(SRC_DIR)/warning.h \ + $(SRC_DIR)/SYSTEM.h +GEN_H = errout.h idfsize.h numsize.h strsize.h target_sizes.h bigresult.h \ + inputtype.h density.h squeeze.h nocross.h nostrict.h def.h debugcst.h \ + type.h Lpars.h node.h strict3rd.h real.h use_insert.h dbsymtab.h \ + uns_arith.h def.h type.h node.h real.h +HFILES= $(GEN_H) $(SRC_H) + +NEXTFILES = \ + $(SRC_DIR)/def.H $(SRC_DIR)/type.H $(SRC_DIR)/node.H $(SRC_DIR)/real.H \ + $(SRC_DIR)/scope.C $(SRC_DIR)/tmpvar.C $(SRC_DIR)/casestat.C + +all: make.main + make -f make.main main + +install: all + cp main $(TARGET_HOME)/lib.bin/m2_ce + +cmp: all + -cmp main $(TARGET_HOME)/lib.bin/m2_ce + +opr: + make pr | opr + +pr: + @pr $(FSRC_DIR)/proto.make 
$(FSRC_DIR)/proto.main \ + $(FSRC_DIR)/Parameters + +lint: make.main + make -f make.main lint + +Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile + echo $(CFILES) | tr ' ' '\012' > Cfiles + echo $(HFILES) | tr ' ' '\012' >> Cfiles + +resolved: Cfiles + CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new + -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi + touch resolved + +# there is no file called "dependencies"; we want dependencies checked +# every time. This means that make.main is made every time. Oh well ... +# it does not take much time. +dependencies: resolved + do_deps `grep '.c$$' Cfiles` + +make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main + rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main + cat *.dep >> make.main + +make_macros: Makefile + echo 'SRC_DIR=$(SRC_DIR)' > make_macros + echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros + echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros + echo 'CC=$(CC)' >> make_macros + echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros + echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros + echo 'LINT=$(LINT)' >> make_macros + echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros + echo 'LINTSUF=$(LINTSUF)' >> make_macros + echo 'LINTPREF=$(LINTPREF)' >> make_macros + echo 'SUF=$(SUF)' >> make_macros + echo 'LIBSUF=$(LIBSUF)' >> make_macros + echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros + echo 'MACH=$(MACH)' >> make_macros + +lists: Cfiles + echo "C_SRC = \\" > lists + echo $(CFILES) >> lists + echo "OBJ = \\" >> lists + echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists + +clean: + -make -f make.main clean + rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output + rm -f resolved *.dep lists make.main make_macros + +LLfiles: $(GFILES) + $(LLGEN) $(LLGENOPTIONS) $(GFILES) + @touch LLfiles + +hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles + $(SRC_DIR)/make.hfiles 
$(FSRC_DIR)/Parameters + touch hfiles + +tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile + $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g + +symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase + $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c + +def.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/def.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.H > def.h + +type.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/type.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.H > type.h + +real.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/real.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/real.H > real.h + +node.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/node.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/node.H > node.h + +scope.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/scope.C + $(SRC_DIR)/make.allocd < $(SRC_DIR)/scope.C > scope.c + +tmpvar.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/tmpvar.C + $(SRC_DIR)/make.allocd < $(SRC_DIR)/tmpvar.C > tmpvar.c + +casestat.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/casestat.C + $(SRC_DIR)/make.allocd < $(SRC_DIR)/casestat.C > casestat.c + +next.c: $(NEXTFILES) $(SRC_DIR)/make.next + $(SRC_DIR)/make.next $(NEXTFILES) > next.c + +char.c: $(SRC_DIR)/char.tab + $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fast/f_pc/Parameters b/fast/f_pc/Parameters new file mode 100644 index 0000000..e26506b --- /dev/null +++ b/fast/f_pc/Parameters @@ -0,0 +1,62 @@ +!File: debugcst.h +/*#define DEBUG 1 /* perform various self-tests */ +#define NDEBUG 1 /* disable assertions */ + + +!File: density.h +#define DENSITY 3 /* to determine, if a csa or csb + instruction must be generated */ + + +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define MAXERR_LINE 5 /* maximum number of error messages given + on the same input line. */ + + +!File: idfsize.h +#define IDFSIZE 128 /* max. 
significant length of an identifier */ + + +!File: inputtype.h +#define INP_READ_IN_ONE 1 /* read input file in one */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 8 /* step size in enlarging the memory for + the storage of a string */ + + +!File: target_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR (arith)1 +#define SZ_WORD (arith)4 +#define SZ_INT (arith)4 +#define SZ_LONG (arith)4 +#define SZ_POINTER (arith)4 +#define SZ_REAL (arith)8 + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_WORD ((int)SZ_WORD) +#define AL_INT ((int)SZ_WORD) +#define AL_LONG ((int)SZ_WORD) +#define AL_POINTER ((int)SZ_WORD) +#define AL_REAL ((int)SZ_WORD) +#define AL_STRUCT ((int)SZ_WORD) + + +!File: nocross.h +#define NOCROSS 1 /* define when cross compiler not needed */ + + +!File: dbsymtab.h +#define DBSYMTAB 1 /* ability to produce symbol table for debugger */ diff --git a/fast/f_pc/proto.main b/fast/f_pc/proto.main new file mode 100644 index 0000000..7bdf45c --- /dev/null +++ b/fast/f_pc/proto.main @@ -0,0 +1,65 @@ +# $Id: proto.main,v 1.2 1994/06/24 11:02:59 ceriel Exp $ + +# C compilation part. Not to be called directly. +# Instead, it is to be called by the Makefile. +# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, +# CC_AND_MKDEP, SUF, LIBSUF, MACH should be set here. + +#PARAMS do not remove this line! 
+ +MDIR = $(TARGET_HOME)/modules +LIBDIR = $(MDIR)/lib +LINTLIBDIR = $(UTIL_HOME)/modules/lib + +MALLOC = $(LIBDIR)/malloc.$(SUF) + +EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ + $(LIBDIR)/libCEopt.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/ce.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/$(MACH)/back.$(LIBSUF) \ + $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ + $(LIBDIR)/libobject.$(LIBSUF) + +MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ + $(LIBDIR)/libassert.$(LIBSUF) \ + $(LIBDIR)/liballoc.$(LIBSUF) \ + $(MALLOC) \ + $(LIBDIR)/libflt.$(LIBSUF) \ + $(LIBDIR)/libprint.$(LIBSUF) \ + $(LIBDIR)/libstring.$(LIBSUF) \ + $(LIBDIR)/libsystem.$(LIBSUF) + +LIBS = $(EMLIB) $(MODLIB) + +LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) + +PROFILE = +INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg +CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) -DSTATIC=static +LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) -DNORCSID -DSTATIC=static +LDFLAGS = $(PROFILE) $(LDOPTIONS) + +# C_SRC and OBJ should be set here. +#LISTS do not remove this line! + +all: main + +clean: + rm -f *.$(SUF) main + +lint: + $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) + +main: $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main + +# do not remove the next line; it is used for generating dependencies +#DEPENDENCIES diff --git a/fast/f_pc/proto.make b/fast/f_pc/proto.make new file mode 100644 index 0000000..1e37e1a --- /dev/null +++ b/fast/f_pc/proto.make @@ -0,0 +1,157 @@ +# $Id: proto.make,v 1.4 1994/06/24 11:03:03 ceriel Exp $ + +# make Pascal compiler + +#PARAMS do not remove this line! 
+ +UTIL_BIN = \ + $(UTIL_HOME)/bin +SRC_DIR = \ + $(SRC_HOME)/lang/pc/comp +FSRC_DIR = \ + $(SRC_HOME)/fast/f_pc + +TABGEN= $(UTIL_BIN)/tabgen +LLGEN = $(UTIL_BIN)/LLgen +LLGENOPTIONS = \ + -v + +SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ + $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g +GEN_G = tokenfile.g +GFILES= $(GEN_G) $(SRC_G) + +SRC_C = $(SRC_DIR)/LLlex.c $(SRC_DIR)/LLmessage.c $(SRC_DIR)/body.c \ + $(SRC_DIR)/error.c $(SRC_DIR)/label.c $(SRC_DIR)/readwrite.c \ + $(SRC_DIR)/main.c $(SRC_DIR)/tokenname.c $(SRC_DIR)/idf.c \ + $(SRC_DIR)/input.c $(SRC_DIR)/type.c $(SRC_DIR)/def.c \ + $(SRC_DIR)/misc.c $(SRC_DIR)/enter.c $(SRC_DIR)/progs.c \ + $(SRC_DIR)/typequiv.c $(SRC_DIR)/node.c $(SRC_DIR)/cstoper.c \ + $(SRC_DIR)/chk_expr.c $(SRC_DIR)/options.c $(SRC_DIR)/scope.c \ + $(SRC_DIR)/desig.c $(SRC_DIR)/code.c $(SRC_DIR)/lookup.c \ + $(SRC_DIR)/stab.c +GEN_C = tokenfile.c program.c declar.c expression.c statement.c \ + symbol2str.c char.c Lpars.c casestat.c tmpvar.c next.c +CFILES= $(SRC_C) $(GEN_C) + +SRC_H = $(SRC_DIR)/LLlex.h $(SRC_DIR)/chk_expr.h $(SRC_DIR)/class.h \ + $(SRC_DIR)/const.h $(SRC_DIR)/debug.h $(SRC_DIR)/f_info.h \ + $(SRC_DIR)/idf.h $(SRC_DIR)/input.h $(SRC_DIR)/main.h \ + $(SRC_DIR)/misc.h $(SRC_DIR)/required.h $(SRC_DIR)/tokenname.h + +GEN_H = errout.h idfsize.h numsize.h strsize.h target_sizes.h \ + inputtype.h density.h nocross.h def.h debugcst.h \ + type.h Lpars.h node.h dbsymtab.h scope.h desig.h + +HFILES= $(GEN_H) $(SRC_H) + +NEXTFILES = \ + $(SRC_DIR)/def.H $(SRC_DIR)/type.H $(SRC_DIR)/node.H \ + $(SRC_DIR)/scope.H $(SRC_DIR)/desig.H \ + $(SRC_DIR)/tmpvar.C $(SRC_DIR)/casestat.C + +all: make.main + make -f make.main main + +install: all + cp main $(TARGET_HOME)/lib.bin/pc_ce + +cmp: all + -cmp main $(TARGET_HOME)/lib.bin/pc_ce + +opr: + make pr | opr + +pr: + @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ + $(FSRC_DIR)/Parameters + +lint: make.main + make -f make.main lint + +Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) 
Makefile + echo $(CFILES) | tr ' ' '\012' > Cfiles + echo $(HFILES) | tr ' ' '\012' >> Cfiles + +resolved: Cfiles + CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new + -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi + touch resolved + +# there is no file called "dependencies"; we want dependencies checked +# every time. This means that make.main is made every time. Oh well ... +# it does not take much time. +dependencies: resolved + do_deps `grep '.c$$' Cfiles` + +make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main + rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main + cat *.dep >> make.main + +make_macros: Makefile + echo 'SRC_DIR=$(SRC_DIR)' > make_macros + echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros + echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros + echo 'CC=$(CC)' >> make_macros + echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros + echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros + echo 'LINT=$(LINT)' >> make_macros + echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros + echo 'LINTSUF=$(LINTSUF)' >> make_macros + echo 'LINTPREF=$(LINTPREF)' >> make_macros + echo 'SUF=$(SUF)' >> make_macros + echo 'LIBSUF=$(LIBSUF)' >> make_macros + echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros + echo 'MACH=$(MACH)' >> make_macros + +lists: Cfiles + echo "C_SRC = \\" > lists + echo $(CFILES) >> lists + echo "OBJ = \\" >> lists + echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists + +clean: + -make -f make.main clean + rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output + rm -f resolved *.dep lists make.main make_macros + +LLfiles: $(GFILES) + $(LLGEN) $(LLGENOPTIONS) $(GFILES) + @touch LLfiles + +hfiles: $(FSRC_DIR)/Parameters $(SRC_DIR)/make.hfiles + $(SRC_DIR)/make.hfiles $(FSRC_DIR)/Parameters + touch hfiles + +tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile + $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c 
>tokenfile.g + +symbol2str.c: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase + $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c + +def.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/def.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.H > def.h + +type.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/type.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.H > type.h + +scope.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/scope.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/scope.H > scope.h + +node.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/node.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/node.H > node.h + +desig.h: $(SRC_DIR)/make.allocd $(SRC_DIR)/desig.H + $(SRC_DIR)/make.allocd < $(SRC_DIR)/desig.H > desig.h + +tmpvar.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/tmpvar.C + $(SRC_DIR)/make.allocd < $(SRC_DIR)/tmpvar.C > tmpvar.c + +casestat.c: $(SRC_DIR)/make.allocd $(SRC_DIR)/casestat.C + $(SRC_DIR)/make.allocd < $(SRC_DIR)/casestat.C > casestat.c + +next.c: $(NEXTFILES) $(SRC_DIR)/make.next + $(SRC_DIR)/make.next $(NEXTFILES) > next.c + +char.c: $(SRC_DIR)/char.tab + $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fcc/Action b/fcc/Action new file mode 100644 index 0000000..3f4edc9 --- /dev/null +++ b/fcc/Action @@ -0,0 +1,6 @@ +name "Fast cc-compatible C compiler" +dir cemcom +end +name "Driver for fast cc-compatible C compiler" +dir driver +end diff --git a/fcc/cemcom/Parameters.sun3 b/fcc/cemcom/Parameters.sun3 new file mode 100644 index 0000000..9b4540d --- /dev/null +++ b/fcc/cemcom/Parameters.sun3 @@ -0,0 +1,148 @@ +!File: lint.h +/*#define LINT 1 /* if defined, 'lint' is produced */ + + +!File: pathlength.h +#define PATHLENGTH 1024 /* max. 
length of path to file */ + + +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define ERR_SHADOW 5 /* a syntax error overshadows error messages + until ERR_SHADOW symbols have been + accepted without syntax error */ + + +!File: idfsize.h +#define IDFSIZE 64 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: nparams.h +#define NPARAMS 32 /* maximum number of parameters of macros */ + + +!File: ifdepth.h +#define IFDEPTH 256 /* maximum number of nested if-constructions */ + + +!File: density.h +#define DENSITY 3 /* see switch.[ch] for an explanation */ + + +!File: lapbuf.h +#define LAPBUF 4096 /* size of macro actual parameter buffer */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 16 /* step size in enlarging the memory for + the storage of a string */ + + +!File: target_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR (arith)1 +#define SZ_SHORT (arith)2 +#define SZ_WORD (arith)4 +#define SZ_INT (arith)4 +#define SZ_LONG (arith)4 +#define SZ_FLOAT (arith)4 +#define SZ_DOUBLE (arith)8 +#define SZ_POINTER (arith)4 + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_SHORT SZ_SHORT +#define AL_WORD 2 +#define AL_INT 2 +#define AL_LONG 2 +#define AL_FLOAT 2 +#define AL_DOUBLE 2 +#define AL_POINTER 2 +#define AL_STRUCT 2 +#define AL_UNION 2 + + +!File: botch_free.h +/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ + + +!File: dataflow.h +/*#define DATAFLOW 1 /* produce some compile-time xref */ + + +!File: debug.h +/*#define DEBUG 1 /* perform various self-tests */ +#define NDEBUG 1 /* disable assertions */ + + +!File: use_tmp.h +#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands + and if USE_TMP is defined let them + precede the rest of 
the generated + compact code */ +/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism + to generate EM-code in the order needed + for the code-generators. If not defined, + the old-style peephole optimizer is + needed. */ + + +!File: parbufsize.h +#define PARBUFSIZE 1024 + + +!File: textsize.h +#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ +#define RTEXTSIZE 16 /* stepsize for enlarging repl.text */ + + +!File: inputtype.h +#define INP_READ_IN_ONE 1 /* read input file in one */ + + +!File: nopp.h +/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ + + +!File: nobitfield.h +/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ + + +!File: spec_arith.h +/* describes internal compiler arithmetics */ +/*#define SPECIAL_ARITHMETICS /* something different from native long */ + + +!File: static.h +#define GSTATIC /* for large global "static" arrays */ + + +!File: nofloat.h +/*#define NOFLOAT 1 /* if NOT defined, floats are implemented */ + + +!File: noRoption.h +/*#define NOROPTION 1 /* if NOT defined, R option is implemented */ + + +!File: nocross.h +#define NOCROSS 1 /* if NOT defined, cross compiler */ + + +!File: regcount.h +/*#define REGCOUNT 1 /* count occurrences for register messages */ + + +!File: dbsymtab.h +#define DBSYMTAB 1 /* ability to produce symbol table for debugger +*/ + + diff --git a/fcc/cemcom/Parameters.vax4 b/fcc/cemcom/Parameters.vax4 new file mode 100644 index 0000000..34a4f45 --- /dev/null +++ b/fcc/cemcom/Parameters.vax4 @@ -0,0 +1,148 @@ +!File: lint.h +/*#define LINT 1 /* if defined, 'lint' is produced */ + + +!File: pathlength.h +#define PATHLENGTH 1024 /* max. 
length of path to file */ + + +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define ERR_SHADOW 5 /* a syntax error overshadows error messages + until ERR_SHADOW symbols have been + accepted without syntax error */ + + +!File: idfsize.h +#define IDFSIZE 64 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: nparams.h +#define NPARAMS 32 /* maximum number of parameters of macros */ + + +!File: ifdepth.h +#define IFDEPTH 256 /* maximum number of nested if-constructions */ + + +!File: density.h +#define DENSITY 3 /* see switch.[ch] for an explanation */ + + +!File: lapbuf.h +#define LAPBUF 4096 /* size of macro actual parameter buffer */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 16 /* step size in enlarging the memory for + the storage of a string */ + + +!File: target_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR (arith)1 +#define SZ_SHORT (arith)2 +#define SZ_WORD (arith)4 +#define SZ_INT (arith)4 +#define SZ_LONG (arith)4 +#define SZ_FLOAT (arith)4 +#define SZ_DOUBLE (arith)8 +#define SZ_POINTER (arith)4 + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_SHORT SZ_SHORT +#define AL_WORD SZ_WORD +#define AL_INT SZ_WORD +#define AL_LONG SZ_WORD +#define AL_FLOAT SZ_WORD +#define AL_DOUBLE SZ_WORD +#define AL_POINTER SZ_WORD +#define AL_STRUCT 1 +#define AL_UNION 1 + + +!File: botch_free.h +/*#define BOTCH_FREE 1 /* when defined, botch freed memory, as a check */ + + +!File: dataflow.h +/*#define DATAFLOW 1 /* produce some compile-time xref */ + + +!File: debug.h +/*#define DEBUG 1 /* perform various self-tests */ +#define NDEBUG 1 /* disable assertions */ + + +!File: use_tmp.h +#define PREPEND_SCOPES 1 /* collect exa, exp, ina and inp commands + and if USE_TMP is 
defined let them + precede the rest of the generated + compact code */ +/*#define USE_TMP 1 /* use C_insertpart, C_endpart mechanism + to generate EM-code in the order needed + for the code-generators. If not defined, + the old-style peephole optimizer is + needed. */ + + +!File: parbufsize.h +#define PARBUFSIZE 1024 + + +!File: textsize.h +#define ITEXTSIZE 32 /* 1st piece of memory for repl. text */ +#define RTEXTSIZE 16 /* stepsize for enlarging repl.text */ + + +!File: inputtype.h +#define INP_READ_IN_ONE 1 /* read input file in one */ + + +!File: nopp.h +/*#define NOPP 1 /* if NOT defined, use built-int preprocessor */ + + +!File: nobitfield.h +/*#define NOBITFIELD 1 /* if NOT defined, implement bitfields */ + + +!File: spec_arith.h +/* describes internal compiler arithmetics */ +/*#define SPECIAL_ARITHMETICS /* something different from native long */ + + +!File: static.h +#define GSTATIC /* for large global "static" arrays */ + + +!File: nofloat.h +/*#define NOFLOAT 1 /* if NOT defined, floats are implemented */ + + +!File: noRoption.h +/*#define NOROPTION 1 /* if NOT defined, R option is implemented */ + + +!File: nocross.h +#define NOCROSS 1 /* if NOT defined, cross compiler */ + + +!File: regcount.h +/*#define REGCOUNT 1 /* count occurrences for register messages */ + + +!File: dbsymtab.h +#define DBSYMTAB 1 /* ability to produce symbol table for debugger +*/ + + diff --git a/fcc/cemcom/proto.main b/fcc/cemcom/proto.main new file mode 100644 index 0000000..ec289a9 --- /dev/null +++ b/fcc/cemcom/proto.main @@ -0,0 +1,64 @@ +# $Id: proto.main,v 1.2 1994/06/24 11:03:17 ceriel Exp $ + +# C compilation part. Not to be called directly. +# Instead, it is to be called by the Makefile. +# SRC_DIR, UTIL_HOME, TARGET_HOME, CC, COPTIONS, LINT, LINTOPTIONS, LDOPTIONS, +# CC_AND_MKDEP, SUF, LIBSUF, CE should be set here. + +#PARAMS do not remove this line! 
+ +MDIR = $(TARGET_HOME)/modules +LIBDIR = $(MDIR)/lib +LINTLIBDIR = $(UTIL_HOME)/modules/lib + +MALLOC = $(LIBDIR)/malloc.$(SUF) + +EMLIB = $(LIBDIR)/libem_mesO.$(LIBSUF) \ + $(LIBDIR)/libCEopt.$(LIBSUF) \ + $(CE) \ + $(TARGET_HOME)/lib.bin/em_data.$(LIBSUF) \ + $(LIBDIR)/libobject.$(LIBSUF) + +MODLIB = $(LIBDIR)/libinput.$(LIBSUF) \ + $(LIBDIR)/libassert.$(LIBSUF) \ + $(LIBDIR)/liballoc.$(LIBSUF) \ + $(MALLOC) \ + $(LIBDIR)/libflt.$(LIBSUF) \ + $(LIBDIR)/libprint.$(LIBSUF) \ + $(LIBDIR)/libstring.$(LIBSUF) \ + $(LIBDIR)/libsystem.$(LIBSUF) + +LIBS = $(EMLIB) $(MODLIB) + +LINTLIBS = $(LINTLIBDIR)/$(LINTPREF)em_mes.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)emk.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)input.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)assert.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)alloc.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)flt.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)print.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)string.$(LINTSUF) \ + $(LINTLIBDIR)/$(LINTPREF)system.$(LINTSUF) + +PROFILE = +INCLUDES = -I. -I$(SRC_DIR) -I$(TARGET_HOME)/modules/h -I$(TARGET_HOME)/h -I$(TARGET_HOME)/modules/pkg +CFLAGS = $(PROFILE) $(INCLUDES) $(COPTIONS) +LINTFLAGS = $(INCLUDES) $(LINTOPTIONS) +LDFLAGS = $(PROFILE) $(LDOPTIONS) + +# C_SRC and OBJ should be set here. +#LISTS do not remove this line! + +all: main + +clean: + rm -f *.$(SUF) main + +lint: + $(LINT) $(LINTFLAGS) $(C_SRC) $(LINTLIBS) + +main: $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) $(LIBS) -o main + +# do not remove the next line; it is used for generating dependencies +#DEPENDENCIES diff --git a/fcc/cemcom/proto.make b/fcc/cemcom/proto.make new file mode 100644 index 0000000..edfdaa3 --- /dev/null +++ b/fcc/cemcom/proto.make @@ -0,0 +1,275 @@ +# $Id: proto.make,v 1.4 1994/06/24 11:03:20 ceriel Exp $ + +# make C compiler + +#PARAMS do not remove this line! 
+ +UTIL_BIN = \ + $(UTIL_HOME)/bin +SRC_DIR = \ + $(SRC_HOME)/lang/cem/cemcom +FSRC_DIR = \ + $(SRC_HOME)/fcc/cemcom + +TABGEN= $(UTIL_BIN)/tabgen +LLGEN = $(UTIL_BIN)/LLgen +LLGENOPTIONS = \ + -v + +SRC_G = $(SRC_DIR)/program.g $(SRC_DIR)/declar.g \ + $(SRC_DIR)/expression.g $(SRC_DIR)/statement.g $(SRC_DIR)/ival.g +GEN_G = tokenfile.g +GFILES= $(GEN_G) $(SRC_G) + +SRC_C = \ + $(SRC_DIR)/Version.c \ + $(SRC_DIR)/LLlex.c \ + $(SRC_DIR)/LLmessage.c \ + $(SRC_DIR)/arith.c \ + $(SRC_DIR)/asm.c \ + $(SRC_DIR)/blocks.c \ + $(SRC_DIR)/ch7.c \ + $(SRC_DIR)/ch7bin.c \ + $(SRC_DIR)/ch7mon.c \ + $(SRC_DIR)/code.c \ + $(SRC_DIR)/conversion.c \ + $(SRC_DIR)/cstoper.c \ + $(SRC_DIR)/dataflow.c \ + $(SRC_DIR)/declarator.c \ + $(SRC_DIR)/decspecs.c \ + $(SRC_DIR)/domacro.c \ + $(SRC_DIR)/dumpidf.c \ + $(SRC_DIR)/error.c \ + $(SRC_DIR)/eval.c \ + $(SRC_DIR)/expr.c \ + $(SRC_DIR)/field.c \ + $(SRC_DIR)/idf.c \ + $(SRC_DIR)/init.c \ + $(SRC_DIR)/input.c \ + $(SRC_DIR)/l_comment.c \ + $(SRC_DIR)/l_ev_ord.c \ + $(SRC_DIR)/l_lint.c \ + $(SRC_DIR)/l_misc.c \ + $(SRC_DIR)/l_outdef.c \ + $(SRC_DIR)/l_states.c \ + $(SRC_DIR)/label.c \ + $(SRC_DIR)/main.c \ + $(SRC_DIR)/options.c \ + $(SRC_DIR)/replace.c \ + $(SRC_DIR)/scan.c \ + $(SRC_DIR)/skip.c \ + $(SRC_DIR)/stack.c \ + $(SRC_DIR)/struct.c \ + $(SRC_DIR)/switch.c \ + $(SRC_DIR)/tokenname.c \ + $(SRC_DIR)/type.c \ + $(SRC_DIR)/util.c \ + $(SRC_DIR)/stab.c + +GEN_C = tokenfile.c program.c declar.c expression.c statement.c ival.c \ + symbol2str.c char.c Lpars.c next.c +CFILES= $(SRC_C) $(GEN_C) + +SRC_H = \ + $(SRC_DIR)/LLlex.h \ + $(SRC_DIR)/align.h \ + $(SRC_DIR)/arith.h \ + $(SRC_DIR)/assert.h \ + $(SRC_DIR)/atw.h \ + $(SRC_DIR)/class.h \ + $(SRC_DIR)/decspecs.h \ + $(SRC_DIR)/file_info.h \ + $(SRC_DIR)/input.h \ + $(SRC_DIR)/interface.h \ + $(SRC_DIR)/l_class.h \ + $(SRC_DIR)/l_comment.h \ + $(SRC_DIR)/l_em.h \ + $(SRC_DIR)/l_lint.h \ + $(SRC_DIR)/label.h \ + $(SRC_DIR)/level.h \ + $(SRC_DIR)/mes.h \ + $(SRC_DIR)/sizes.h \ + 
$(SRC_DIR)/specials.h \ + $(SRC_DIR)/tokenname.h + +GEN_H = botch_free.h dataflow.h debug.h density.h errout.h \ + idfsize.h ifdepth.h inputtype.h lint.h \ + nobitfield.h nopp.h nocross.h \ + nparams.h numsize.h parbufsize.h pathlength.h Lpars.h \ + strsize.h target_sizes.h textsize.h use_tmp.h spec_arith.h static.h \ + regcount.h \ + code.h declar.h decspecs.h def.h expr.h field.h estack.h util.h \ + idf.h macro.h stmt.h struct.h switch.h type.h l_brace.h l_state.h \ + l_outdef.h stack.h lapbuf.h noRoption.h nofloat.h dbsymtab.h + +HFILES= $(GEN_H) $(SRC_H) + +NEXTFILES = \ + $(SRC_DIR)/code.str \ + $(SRC_DIR)/declar.str \ + $(SRC_DIR)/decspecs.str \ + $(SRC_DIR)/def.str \ + $(SRC_DIR)/expr.str \ + $(SRC_DIR)/field.str \ + $(SRC_DIR)/estack.str \ + $(SRC_DIR)/util.str \ + $(SRC_DIR)/idf.str \ + $(SRC_DIR)/macro.str \ + $(SRC_DIR)/stack.str \ + $(SRC_DIR)/stmt.str \ + $(SRC_DIR)/struct.str \ + $(SRC_DIR)/switch.str \ + $(SRC_DIR)/type.str \ + $(SRC_DIR)/l_brace.str \ + $(SRC_DIR)/l_state.str \ + $(SRC_DIR)/l_outdef.str + +all: make.main + make -f make.main main + +install: all + cp main $(TARGET_HOME)/lib.bin/c_cccompat + +cmp: all + -cmp main $(TARGET_HOME)/lib.bin/c_cccompat + +opr: + make pr | opr + +pr: + @pr $(FSRC_DIR)/proto.make $(FSRC_DIR)/proto.main \ + $(FSRC_DIR)/Parameters.vax4 $(FSRC_DIR)/Parameters.sun3 + +lint: make.main + make -f make.main lint + +Cfiles: hfiles LLfiles $(GEN_C) $(GEN_H) Makefile + echo $(CFILES) | tr ' ' '\012' > Cfiles + echo $(HFILES) | tr ' ' '\012' >> Cfiles + +resolved: Cfiles + CC="$(CC)" UTIL_HOME="$(UTIL_HOME)" do_resolve `cat Cfiles` > Cfiles.new + -if cmp -s Cfiles Cfiles.new ; then rm -f Cfiles.new ; else mv Cfiles.new Cfiles ; fi + touch resolved + +# there is no file called "dependencies"; we want dependencies checked +# every time. This means that make.main is made every time. Oh well ... +# it does not take much time. 
+dependencies: resolved + do_deps `grep '.c$$' Cfiles` + +make.main: dependencies make_macros lists $(FSRC_DIR)/proto.main + rm_deps $(FSRC_DIR)/proto.main | sed -e '/^.PARAMS/r make_macros' -e '/^.LISTS/r lists' > make.main + cat *.dep >> make.main + +make_macros: Makefile + echo 'SRC_DIR=$(SRC_DIR)' > make_macros + echo 'UTIL_HOME=$(UTIL_HOME)' >> make_macros + echo 'TARGET_HOME=$(TARGET_HOME)' >> make_macros + echo 'CC=$(CC)' >> make_macros + echo 'COPTIONS=$(COPTIONS) -DPEEPHOLE' >> make_macros + echo 'LDOPTIONS=$(LDOPTIONS)' >> make_macros + echo 'LINT=$(LINT)' >> make_macros + echo 'LINTSUF=$(LINTSUF)' >> make_macros + echo 'LINTPREF=$(LINTPREF)' >> make_macros + echo 'LINTOPTIONS=$(LINTOPTIONS)' >> make_macros + echo 'SUF=$(SUF)' >> make_macros + echo 'LIBSUF=$(LIBSUF)' >> make_macros + echo 'CC_AND_MKDEP=$(CC_AND_MKDEP)' >> make_macros + if [ $(MACH) = m68020 ] ; then \ + echo 'CE=$(TARGET_HOME)/lib.bin/sun3/ce.$(LIBSUF) $(TARGET_HOME)/lib.bin/sun3/back.$(LIBSUF)' >> make_macros ; \ + else \ + echo 'CE=$(TARGET_HOME)/lib.bin/vax4/ce.$(LIBSUF) $(TARGET_HOME)/lib.bin/vax4/back_vax.$(LIBSUF)' >> make_macros ; \ + fi + +lists: Cfiles + echo "C_SRC = \\" > lists + echo $(CFILES) >> lists + echo "OBJ = \\" >> lists + echo $(CFILES) | sed -e 's|[^ ]*/||g' -e 's/\.c/.$$(SUF)/g' >> lists + +clean: + -make -f make.main clean + rm -f $(GEN_C) $(GEN_G) $(GEN_H) hfiles LLfiles Cfiles LL.output + rm -f resolved *.dep lists make.main make_macros + +LLfiles: $(GFILES) + $(LLGEN) $(LLGENOPTIONS) $(GFILES) + @touch LLfiles + +hfiles: Parameters $(SRC_DIR)/make.hfiles + $(SRC_DIR)/make.hfiles Parameters + touch hfiles + +Parameters: $(FSRC_DIR)/Parameters.sun3 $(FSRC_DIR)/Parameters.vax4 + if [ $(MACH) = m68020 ] ; then \ + cp $(FSRC_DIR)/Parameters.sun3 Parameters ; \ + else \ + cp $(FSRC_DIR)/Parameters.vax4 Parameters ; \ + fi + +tokenfile.g: $(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokfile + $(SRC_DIR)/make.tokfile <$(SRC_DIR)/tokenname.c >tokenfile.g + +symbol2str.c: 
$(SRC_DIR)/tokenname.c $(SRC_DIR)/make.tokcase + $(SRC_DIR)/make.tokcase <$(SRC_DIR)/tokenname.c >symbol2str.c + +code.h: $(SRC_DIR)/code.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/code.str > code.h + +declar.h: $(SRC_DIR)/declar.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/declar.str > declar.h + +def.h: $(SRC_DIR)/def.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/def.str > def.h + +expr.h: $(SRC_DIR)/expr.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/expr.str > expr.h + +field.h: $(SRC_DIR)/field.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/field.str > field.h + +estack.h: $(SRC_DIR)/estack.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/estack.str > estack.h + +util.h: $(SRC_DIR)/util.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/util.str > util.h + +decspecs.h: $(SRC_DIR)/decspecs.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/decspecs.str > decspecs.h + +idf.h: $(SRC_DIR)/idf.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/idf.str > idf.h + +macro.h: $(SRC_DIR)/macro.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/macro.str > macro.h + +stack.h: $(SRC_DIR)/stack.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/stack.str > stack.h + +stmt.h: $(SRC_DIR)/stmt.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/stmt.str > stmt.h + +struct.h: $(SRC_DIR)/struct.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/struct.str > struct.h + +switch.h: $(SRC_DIR)/switch.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/switch.str > switch.h + +type.h: $(SRC_DIR)/type.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/type.str > type.h + +l_brace.h: $(SRC_DIR)/l_brace.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_brace.str > l_brace.h + +l_state.h: $(SRC_DIR)/l_state.str $(SRC_DIR)/make.allocd + 
$(SRC_DIR)/make.allocd < $(SRC_DIR)/l_state.str > l_state.h + +l_outdef.h: $(SRC_DIR)/l_outdef.str $(SRC_DIR)/make.allocd + $(SRC_DIR)/make.allocd < $(SRC_DIR)/l_outdef.str > l_outdef.h + +next.c: $(NEXTFILES) $(SRC_DIR)/make.next + $(SRC_DIR)/make.next $(NEXTFILES) > next.c + +char.c: $(SRC_DIR)/char.tab + $(TABGEN) -f$(SRC_DIR)/char.tab >char.c diff --git a/fcc/driver/fcc.1 b/fcc/driver/fcc.1 new file mode 100644 index 0000000..acd05b7 --- /dev/null +++ b/fcc/driver/fcc.1 @@ -0,0 +1,148 @@ +.TH FCC 1 +.SH NAME +fcc \- fast CC-compatible C compiler +.SH SYNOPSIS +.B fcc +[ +.B \-c +] +[ +.B \-v +] +[ +.B \-vn +] +[ \fB\-D\fIname\fR ] +[ \fB\-D\fIname\fB=\fIdef\fR ] +[ +.BI \-I pathname +] +[ +.B \-w +] +[ +.B \-o +.I outfile +] +[ +.B \-R +] +[ +.BI \-U name +] +[ +.BI -M compiler +] +.I sourcefile ... +.SH DESCRIPTION +.LP +.I Fcc +is a fast +.B C +compiler. It translates +.B C +programs +into cc(1)-compatible relocatable object modules, and does so in one pass. +Then, if the \fB\-c\fP flag is not given, +.I fcc +offers the object modules to a link-editor, +to create an executable binary. +.LP +.I Fcc +accepts several types of filename arguments. Files with +names ending in +.B .c +are taken to be +.B C +source programs. +They are compiled, and the resulting object module is placed in the current +directory. +The object module is named after its source file, the suffix +.B .o +replacing +.BR .c +in the name of the object. +.LP +Other arguments refer to loader options, +object modules, or object libraries. +Unless the +.B \-c +flag is given, these modules and libraries, together with the results of any +specified compilations, are passed (in the order given) to the +link-editor to produce +an output file named +.IR a.out . +You can specify a name for the executable by using the +.B \-o +option. +.SH OPTIONS +.LP +The \fB\-l\fIlib\fR, \fB\-n\fP, \fB\-N\fP, +\fB\-r\fP, \fB\-s\fP, \fB\-S\fP, and \fB\-u\fP options are +passed to the link-editor program. 
+The \fB\-u\fP option takes an extra argument. +.IP \fB\-c\fP +.br +Suppress the loading phase of the compilation, and force an object module to +be produced, even if only one program is compiled. +A single object module can be named explicitly using the +.B \-o +option. +.IP \fB\-D\fIname\fR\fB=\fIdef\fR +Define a symbol +.I name +to the +preprocessor, as if by "#define". +.IP \fB\-D\fIname\fR +.br +same as \fB\-D\fIname\fB=1\fR. +.IP \fB\-I\fIpathname\fR +.br +Add +.I pathname +to the list of directories in which to search for +.B #include +files with filenames not beginning with slash. +The compiler first searches for +.B #include +files in the directory containing +.I sourcefile, +then in directories in +.B \-I +options, and finally, in +.I /usr/include. +.IP "\fB\-o \fIoutput\fR" +Name the final output file +.I output. +.IP \fB\-U\fIname\fR +.br +Remove any initial definition of +.I name. +.IP \fB\-v\fP +.br +Verbose. Print the commands as they are executed. +.IP \fB\-vn\fP +.br +Verbose, no execute. Only print the commands, do not execute them. +.IP \fB\-w\fP +suppress warning messages. +.IP \fB\-R\fP +.br +test for more compatibility with Kernighan & Ritchie C [1]. +.IP \fB\-M\fIcompiler\fR +.br +use \fIcompiler\fR as C-2 compiler instead of the default. +.LP +Object modules produced by cc(1) and +.I fcc +can be freely mixed, as long as the link-editor is called through +.I fcc. +.SH "SEE ALSO" +.IP [1] +B.W. Kernighan, D. Ritchie, "\fIThe C programming Language\fP", Prentice-Hall Inc., 1978 +.IP [2] +E.H. Baalbergen, "\fIThe ACK CEM compiler\fP". +.IP [3] +cc(1) unix manual page. +.SH DIAGNOSTICS +Diagnostics are intended to be self-explanatory. diff --git a/fcc/driver/fcc.c b/fcc/driver/fcc.c new file mode 100644 index 0000000..101fa3f --- /dev/null +++ b/fcc/driver/fcc.c @@ -0,0 +1,676 @@ +/* fcc + Driver for fast cc-compatible ACK C compiler. + + Derived from the C compiler driver from Minix. 
+ + Compile this file with + cc -O -I$TARGET_HOME/config fcc.c + Install the resulting binaries in the EM bin directory. + Suggested name: fcc +*/ + +#ifdef sun3 +#define MACHNAME "m68020" +#define SYSNAME "sun3" +#endif + +#ifdef vax4 +#define MACHNAME "vax4" +#define SYSNAME "vax4" +#endif + +#include <errno.h> /* ENOEXEC */ +#include <signal.h> /* signal(), SIGHUP, SIGINT, SIGQUIT, SIG_IGN */ +#include <stdio.h> /* stderr, fprintf(), putc(), printf() */ +#include <em_path.h> /* EM_DIR, TMP_DIR (found via -I.../config) */ +#if __STDC__ +#include <stdarg.h> /* va_start(ap, dst) form used by mkstr() */ +#else +#include <varargs.h> /* va_alist/va_dcl form used by mkstr() */ +#endif /* NOTE(review): the header names above were reconstructed from the symbols this file uses; confirm the order against the upstream source */ + + +/* + Version producing cc-compatible .o files in one pass. +*/ +#define MAXARGC 256 /* maximum number of arguments allowed in a list */ +#define USTR_SIZE 128 /* maximum length of string variable */ + +typedef char USTRING[USTR_SIZE]; + +struct arglist { + int al_argc; + char *al_argv[MAXARGC]; +}; + +#define CPP_NAME "$H/lib.bin/cpp" +#define LD_NAME "/bin/ld" +#define AS_NAME "/bin/as" +#define SHELL "/bin/sh" + +char *CPP; +char *COMP; + +int kids = -1; +int ecount = 0; + +struct arglist CPP_FLAGS = { + 7, + { + "-Dunix", + "-D_EM_WSIZE=4", + "-D_EM_PSIZE=4", + "-D_EM_SSIZE=2", + "-D_EM_LSIZE=4", + "-D_EM_FSIZE=4", + "-D_EM_DSIZE=8", + } +}; + +struct arglist LD_HEAD = { +#ifdef sun3 + 8, + { + "-dc", + "-dp", + "-e", + "start", + "-X", + "-L/usr/lib/fsoft", + "/usr/lib/crt0.o", + "/usr/lib/Fcrt1.o" + } +#endif +#ifdef vax4 + 2, + { + "-X", + "/lib/crt0.o" + } +#endif +}; + +struct arglist LD_TAIL = { + 2, + { + "$H/lib/$S/tail_ext", + "-lc" + } +}; + +struct arglist LD_FLAGS; + +struct arglist COMP_FLAGS; + +char *o_FILE = "a.out"; /* default name for executable file */ + +#define remove(str) ((noexec || unlink(str)), (str)[0] = '\0') +#define cleanup(str) (str && str[0] && remove(str)) +#define init(al) ((al)->al_argc = 1) + +char ProgCall[128]; + +struct arglist SRCFILES; +struct arglist LDFILES; + +int RET_CODE = 0; + +struct arglist CALL_VEC; + +int o_flag = 0; +int c_flag = 0; +int v_flag = 0; +int O_flag = 0; + +#if __STDC__ +char *mkstr(char *, ...); +#else +char *mkstr(); +#endif +char *malloc(); +char *alloc(); +char *extension(); +char *expand_string(); + +USTRING ofile; 
+USTRING BASE; +USTRING tmp_file; + +int noexec = 0; + +extern char *strcat(), *strcpy(), *mktemp(), *strchr(); + +trapcc(sig) + int sig; +{ + signal(sig, SIG_IGN); + if (kids != -1) kill(kids, sig); + cleanup(ofile); + cleanup(tmp_file); + exit(1); +} + +#define lang_suffix() "c" +#define comp_name() "$H/lib.bin/c_cccompat" + +int +lang_opt(str) + char *str; +{ + switch(str[1]) { + case '-': /* debug options */ + case 'R': /* strict K&R */ + case 'w': /* disable warnings */ + append(&COMP_FLAGS, str); + return 1; + } + return 0; +} + +main(argc, argv) + char *argv[]; +{ + char *str; + char **argvec; + int count; + char *ext; + register struct arglist *call = &CALL_VEC; + char *file; + char *ldfile; + int compile_cnt = 0; + + setbuf(stdout, (char *) 0); + basename(*argv++,ProgCall); + + COMP = expand_string(comp_name()); + CPP = expand_string(CPP_NAME); + +#ifdef vax4 + append(&CPP_FLAGS, "-Dvax"); +#endif +#ifdef sun3 + append(&CPP_FLAGS, "-Dsun"); + append(&CPP_FLAGS, "-Dmc68020"); + append(&CPP_FLAGS, "-Dmc68000"); +#endif + + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, trapcc); + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, trapcc); + if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) + signal(SIGQUIT, trapcc); + while (--argc > 0) { + if (*(str = *argv++) != '-') { + append(&SRCFILES, str); + continue; + } + + if (lang_opt(str)) { + } + else switch (str[1]) { + + case 'c': /* stop after producing .o files */ + c_flag = 1; + break; + case 'D': /* preprocessor #define */ + case 'U': /* preprocessor #undef */ + append(&CPP_FLAGS, str); + break; + case 'I': /* include directory */ + append(&CPP_FLAGS, str); + break; + case 'g': /* debugger support */ + append(&COMP_FLAGS, str); + break; + case 'o': /* target file */ + if (argc-- >= 0) { + o_flag = 1; + o_FILE = *argv++; + ext = extension(o_FILE); + if (ext != o_FILE && ! 
strcmp(ext, lang_suffix()) + ) { + error("-o would overwrite %s", o_FILE); + } + } + break; + case 'u': /* mark identifier as undefined */ + append(&LD_FLAGS, str); + if (argc-- >= 0) + append(&LD_FLAGS, *argv++); + break; + case 'O': /* use built in peephole optimizer */ + O_flag = 1; + break; + case 'v': /* verbose */ + v_flag++; + if (str[2] == 'n') + noexec = 1; + break; + case 'l': /* library file */ + append(&SRCFILES, str); + break; + case 't': /* -target? */ + if (! strcmp(str, "-target")) { + if (argc-- >= 0) argv++; + break; + } + warning("%s flag ignored", str); + break; + case 'M': /* use other compiler (for testing) */ + strcpy(COMP, str+2); + break; + case 's': /* strip, -sun3? */ + if (! strcmp(str, "-sun3")) { + break; + } + /* fall through */ + case 'n': /* text not read-only */ + case 'N': /* text read-only */ + case 'r': /* relocation produced */ + case 'S': /* strip, but leave locals and globals */ + if (str[2] == '\0') { + append(&LD_FLAGS, str); + break; + } + /* fall through */ + default: + warning("%s flag ignored", str); + break; + } + } + + if (ecount) exit(1); + + count = SRCFILES.al_argc; + argvec = &(SRCFILES.al_argv[0]); + while (count-- > 0) { + ext = extension(*argvec); + if (*argvec[0] != '-' && + ext != *argvec++ && (! strcmp(ext, lang_suffix()) + )) { + compile_cnt++; + } + } + + if (compile_cnt > 1 && c_flag && o_flag) { + warning("-o flag ignored"); + o_flag = 0; + } + + append(&COMP_FLAGS, "-L"); + count = SRCFILES.al_argc; + argvec = &(SRCFILES.al_argv[0]); + while (count-- > 0) { + register char *f; + basename(file = *argvec++, BASE); + + ext = extension(file); + + if (file[0] != '-' && + ext != file && (!strcmp(ext, lang_suffix()) + )) { + if (compile_cnt > 1) printf("%s\n", file); + + ldfile = c_flag ? 
ofile : alloc((unsigned)strlen(BASE)+3); + if ( + !strcmp(ext, "s") && + needsprep(file)) { + strcpy(tmp_file, TMP_DIR); + strcat(tmp_file, "/F_XXXXXX"); + mktemp(tmp_file); + init(call); + append(call, CPP); + concat(call, &CPP_FLAGS); + append(call, file); + if (runvec(call, tmp_file)) { + file = tmp_file; + } + else { + remove(tmp_file); + tmp_file[0] = '\0'; + continue; + } + } + init(call); + if (o_flag && c_flag) { + f = o_FILE; + } + else f = mkstr(ldfile, BASE, ".", "o", (char *)0); + if (strcmp(ext, "s")) { + append(call, COMP); + concat(call, &CPP_FLAGS); + concat(call, &COMP_FLAGS); + append(call, file); + append(call, f); + } + else { + append(call, AS_NAME); + append(call, "-o"); + append(call, f); +#ifdef sun3 + append(call, "-mc68020"); +#endif + append(call, file); + } + if (runvec(call, (char *) 0)) { + file = f; + } + else { + remove(f); + continue; + } + cleanup(tmp_file); + tmp_file[0] = '\0'; + } + + else if (file[0] != '-' && + strcmp(ext, "o") && strcmp(ext, "a")) { + warning("file with unknown suffix (%s) passed to the loader", ext); + } + + if (c_flag) + continue; + + append(&LDFILES, file); + } + + /* *.s to a.out */ + if (RET_CODE == 0 && LDFILES.al_argc > 0) { + init(call); + expand(&LD_HEAD); + expand(&LD_TAIL); + append(call, expand_string(LD_NAME)); + concat(call, &LD_FLAGS); + append(call, "-o"); + append(call, o_FILE); + concat(call, &LD_HEAD); + concat(call, &LDFILES); + concat(call, &LD_TAIL); + if (! 
runvec(call, (char *) 0)) { + exit(RET_CODE); + } + } + exit(RET_CODE); +} + +needsprep(name) + char *name; +{ + int file; + char fc; + + file = open(name,0); + if (file < 0) return 0; + if (read(file, &fc, 1) != 1) fc = 0; + close(file); + return fc == '#'; +} + +char * +alloc(u) + unsigned u; +{ + char *p = malloc(u); + + if (p == 0) + panic("no space"); + return p; +} + +char * +expand_string(s) + char *s; +{ + char buf[1024]; + register char *p = s; + register char *q = &buf[0]; + int expanded = 0; + + if (!p) return p; + while (*p) { + if (*p == '$') { + p++; + expanded = 1; + switch(*p++) { + case 'H': + strcpy(q, EM_DIR); + break; + case 'M': + strcpy(q, MACHNAME); + break; + case 'S': + strcpy(q, SYSNAME); + break; + default: + panic("internal error"); + break; + } + while (*q) q++; + } + else *q++ = *p++; + } + if (! expanded) return s; + *q++ = '\0'; + p = alloc((unsigned int) (q - buf)); + return strcpy(p, buf); +} + +append(al, arg) + register struct arglist *al; + char *arg; +{ + if (!arg || !*arg) return; + if (al->al_argc >= MAXARGC) + panic("argument list overflow"); + al->al_argv[(al->al_argc)++] = arg; +} + +expand(al) + register struct arglist *al; +{ + register int i = al->al_argc; + register char **p = &(al->al_argv[0]); + + while (i-- > 0) { + *p = expand_string(*p); + p++; + } +} + +concat(al1, al2) + struct arglist *al1, *al2; +{ + register i = al2->al_argc; + register char **p = &(al1->al_argv[al1->al_argc]); + register char **q = &(al2->al_argv[0]); + + if ((al1->al_argc += i) >= MAXARGC) + panic("argument list overflow"); + while (i-- > 0) { + *p++ = *q++; + } +} + +#if __STDC__ +/*VARARGS*/ +char * +mkstr(char *dst, ...) 
+{ + va_list ap; + + va_start(ap, dst); + { + register char *p; + register char *q; + + q = dst; + p = va_arg(ap, char *); + + while (p) { + while (*q++ = *p++); + q--; + p = va_arg(ap, char *); + } + } + va_end(ap); + + return dst; +} +#else +/*VARARGS*/ +char * +mkstr(va_alist) + va_dcl +{ + va_list ap; + char *dst; + + va_start(ap); + { + register char *p; + register char *q; + + dst = q = va_arg(ap, char *); + p = va_arg(ap, char *); + + while (p) { + while (*q++ = *p++); + q--; + p = va_arg(ap, char *); + } + } + va_end(ap); + + return dst; +} +#endif + +basename(str, dst) + char *str; + register char *dst; +{ + register char *p1 = str; + register char *p2 = p1; + + while (*p1) + if (*p1++ == '/') + p2 = p1; + p1--; + while (*p1 != '.' && p1 >= p2) p1--; + if (p1 >= p2) { + *p1 = '\0'; + while (*dst++ = *p2++); + *p1 = '.'; + } + else + while (*dst++ = *p2++); +} + +char * +extension(fn) + char *fn; +{ + register char *c = fn; + + while (*c++) ; + while (*--c != '.' && c >= fn) { } + if (c++ < fn || !*c) return fn; + return c; +} + +runvec(vec, outp) + struct arglist *vec; + char *outp; +{ + int pid, status; + + if (v_flag) { + pr_vec(vec); + putc('\n', stderr); + } + if ((pid = fork()) == 0) { /* start up the process */ + if (outp) { /* redirect standard output */ + close(1); + if (creat(outp, 0666) != 1) + panic("cannot create output file"); + } + ex_vec(vec); + } + if (pid == -1) + panic("no more processes"); + kids = pid; + wait(&status); + if (status) switch(status & 0177) { + case SIGHUP: + case SIGINT: + case SIGQUIT: + case SIGTERM: + case 0: + break; + default: + error("%s died with signal %d\n", vec->al_argv[1], status&0177); + } + kids = -1; + return status ? 
((RET_CODE = 1), 0) : 1; +} + +/*VARARGS1*/ +error(str, s1, s2) + char *str, *s1, *s2; +{ + fprintf(stderr, "%s: ", ProgCall); + fprintf(stderr, str, s1, s2); + putc('\n', stderr); + ecount++; +} + +/*VARARGS1*/ +warning(str, s1, s2) + char *str, *s1, *s2; +{ + fprintf(stderr, "%s: (warning) ", ProgCall); + fprintf(stderr, str, s1, s2); + putc('\n', stderr); +} + +panic(str) + char *str; +{ + error(str); + trapcc(SIGINT); +} + +pr_vec(vec) + register struct arglist *vec; +{ + register char **ap = &vec->al_argv[1]; + + vec->al_argv[vec->al_argc] = 0; + fprintf(stderr, "%s", *ap); + while (*++ap) { + fprintf(stderr, " %s", *ap); + } +} + +extern int errno; + +ex_vec(vec) + register struct arglist *vec; +{ + if (noexec) + exit(0); + vec->al_argv[vec->al_argc] = 0; + execv(vec->al_argv[1], &(vec->al_argv[1])); + if (errno == ENOEXEC) { /* not an a.out, try it with the SHELL */ + vec->al_argv[0] = SHELL; + execv(SHELL, &(vec->al_argv[0])); + } + if (access(vec->al_argv[1], 1) == 0) { + /* File is executable. */ + error("cannot execute %s", vec->al_argv[1]); + } else { + error("%s is not executable", vec->al_argv[1]); + } + exit(1); +} diff --git a/fcc/driver/proto.make b/fcc/driver/proto.make new file mode 100644 index 0000000..258ace1 --- /dev/null +++ b/fcc/driver/proto.make @@ -0,0 +1,38 @@ +# $Id: proto.make,v 1.2 1994/06/24 11:03:32 ceriel Exp $ + +#PARAMS do not remove this line! 
+ +SRC_DIR = $(SRC_HOME)/fcc/driver +INCLUDES = -I$(TARGET_HOME)/config +CFLAGS = $(COPTIONS) $(INCLUDES) +LINTFLAGS = $(LINTOPTIONS) $(INCLUDES) +LDFLAGS = $(LDOPTIONS) + +all: fcc + +install: all + cp fcc $(TARGET_HOME)/bin + if [ $(DO_MACHINE_INDEP) = y ] ; \ + then mk_manpage $(SRC_DIR)/fcc.1 $(TARGET_HOME) ; \ + fi + +cmp: all + -cmp fcc $(TARGET_HOME)/bin/fcc + +pr: + @pr $(SRC_DIR)/proto.make $(SRC_DIR)/fcc.c + +opr: + make pr | opr + +clean: + rm -f *.$(SUF) fcc Out +# lint the same source file that the fcc.$(SUF) rule compiles +lint: + $(LINT) $(LINTFLAGS) -D`ack_sys` $(SRC_DIR)/fcc.c + +fcc.$(SUF): $(SRC_DIR)/fcc.c $(TARGET_HOME)/config/em_path.h + $(CC) $(CFLAGS) -c -D$(MACH) -D`ack_sys` $(SRC_DIR)/fcc.c + +fcc: fcc.$(SUF) + $(CC) $(LDFLAGS) -o fcc fcc.$(SUF) diff --git a/first/cp_dir b/first/cp_dir new file mode 100755 index 0000000..3bc1b28 --- /dev/null +++ b/first/cp_dir @@ -0,0 +1,16 @@ +set -e +trap "rm -f /tmp/xx$$" 0 1 2 3 15 +case $2 in +/*) target_dir=$2 + ;; +*) target_dir=`pwd`/$2 + ;; +esac +cd $1 +tar cf /tmp/xx$$ . +if [ -d $target_dir ] +then : +else mkdir $target_dir +fi +cd $target_dir +tar xf /tmp/xx$$ diff --git a/first/create_dir b/first/create_dir new file mode 100755 index 0000000..e805fd6 --- /dev/null +++ b/first/create_dir @@ -0,0 +1,11 @@ +if ( cd $1 ) 2>/dev/null +then + : +elif mkdir $1 2>/dev/null +then + : +else + echo $0: could not create directory $1 1>&2 + exit 1 +fi +exit 0 diff --git a/first/em_path.h.src b/first/em_path.h.src new file mode 100644 index 0000000..3b81688 --- /dev/null +++ b/first/em_path.h.src @@ -0,0 +1,11 @@ +/* $Id: em_path.h.src,v 2.9 1994/06/24 11:03:57 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* Intended as a common directory for ALL temporary files */ +#define TMP_DIR "/tmp" + +/* Access to the ACK tree and parts thereof */ +#define EM_DIR "/usr/em" /* The root directory for EM stuff */ +#define ACK_PATH "lib/descr" diff --git a/first/first b/first/first new file mode 100755 index 0000000..430034d --- /dev/null +++ b/first/first @@ -0,0 +1,214 @@ +#! /bin/sh +case $0 in +*/first) + FDIR=`expr $0 : '\(.*\)/first'` + ;; +first) FDIR=. + ;; +esac + +if echo 'a\c' | grep 'c' >/dev/null ; then + : We have BSD-like echo command. + E_FLAG='-n' + E_SFX='' +else + : We have SystemV-like echo command. + E_FLAG='' + E_SFX='\c' +fi +export E_FLAG E_SFX + +echo check write-ability of /tmp and /usr/tmp ... +if ( >/usr/tmp/aaax.$$ ) +then + rm /usr/tmp/aaax.$$ +else + echo /usr/tmp must exist and be writable. + exit 2 +fi +if ( >/tmp/aaax.$$ ) +then + rm /tmp/aaax.$$ +else + echo /tmp must exist and be writable. + exit 2 +fi +echo "/tmp and /usr/tmp ok" + +if [ -f macros ] +then + . macros +fi + +: "Now get system name and directories" +. 
$FDIR/get_sys + +: "Take action according to the system used" +BM=1 +OLDACM=$ACM +OLDSYS=$SYS +case $SYSNAME in +vax_bsd4_1a) ACM=vax4 ; SYS=BSD4_1 ; BYTE_ORDER=0123 ; MACH=vax4 ;; +vax_bsd4_2) ACM=vax4 ; SYS=BSD4_2 ; BYTE_ORDER=0123 ; MACH=vax4 ;; +vax_sysV_2) ACM=vax4 ; SYS=SYS_5 ; BYTE_ORDER=0123 ; MACH=vax4 ;; +i386) ACM=i386 ; SYS=SYS_5 ; BYTE_ORDER=0123 ; MACH=i386 ;; +sun3) ACM=sun3 ; SYS=BSD4_2; BYTE_ORDER=3210 ; MACH=m68020 ;; +sun2) ACM=sun2 ; SYS=BSD4_2; BYTE_ORDER=3210 ; MACH=m68k4 ;; +m68_unisoft|m68k2) ACM=m68k2 ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k2 ;; +m68_sysV_0|mantra) ACM=mantra ; SYS=SYS_5; BYTE_ORDER=3210 ; MACH=m68k4 ;; +m68020) ACM=m68020 ; SYS=SYS_5; BYTE_ORDER=3210 ; MACH=m68020 ;; +sparc) ACM=sparc ; SYS=BSD4_2; BYTE_ORDER=3210 ; MACH=sparc ;; +sparc_solaris) ACM=sparc_solaris ; SYS=SYS_5; BYTE_ORDER=3210 ; MACH=sparc_solaris ;; +i86) ACM=i86 ; SYS=SYS_5; BYTE_ORDER=0123 ; MACH=i86 ;; +xenix3) ACM=xenix3 ; SYS=SYS_5; BYTE_ORDER=0123 ; MACH=i86 ;; +minix) ACM=minix ; SYS=V7; BYTE_ORDER=0123 ; MACH=i86 ;; +pmds) ACM=pmds ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k2 ;; +pmds4) ACM=pmds4 ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k4 ;; +minixST) ACM=minixST ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k2 ;; +m68k4) ACM=m68k4 ; SYS=V7; BYTE_ORDER=3210 ; MACH=m68k4 ;; +*) ACM=XXX ; SYS=XXX ; BYTE_ORDER=XXX ; MACH=XXX ;; +esac + +: do not change the order in MACH_LIST. check limit_enquire first. +MACH_LIST="i86 xenix3 minix i386 6500 6800 6805 6809 i80 em22 em24 em44 m68k2 pmds minixST m68k4 pmds4 sun2 mantra m68020 sun3 sparc sparc_solaris ns pdp s2650 vax4 z80 z8000 arm" + +while : +do + for i in $MACH_LIST + do + if [ $i = "$ACM" ] + then break + fi + done + if [ $i = "$ACM" ] + then break + fi + echo "This installation script has no knowledge about $SYSNAME. +You will have to specify the default machine that you want ACK to +compile for. 
Choices:" + l= + x= + for i in $MACH_LIST + do + l="$l $i" + x=x$x + case $x in + xxxxxxxxxx) echo $l + x= + l= + ;; + esac + done + ACM=$OLDACM + echo $l + echo $E_FLAG "Your choice: [$OLDACM] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$ACM";; + esac + ACM="$ANS" +done +while : +do +case $SYS in +V7|BSD4_1|BSD4_2|SYS_5) + break + ;; +*) + SYS=$OLDSYS + echo 'What kind of Unix is the target system running? +Choices: + V7 for Unix V7, BSD 2.* + BSD4_1 for Berkeley 4.1 + BSD4_2 for Berkeley 4.2, 4.3, SunOs 3, SunOs 4 + SYS_5 for Xenix, System III, System V, SunOs 5' + echo $E_FLAG "Your choice (V7|BSD4_1|BSD4_2|SYS_5): [$OLDSYS] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$SYS";; + esac + SYS="$ANS" + ;; +esac +done +rm -f local.h +sed -e /ACKM/s/'".*"'/'"'$ACM'"'/ -e /BIGMACH/s/'[01]'/$BM/ -e /SYSTEM/s/'^#[ ]*define[ ]*[a-zA-Z_][a-zA-Z0-9_]*'/"# define $SYS"/ < $FDIR/local.h.src >local.h +case $BYTE_ORDER in +XXX) ;; +*) echo '/* Optional definition of BYTE_ORDER: */' >> local.h + echo "#define BYTE_ORDER 0x$BYTE_ORDER" >> local.h + ;; +esac + +echo "Your default machine to compile for is $ACM" + +echo "Installation of the complete ACK takes a long time. Limiting the +number of languages, runtime libraries, back-ends, and assemblers to be +installed may save a lot of time. If you want to install everything, +answer no to the next question. Otherwise, answer yes, and you will be +prompted for details" +while : +do + echo $E_FLAG "Do you want to limit the installation in any way? (y/n) [$LIMIT] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$LIMIT";; + esac + case X$ANS in + Xj*|Xy|X) + LIMIT=y + . $FDIR/limit_enquire + break + ;; + Xn*) . $FDIR/get_sysvax + LIMIT=n + DO_MACHINE_INDEP=y + DO_FAST=n + DISABLE_LANG= + DISABLE_SUP= + if [ $TARGET_HOME = $UTIL_HOME ] + then + case $ACM in + sun3|m68020|i386|vax*) + DO_FAST=y + ;; + esac + fi + break + ;; + *) echo "I do not understand your answer ($ANS). Try again." + ;; + esac +done + +. 
$FDIR/get_makepars + +echo "TARGET_HOME=\"$TARGET_HOME\"; export TARGET_HOME" > macros +echo "UTIL_HOME=\"$UTIL_HOME\"; export UTIL_HOME" >> macros +echo "SRC_HOME=\"$SRC_HOME\"; export SRC_HOME" >> macros +echo "SYSNAME=\"$SYSNAME\"; export SYSNAME" >> macros +echo "ACM=\"$ACM\"; export ACM" >> macros +echo "CURRENT=\"$CURRENT\"; export CURRENT" >> macros +echo "SYS=\"$SYS\"; export SYS" >> macros +echo "LIMIT=\"$LIMIT\"; export LIMIT" >> macros +echo "CONFIG=\"$CONFIG\"; export CONFIG" >> macros +echo "DISABLE_LANG=\"$DISABLE_LANG\"; export DISABLE_LANG" >> macros +echo "DISABLE_SUP=\"$DISABLE_SUP\"; export DISABLE_SUP" >> macros +echo "DO_MACHINE_INDEP=\"$DO_MACHINE_INDEP\"; export DO_MACHINE_INDEP" >> macros +echo "MACH_LIST=\"$MACH_LIST\"; export MACH_LIST" >> macros +echo "SYSVAX=\"$SYSVAX\"; export SYSVAX" >> macros +echo "MACH=\"$MACH\"; export MACH" >> macros +echo "WS=\"$WS\"; export WS" >> macros +echo "PS=\"$PS\"; export PS" >> macros +echo "DO_FAST=\"$DO_FAST\"; export DO_FAST" >> macros + +cat macros $FDIR/install_tail > INSTALL +chmod +x INSTALL + +echo " +A shell-script called 'INSTALL' has been created. Running it +installs ACK. 
Note that this may take a (very) long time, so run it +in the background, with its output redirected, f.i.: + sh INSTALL > INSTALL.out 2>&1 & +" +exit 0 diff --git a/first/get_answer b/first/get_answer new file mode 100755 index 0000000..b610e24 --- /dev/null +++ b/first/get_answer @@ -0,0 +1,5 @@ +if read ANS +then echo +else echo "Sorry, got EOF when reading your answer" + exit 1 +fi diff --git a/first/get_makepars b/first/get_makepars new file mode 100755 index 0000000..36da886 --- /dev/null +++ b/first/get_makepars @@ -0,0 +1,104 @@ +know_target=0 +case $SYSNAME in +vax*|i386|sun*|sparc*|m68_sysV_0|m68020|mantra|pmds4|m68k4) + WS=4 ; PS=4 + know_target=1 + ;; +m68_unisoft|m68k2|minixST|pmds) + WS=2 ; PS=4 + know_target=1 + ;; +i86|minix|xenix3) + WS=2 ; PS=2 + know_target=1 + ;; +*) trap "rm -f ws.c ws.o a.out t$$" 0 1 2 3 15 + cat > ws.c <<'EOF' +#include +main() +{ + printf("WS=%d ; PS=%d\n", sizeof(int), sizeof(char *)); + exit(0); +} +EOF + if [ $TARGET_HOME = $UTIL_HOME ] && cc ws.c 2>/dev/null + then + : We can find out ourselves what the word-size and + : the pointer-size of the target machine is. + cc ws.c 2>/dev/null + ./a.out > t$$ + . t$$ + rm -f t$$ a.out ws.[co] + else + : we will have to ask installer. + echo $E_FLAG "Please give the word-size of the target-machine (sizeof(int)) in bytes: [$WS] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$WS";; + esac + WS="$ANS" + echo $E_FLAG "Please give the pointer-size of the target-machine (sizeof(char *)) in bytes: [$PS] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$PS";; + esac + PS="$ANS" + fi + ;; +esac + +echo "# Paths: +SRC_HOME = $SRC_HOME +TARGET_HOME = $TARGET_HOME +UTIL_HOME = $UTIL_HOME + +# Machine independent part created? 
+DO_MACHINE_INDEP = $DO_MACHINE_INDEP + +# Target machine, only needed for fast compilers +MACH = $MACH +" > make_macros + +if [ $TARGET_HOME = $UTIL_HOME ] +then + if [ -f /bin/ranlib -o -f /usr/bin/ranlib -o -f /usr/ucb/ranlib ] + then + sed "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" < $FDIR/target_comp >> make_macros + else + sed -e "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" -e "s/^# RANLIB=:/RANLIB=:/" < $FDIR/target_comp >> make_macros + fi + case $ACM in + sun3|sparc) + ed -s make_macros <<'EOF' +/cc-and-mkdep.sun/s/^..// +w +q +EOF + ;; + esac + cat $FDIR/util_comp >> make_macros +else + case $know_target in + 1) sed -e "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" -e "/cc-and-mkdep.ack/s/^..//" -e "s/^CC=cc/CC=acc -m$ACM/" -e "s/^# AR=aal/AR=aal/" -e "s/^# RANLIB=:/RANLIB=:/" < $FDIR/target_comp >> make_macros + ;; + *) if [ -f /bin/ranlib -o -f /usr/bin/ranlib -o -f /usr/ucb/ranlib ] + then + sed "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" < $FDIR/target_comp >> make_macros + else + sed -e "s/^COPTIONS.*\$/COPTIONS=-O -D_EM_WSIZE=$WS -D_EM_PSIZE=$PS -D__${MACH}__/" -e "s/^# RANLIB=:/RANLIB=:/" < $FDIR/target_comp >> make_macros + fi + ;; + esac + sed "s/^#U/U/" < $FDIR/util_comp >> make_macros +fi + +cat $FDIR/lint_params >> make_macros + +echo "A file called 'make_macros' has been created. This file defines some +'make' variables that parameterize all Makefiles in ACK. You may want +to check it before attempting to actually install ACK." +case $know_target in +0) echo "In fact, this installation script does not know much about +your target machine, so expect some things to be wrong" + ;; +esac diff --git a/first/get_sys b/first/get_sys new file mode 100755 index 0000000..0ff1859 --- /dev/null +++ b/first/get_sys @@ -0,0 +1,155 @@ +rm -f em_path.h +echo "You will now be asked for the root directory of the ACK sources. 
+This directory will not be changed by the installation process. +" +while : +do + echo $E_FLAG "Please give the root of the ACK source tree, +an absolute path: [$SRC_HOME] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$SRC_HOME" ;; + esac + SRC_HOME="$ANS" + case $SRC_HOME in + /*) break; + ;; + *) echo "$SRC_HOME is not an absolute path; try again" + ;; + esac +done +echo "You will now be asked for a configuration directory. This is +the directory in which the compilations will take place. The tree that +resides in it will have the same structure as the ACK source tree, but +the directories will usually only contain Makefiles and .o files. +" +while : +do + echo $E_FLAG "Please give the root of the configuration tree, +an absolute path: [$CONFIG] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$CONFIG";; + esac + CONFIG="$ANS" + case $CONFIG in + /*) break; + ;; + *) echo "$CONFIG is not an absolute path; try again" + ;; + esac +done +echo "You will now be asked for the root directory of the ACK binaries. After +installation, this directory will have subdirectories bin, lib, lib.bin, +man, h, config, include, modules, doc. +Four of these directories will contain stuff that depends on the machine +for which the ACK binaries are made: bin, modules, config, and lib.bin. The +other sub-directories (lib, man, h, include and doc) will contain +machine-independent stuff. +This information may be useful if you want to use ACK on different platforms +and you have a shared file system. See the installation manual. +" +while : +do + echo $E_FLAG "Please give the root of the ACK binaries, +an absolute path: [$TARGET_HOME] $E_SFX" + . 
$FDIR/get_answer + case $ANS in + '') ANS="$TARGET_HOME";; + esac + TARGET_HOME="$ANS" + case $TARGET_HOME in + /*) break; + ;; + *) echo "$TARGET_HOME is not an absolute path; try again" + ;; + esac +done +sed -e "/^#define[ ]*EM_DIR/s@\".*\"@\"$TARGET_HOME\"@" <$FDIR/em_path.h.src >em_path.h + +echo "You will now be asked for the type of the system that you want +ACK binaries produced for. This is not neccessarily the system you +run this program on. In this case, if you have not done so already, +you will have to install ACK on the current machine first. +" +echo "Give me the type of the system, the current choice is: +vax_bsd4_1a VAX11 with BSD4.1a +vax_bsd4_2 VAX11 with BSD4.2 +vax_sysV_2 VAX11 with System V.2 +i386 Intel 80386 system running Xenix System V +sun3 Sun 3 M68020 workstation +sun2 Sun 2 M68000 workstation +m68_sysV_0 Motorola 68000 with Uniplus System V.0 Unix +m68020 Motorola M68020 VME131 running Unix System V/68 R2V2.1 +sparc SUN SPARC workstation running SunOs 4 +sparc_solaris SUN SPARC workstation running solaris 2 +ANY Neither of the above +" +echo $E_FLAG "system type: [$SYSNAME] $E_SFX" +. $FDIR/get_answer +case $ANS in +'') ANS="$SYSNAME";; +esac +SYSNAME="$ANS" + +while : +do +echo $E_FLAG "Is this the system you are running on? (y/n) [$CURRENT] $E_SFX" +. $FDIR/get_answer +case $ANS in +'') ANS="$CURRENT";; +esac +case X$ANS in +Xj*|Xy*|X) UTIL_HOME=$TARGET_HOME + CURRENT=y + break + ;; +Xn*) CURRENT=n + echo "You will now be asked for the root directory of ACK on the current machine. +This tree will not be changed by the installation process. +" + while : + do + echo $E_FLAG "Please give the root of a runnable ACK tree, +an absolute path: [$UTIL_HOME] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$UTIL_HOME" ;; + esac + UTIL_HOME="$ANS" + case $UTIL_HOME in + /*) break; + ;; + *) echo "$UTIL_HOME is not an absolute path; try again" + ;; + esac + done + break + ;; +*) echo "I do not understand your answer ($ANS). Try again." 
+ ;; +esac +done + +while : +do +echo "The system to install ACK for is $SYSNAME, +the root of the ACK source tree is $SRC_HOME, +the root of the configuration tree is $CONFIG, +the root of the ACK binary tree to be created is $TARGET_HOME, +and the root of a runnable ACK binary tree is $UTIL_HOME. +If the machine to compile ACK for is the current machine, the last two names +may be identical." +echo $E_FLAG "Are you satisfied with all this? (y/n) $E_SFX" +. $FDIR/get_answer +case X$ANS in +Xj*|Xy*|X) break + ;; +Xn*) echo Ok, I will give you another chance.... + . $0 + break + ;; +*) echo "I do not understand your answer ($ANS). Try again." + ;; +esac +done diff --git a/first/get_sysvax b/first/get_sysvax new file mode 100755 index 0000000..eee8f12 --- /dev/null +++ b/first/get_sysvax @@ -0,0 +1,21 @@ +while : +do + echo $E_FLAG "Which system-call library do you want to install for the VAX? +You can choose between + libbsd4_1a for Berkeley Unix 4.1 + libbsd4_2 for Berkeley Unix 4.2 or newer, or Ultrix + libsysV_2 for Unix System V +Your choice: [$SYSVAX] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$SYSVAX";; + esac + SYSVAX="$ANS" + case $SYSVAX in + libbsd4_1a|libbsd4_2|libsysV_2) + break + ;; + *) echo "I do not understand your answer ($SYSVAX). 
Try again" + ;; + esac +done diff --git a/first/install_tail b/first/install_tail new file mode 100644 index 0000000..5e644ce --- /dev/null +++ b/first/install_tail @@ -0,0 +1,84 @@ +set -e + +PATH=::$CONFIG/bin:$UTIL_HOME/bin:/usr/ccs/bin:/usr/ucb:$PATH +export PATH + +$SRC_HOME/first/create_dir $CONFIG +$SRC_HOME/first/create_dir $CONFIG/bin + +echo "PATH=:$CONFIG/bin:$UTIL_HOME/bin:$PATH; export PATH" > $CONFIG/bin/TakeAction +cat $SRC_HOME/TakeAction >> $CONFIG/bin/TakeAction +sed '/^#PARAMS/r make_macros' < $SRC_HOME/first/mk_makefile > $CONFIG/bin/mk_makefile +cp $SRC_HOME/first/create_dir $CONFIG/bin/create_dir +cp $SRC_HOME/first/cp_dir $CONFIG/bin/cp_dir +chmod +x $CONFIG/bin/* + +$SRC_HOME/first/mk_config + +$SRC_HOME/first/mk_target + +$SRC_HOME/first/limit_impl + +case $SYSNAME in +i386) + ed -s $TARGET_HOME/lib/descr/fe << 'ABC' +1,$s/-D{NAME}/-D{NAME} -DNO_PROTOTYPE/ +w +q +ABC + ;; +esac + +: find varargs include file +: if not present use our own + +if test -f /usr/include/varargs.h +then + : +else + cp $SRC_HOME/include/_tail_cc/varargs.h $TARGET_HOME/modules/h +fi + +case X$SYSVAX in +Xvax_sysV_2) + ed -s $TARGET_HOME/lib/vax4/descr << 'ABC' +/CPP_F/s/$/ -D__USG/ +w +q +ABC + ed -s $CONFIG/mach/vax4/Action << 'ABC' +/libbsd4_2/s/libbsd4_2/libsysV_2/ +w +q +ABC + ( cd $CONFIG/mach/vax4 + for i in libcc libcc.ansi + do + ed -s $i/Makefile << 'ABC' +/BFS/s/BFS/UFS/ +w +q +ABC + done + ) + ;; +Xvax_bsd4_2) + ed -s $TARGET_HOME/lib/vax4/descr << 'ABC' +/CPP_F/s/$/ -D__BSD4_2/ +w +q +ABC + ;; +Xvax_bsd4_1a) + ed -s $CONFIG/mach/vax4/Action << 'ABC' +/libbsd4_2/s/libbsd4_2/libbsd4_1a/ +w +q +ABC + ;; +esac + +: and finally installing ... +cd $CONFIG +set +e +exec TakeAction diff --git a/first/limit_enquire b/first/limit_enquire new file mode 100644 index 0000000..8d9173d --- /dev/null +++ b/first/limit_enquire @@ -0,0 +1,207 @@ +while : +do + echo "The libraries will end up in the machine-independent part of the +ACK binary tree. 
You may already have them from a previous ACK installation +on a different machine, in particular if you have an NFS file system. +Therefore, it may not be neccessary to install them again. As this part +of the ACK installation takes the most time, you are given the opportunity +to disable installation of the machine-independent part" + echo $E_FLAG \ + "Do you want to install the machine-independent part? (y/n) [$DO_MACHINE_INDEP] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$DO_MACHINE_INDEP" ;; + esac + case X$ANS in + Xj*|Xy*|X) DO_MACHINE_INDEP=y + echo "machine-independent part will be installed" + echo + break + ;; + Xn*) DO_MACHINE_INDEP=n + echo "machine-independent part will not be installed" + echo + break + ;; + *) echo "I do not understand your answer ($ANS). Try again." + ;; + esac +done +OLD_DIS_LANG="$DISABLE_LANG" +DISABLE_LANG= +case X$OLD_DIS_LANG in +X) ;; +*) set $OLD_DIS_LANG + ;; +esac +for i in Modula-2 Pascal Occam Basic ANSI-C C Fortran +do + DEF=y + if [ $# != 0 -a X$i = X$1 ] + then + DEF=n + shift + fi + while : + do + echo $E_FLAG "Do you want to install $i? (y/n) [$DEF] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$DEF";; + esac + case X$ANS in + Xj*|Xy*) + echo "$i will be installed" + echo + break + ;; + Xn*) DISABLE_LANG=$DISABLE_LANG" $i" + echo "$i will not be installed" + echo + break + ;; + *) echo "I do not understand your answer ($ANS). Try again." + ;; + esac + done +done +OLD_DIS_SUP="$DISABLE_SUP" +DISABLE_SUP= +set $MACH_LIST +while test $# != 0 +do + DEF=y + for i in $OLD_DIS_SUP + do + if [ X$i = X$1 ] + then + DEF=n + break + fi + done + while : + do + case $1 in + i86) echo "not installing i86 will disable installation of xenix3 and minix." + ;; + m68k2) echo "not installing m68k2 will disable installation of pmds, minixST, +m68k4, pmds4, sun2, and mantra." + ;; + m68k4) echo "not installing m68k4 will disable installation of pmds4, sun2, and mantra." 
+ ;; + m68020) echo "not installing m68020 will disable installation of sun3." + ;; + esac + echo $E_FLAG "Do you want to install the $1 support? (y/n) [$DEF] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$DEF";; + esac + case X$ANS in + Xj*|Xy*) + echo "The $1 support will be installed" + echo + case $1 in + vax4) case $SYSNAME in + vax_bsd4_1a) SYSVAX=libbsd4_1a + ;; + vax_bsd4_2) SYSVAX=libbsd4_2 + ;; + vax_sysV_2) SYSVAX=libsysV_2 + ;; + *) if [ $DO_MACHINE_INDEP = y ] + then + . $FDIR/get_sysvax + fi + ;; + esac + ;; + sparc) shift + ;; + esac + break + ;; + Xn*) DISABLE_SUP=$DISABLE_SUP" $1" + echo "The $1 support will not be installed" + echo + case $1 in + i86) + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + ;; + m68k2) + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + ;; + m68k4) + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + shift + DISABLE_SUP=$DISABLE_SUP" $1" + ;; + sparc) + shift + DISABLE_SUP=$DISABLE_SUP" $1" + ;; + m68020) + shift + DISABLE_SUP=$DISABLE_SUP" $1" + ;; + esac + break + ;; + *) echo "I do not understand your answer ($ANS). Try again." + ;; + esac + done + shift +done + +if [ $TARGET_HOME = $UTIL_HOME ] +then + case $ACM in + sun3|m68020|i386|vax*) + while : + do + echo $E_FLAG "Do you want to install the fast compilers? (y/n) [$DO_FAST] $E_SFX" + . $FDIR/get_answer + case $ANS in + '') ANS="$DO_FAST" ;; + esac + case X$ANS in + Xj*|Xy*) + DO_FAST=y + echo "The fast compilers will be installed" + break + ;; + Xn*) + DO_FAST=n + echo "The fast compilers will not be installed" + break + ;; + *) echo "I do not understand your answer ($ANS). Try again." 
+ ;; + esac + done + ;; + *) + DO_FAST=n + ;; + esac +else + DO_FAST=n +fi diff --git a/first/limit_impl b/first/limit_impl new file mode 100755 index 0000000..2314ab7 --- /dev/null +++ b/first/limit_impl @@ -0,0 +1,206 @@ +for i in $DISABLE_LANG +do + ed -s $CONFIG/Action <> Action <> Action < $CONFIG/bin/ack_sys +chmod +x $CONFIG/bin/ack_sys + +cd $SRC_HOME +find . -type d -perm -555 -print > $CONFIG/dir_list + +cd $CONFIG +for i in mach/*/libsys +do + rm -rf $i +done + +for i in `cat dir_list` +do + create_dir $i + rm -f $i/No* + if [ -f $i/Makefile ] + then + ( cd $i ; if make clean ; then exit 0 ; else exit 0 ; fi ) > /dev/null 2>&1 + fi + if [ -f $SRC_HOME/$i/proto.make ] + then mk_makefile $SRC_HOME/$i/proto.make > $i/Makefile + fi + if [ -f $SRC_HOME/$i/Action ] + then + cd $SRC_HOME/$i + cp Action* $CONFIG/$i + chmod +w $CONFIG/$i/Action* + cd $CONFIG + fi +done + +cd $CONFIG + +for i in lang/cem/cemcom.ansi lang/cem/cemcom lang/m2/comp +do + cp $SRC_HOME/$i/BigPars $CONFIG/$i/Parameters + chmod +w $CONFIG/$i/Parameters +done +for i in lang/pc/comp lang/cem/cpp.ansi +do + cp $SRC_HOME/$i/Parameters $CONFIG/$i/Parameters + chmod +w $CONFIG/$i/Parameters +done + +cd $CONFIG/mach +for i in * +do + if [ -d $i ] + then + if [ -d $i/as ] + then + cd $i/as + mk_makefile $SRC_HOME/mach/proto/as/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile + cd ../.. + fi + if [ -d $i/top ] + then + cd $i/top + mk_makefile $SRC_HOME/mach/proto/top/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile + cd ../.. + fi + if [ -d $i/cg ] + then + cd $i/cg + mk_makefile $SRC_HOME/mach/proto/cg/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile + cd ../.. + fi + if [ -d $i/ncg ] + then + cd $i/ncg + mk_makefile $SRC_HOME/mach/proto/ncg/proto.make | sed -e "/#MACH_DEFINE/,/^MACH/s/=.*/= $i/" > Makefile + if [ -f $SRC_HOME/mach/$i/ncg/table_dir ] + then + ed -s Makefile < Makefile + cd ../.. 
+ fi + done + for j in libbsd4_1a libbsd4_2 libsysV_2 + do + if [ -d $i/$j ] + then + cd $i/$j + mk_makefile $SRC_HOME/mach/proto/libg/proto.libsys | sed -e "/#MACH_PARAMS/r $SRC_HOME/mach/$i/mach_params" -e "s/libsys/$j/g" > Makefile + cd ../.. + fi + done + for j in libcc libcc.ansi libm2 libpc libbc liboc libf77 + do + create_dir $i/$j + rm -f $i/$j/No* + cd $i/$j + mk_makefile $SRC_HOME/mach/proto/libg/proto.$j | sed -e "/#MACH_PARAMS/r $SRC_HOME/mach/$i/mach_params" > Makefile + cd ../.. + done + if [ $i = vax4 ] + then : + elif [ -d $i/libsys ] + then : + else + create_dir $i/libsys + rm -f $i/libsys/No* + cd $i/libsys + mk_makefile $SRC_HOME/mach/proto/libg/proto.sysmon | sed -e "/#MACH_PARAMS/r $SRC_HOME/mach/$i/mach_params" > Makefile + cd ../.. + fi + fi +done diff --git a/first/mk_makefile b/first/mk_makefile new file mode 100755 index 0000000..1b92018 --- /dev/null +++ b/first/mk_makefile @@ -0,0 +1,34 @@ +: '$Id: mk_makefile,v 1.5 1995/08/14 08:08:56 ceriel Exp $' + +: This shell script inserts make macros after a line +: starting with #PARAMS in "make_proto", and produces the result on +: standard output. + +trap "rm -f /tmp/mk_mak$$" 0 1 2 3 15 +case $# in +1) ;; +*) echo "Usage: $0 " 1>&2 + exit 1 + ;; +esac + +cp $1 /tmp/mk_mak$$ + +ed -s /tmp/mk_mak$$ << 'EOF' +/^#PARAMS/c +#PARAMS do not remove this line! +. 
+w +q +EOF +case `ack_sys` in +sparc_solaris) + ed -s /tmp/mk_mak$$ << 'EOF' +g/^EXTRALIB/s/=/= -lelf/ +w +q +EOF + ;; +esac +cat /tmp/mk_mak$$ +exit 0 diff --git a/first/mk_target b/first/mk_target new file mode 100755 index 0000000..1bc5025 --- /dev/null +++ b/first/mk_target @@ -0,0 +1,39 @@ +set -e + +: machine-dependant stuff + +create_dir $TARGET_HOME +create_dir $TARGET_HOME/config +create_dir $TARGET_HOME/lib.bin +create_dir $TARGET_HOME/modules +create_dir $TARGET_HOME/modules/h +create_dir $TARGET_HOME/bin + +cp local.h em_path.h $TARGET_HOME/config +cp_dir $SRC_HOME/bin $TARGET_HOME/bin +cp $CONFIG/bin/cp_dir $TARGET_HOME/bin/cp_dir +echo "echo $SYSNAME" > $TARGET_HOME/bin/ack_sys +chmod +x $TARGET_HOME/bin/ack_sys + +: machine-independant stuff + +if [ $DO_MACHINE_INDEP = n ] +then + exit 0 +fi + +create_dir $TARGET_HOME/lib +create_dir $TARGET_HOME/etc +create_dir $TARGET_HOME/h +create_dir $TARGET_HOME/include +create_dir $TARGET_HOME/doc + +cp $SRC_HOME/etc/ip_spec.t $TARGET_HOME/etc/ip_spec.t +cp_dir $SRC_HOME/lib $TARGET_HOME/lib +cp_dir $SRC_HOME/h $TARGET_HOME/h +cp_dir $SRC_HOME/include $TARGET_HOME/include + +cd $TARGET_HOME +find . -type f -exec chmod +w {} \; + +exit 0 diff --git a/first/target_comp b/first/target_comp new file mode 100644 index 0000000..bd95b42 --- /dev/null +++ b/first/target_comp @@ -0,0 +1,27 @@ +# compiler set for target machine + +CC=cc# # compiler to be used for compiling ACK + +# always passed to $(CC) -c. +COPTIONS=-O -D_EM_WSIZE=4 -D_EM_PSIZE=4 + +# passed to $(CC) -c when compiling modules. 
+LIBOPTIONS=# +# LIBOPTIONS=-LIB -L # when $(CC) is ACK + +CC_AND_MKDEP=cc-and-mkdep.all# # when $(CC) is neither ACK or SUN, +# CC_AND_MKDEP=cc-and-mkdep.ack## when $(CC) is an ACK-derived C compiler, +# CC_AND_MKDEP=cc-and-mkdep.sun## when $(CC) is a SUN C compiler + +LDOPTIONS=# # always passed to $(CC) when linking + +SUF=o# # suffix of files produced with $(CC) -c + +AR=ar# # archiver for Unix format objects +# AR=aal# # archiver for ACK .o format objects +# AR=arch# # archiver for ACK .s format objects + +RANLIB=ranlib# # when ranlib required +# RANLIB=:# # when ranlib not required + +LIBSUF=a# # suffix of object libraries diff --git a/first/util_comp b/first/util_comp new file mode 100644 index 0000000..377d046 --- /dev/null +++ b/first/util_comp @@ -0,0 +1,23 @@ + +# compiler set for producing runnable binaries (possibly using $(UTIL_HOME)). +# This must describe the compiler with which $(UTIL_HOME) has been compiled. +# If $(TARGET_HOME) is identical to $(UTIL_HOME), which usually will be +# the case, this part does not have to be changed. Otherwise (when you are +# cross-compiling ACK), you will have to change this part. Probable +# replacements are given in comments. Maybe the installation script +# has already changed them, but they should be checked to be sure. + +UCC=$(CC) +#UCC=cc# # compiler to be used + +UCOPTIONS=$(COPTIONS) +#UCOPTIONS=-O# # always passed to $(UCC) -c. 
+ +ULDOPTIONS=$(LDOPTIONS) +#ULDOPTIONS=# # always passed to $(UCC) when linking + +USUF=$(SUF) +#USUF=o# # suffix of files produced with $(UCC) -c + +ULIBSUF=$(LIBSUF) +#ULIBSUF=a# # suffix of object libraries for $(UCC) diff --git a/h/Makefile b/h/Makefile new file mode 100644 index 0000000..d94a71a --- /dev/null +++ b/h/Makefile @@ -0,0 +1,10 @@ +install cmp: + +opr: + make pr | opr + +pr: + @pr Makefile *.h + +clean: + -rm -f *.old diff --git a/h/arch.h b/h/arch.h new file mode 100644 index 0000000..91cc1f5 --- /dev/null +++ b/h/arch.h @@ -0,0 +1,25 @@ +/* $Id: arch.h,v 1.7 1994/06/24 10:07:47 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ + +#ifndef __ARCH_H_INCLUDED +#define __ARCH_H_INCLUDED + +#define ARMAG 0177545 +#define AALMAG 0177454 + +struct ar_hdr { + char ar_name[14]; + long ar_date; + char ar_uid; + char ar_gid; + short ar_mode; + long ar_size; +}; + +#define AR_TOTAL 26 +#define AR_SIZE 22 + +#endif /* __ARCH_H_INCLUDED */ diff --git a/h/as_spec.h b/h/as_spec.h new file mode 100644 index 0000000..df6c428 --- /dev/null +++ b/h/as_spec.h @@ -0,0 +1,6 @@ +/* $Id: as_spec.h,v 1.4 1994/06/24 10:07:50 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +#define as_magic (sp_magic|(14<<8)) diff --git a/h/bc_io.h b/h/bc_io.h new file mode 100644 index 0000000..8ae1fbe --- /dev/null +++ b/h/bc_io.h @@ -0,0 +1,33 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +#include + +/* $Id: bc_io.h,v 2.3 1994/06/24 10:07:53 ceriel Exp $ */ + +/* BASIC file io definitions */ + +extern FILE *_chanrd; +extern FILE *_chanwr; +extern int _chann; +/* BASIC file descriptor table */ +/* Channel assignment: + -1 terminal IO + 0 data file + 1-15 user files +*/ + +/* FILE MODES:*/ +#define IMODE 1 +#define OMODE 2 +#define RMODE 3 + +typedef struct { + char *fname; + FILE *fd; + int pos; + int mode; + int reclength; + }Filedesc; +extern Filedesc _fdtable[16]; diff --git a/h/bc_string.h b/h/bc_string.h new file mode 100644 index 0000000..89dfa19 --- /dev/null +++ b/h/bc_string.h @@ -0,0 +1,21 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +# + +/* $Id: bc_string.h,v 2.4 1994/06/24 10:07:57 ceriel Exp $ */ + +/* Strings are allocated in a fixed string descriptor table +** This mechanism is used to avoid string copying as much as possible +*/ + +typedef struct{ + char *strval; + int strcount; + int strlength; + } String; + +String *_newstr() ; + +#define MAXSTRING 1024 diff --git a/h/cg_pattern.h b/h/cg_pattern.h new file mode 100644 index 0000000..33dc6f2 --- /dev/null +++ b/h/cg_pattern.h @@ -0,0 +1,161 @@ +/* $Id: cg_pattern.h,v 1.5 1994/06/24 10:08:01 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* offsets of interesting fields in EM-pattern */ + +#define PO_HASH 0 +#define PO_NEXT 1 +#define PO_MATCH 3 + +#define ILLHASH 0177777 + +/* Escapes in printstrings */ + +#define PR_TOK '\001' +#define PR_TOKFLD '\002' +#define PR_EMINT '\003' +#define PR_EMSTR '\004' +#define PR_ALLREG '\005' +#define PR_SUBREG '\006' +/* + * In case this list gets longer remember to keep out printable nonprintables + * like \t \n \r and the like. 
+ */ + +/* Commands for codegenerator, in low order 5 bits of byte */ + +#define DO_NEXTEM 0 +#define DO_MATCH 1 +#define DO_XMATCH 2 +#define DO_XXMATCH 3 +#define DO_REMOVE 4 +#define DO_DEALLOCATE 5 +#define DO_REALLOCATE 6 +#define DO_ALLOCATE 7 +#define DO_LOUTPUT 8 +#define DO_ROUTPUT 9 +#define DO_MOVE 10 +#define DO_ERASE 11 +#define DO_TOKREPLACE 12 +#define DO_EMREPLACE 13 +#define DO_COST 14 +#define DO_RETURN 15 +#define DO_COERC 16 +#define DO_PRETURN 17 +#define DO_RREMOVE 18 + +typedef struct instance { + int in_which; +# define IN_COPY 1 +# define IN_RIDENT 2 +# define IN_ALLOC 3 +# define IN_DESCR 4 +# define IN_REGVAR 5 + int in_info[TOKENSIZE+1]; +} inst_t,*inst_p; + +typedef struct { + int c_size; /* index in enode-table */ + int c_time; /* dito */ +} cost_t,*cost_p; + +typedef struct { + int m_set1; /* number of tokenexpr in move: from */ + int m_expr1; /* optional expression */ + int m_set2; /* number of tokenexpr in move: to */ + int m_expr2; /* optional expression */ + int m_cindex; /* code index to really do it */ + cost_t m_cost; /* associated cost */ +} move_t, *move_p; + +typedef struct { + int set_size; + short set_val[SETSIZE]; +} set_t,*set_p; + +struct exprnode { + short ex_operator; + short ex_lnode; + short ex_rnode; +}; +typedef struct exprnode node_t; +typedef struct exprnode *node_p; + +typedef struct { /* to stack coercions */ + int c1_texpno; /* token expression number */ + int c1_expr; /* boolean expression */ + int c1_prop; /* property of register needed */ + int c1_codep; /* code index */ + cost_t c1_cost; /* cost involved */ +} c1_t,*c1_p; + +#ifdef MAXSPLIT +typedef struct { /* splitting coercions */ + int c2_texpno; /* token expression number */ + int c2_nsplit; /* split factor */ + int c2_repl[MAXSPLIT]; /* replacement instances */ + int c2_codep; /* code index */ +} c2_t,*c2_p; +#endif /* MAXSPLIT */ + +typedef struct { /* one to one coercions */ + int c3_texpno; /* token expression number */ + int c3_prop; /* property 
of register needed */ + int c3_repl; /* replacement instance */ + int c3_codep; /* code index */ +} c3_t,*c3_p; + +/* + * contents of .ex_operator + */ + +#define EX_TOKFIELD 0 +#define EX_ARG 1 +#define EX_CON 2 +#define EX_ALLREG 3 +#define EX_SAMESIGN 4 +#define EX_SFIT 5 +#define EX_UFIT 6 +#define EX_ROM 7 +#define EX_NCPEQ 8 +#define EX_SCPEQ 9 +#define EX_RCPEQ 10 +#define EX_NCPNE 11 +#define EX_SCPNE 12 +#define EX_RCPNE 13 +#define EX_NCPGT 14 +#define EX_NCPGE 15 +#define EX_NCPLT 16 +#define EX_NCPLE 17 +#define EX_OR2 18 +#define EX_AND2 19 +#define EX_PLUS 20 +#define EX_CAT 21 +#define EX_MINUS 22 +#define EX_TIMES 23 +#define EX_DIVIDE 24 +#define EX_MOD 25 +#define EX_LSHIFT 26 +#define EX_RSHIFT 27 +#define EX_NOT 28 +#define EX_COMP 29 +#define EX_COST 30 +#define EX_STRING 31 +#define EX_DEFINED 32 +#define EX_SUBREG 33 +#define EX_TOSTRING 34 +#define EX_UMINUS 35 +#define EX_REG 36 +#define EX_LOWW 37 +#define EX_HIGHW 38 +#define EX_INREG 39 +#define EX_REGVAR 40 + + + +#define getint(a,b) \ + if ((a=((*(b)++)&BMASK)) >= 128) {\ + a = ((a-128)<= 128) {\ + a = ((a-128)<=0) */ +#define PAR_F 0004 /* address offset */ +#define PAR_L 0005 /* addressing locals/parameters */ +#define PAR_G 0006 /* addressing globals */ +#define PAR_W 0007 /* size: word multiple, fits word, possibly indirect */ +#define PAR_S 0010 /* size: word multiple */ +#define PAR_Z 0011 /* size: zero or word multiple */ +#define PAR_O 0012 /* size: word multiple or word fraction */ +#define PAR_P 0013 /* procedure name */ +#define PAR_B 0014 /* branch: instruction label */ +#define PAR_R 0015 /* register number (0,1,2) */ + +/* flow */ +#define FLO_NO 0000 /* straight on */ +#define FLO_C 0020 /* conditional branch */ +#define FLO_P 0040 /* procedure: call and return */ +#define FLO_T 0060 /* terminate: no return */ diff --git a/h/em_mes.h b/h/em_mes.h new file mode 100644 index 0000000..094ee16 --- /dev/null +++ b/h/em_mes.h @@ -0,0 +1,29 @@ +/* $Id: em_mes.h,v 2.8 1994/06/24 
10:08:16 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* + * mnemonics for the message numbers in EM + */ + +#define ms_err 0 /* Compilation error occurred, ignore rest of module */ +#define ms_opt 1 /* Disable optimization please */ +#define ms_emx 2 /* Wordsize and pointersize assumed */ +#define ms_reg 3 /* Hint for possible register usage from frontend */ +#define ms_src 4 /* Number of source lines in this module */ +#define ms_flt 5 /* Floating point used */ +#define ms_com 6 /* Comment to be retained in compact code */ +#define ms_ret 7 /* Reserved */ +#define ms_ext 8 /* List of exported symbols from this library module */ +#define ms_par 9 /* Number of bytes of parameters accessed */ +#define ms_ego 10 /* Hint from EM Global Optimizer */ +#define ms_gto 11 /* Dangerous procedure, uses nonlocal goto */ +#define ms_stb 12 /* symbol table entry (for debugger) */ +#define ms_std 13 /* symbol table entry (for debugger) referring to dot */ +#define ms_tes 14 /* Size of the top element at a certain label */ + +/* + * for details about ms_reg, see em_reg.h + * for details about ms_ego, see em_ego.h + */ diff --git a/h/em_mnem.h b/h/em_mnem.h new file mode 100644 index 0000000..ffdca86 --- /dev/null +++ b/h/em_mnem.h @@ -0,0 +1,133 @@ +#define op_aar 1 +#define op_adf 2 +#define op_adi 3 +#define op_adp 4 +#define op_ads 5 +#define op_adu 6 +#define op_and 7 +#define op_asp 8 +#define op_ass 9 +#define op_beq 10 +#define op_bge 11 +#define op_bgt 12 +#define op_ble 13 +#define op_blm 14 +#define op_bls 15 +#define op_blt 16 +#define op_bne 17 +#define op_bra 18 +#define op_cai 19 +#define op_cal 20 +#define op_cff 21 +#define op_cfi 22 +#define op_cfu 23 +#define op_cif 24 +#define op_cii 25 +#define op_ciu 26 +#define op_cmf 27 +#define op_cmi 28 +#define op_cmp 29 +#define op_cms 30 +#define op_cmu 31 +#define op_com 32 
+#define op_csa 33 +#define op_csb 34 +#define op_cuf 35 +#define op_cui 36 +#define op_cuu 37 +#define op_dch 38 +#define op_dec 39 +#define op_dee 40 +#define op_del 41 +#define op_dup 42 +#define op_dus 43 +#define op_dvf 44 +#define op_dvi 45 +#define op_dvu 46 +#define op_exg 47 +#define op_fef 48 +#define op_fif 49 +#define op_fil 50 +#define op_gto 51 +#define op_inc 52 +#define op_ine 53 +#define op_inl 54 +#define op_inn 55 +#define op_ior 56 +#define op_lae 57 +#define op_lal 58 +#define op_lar 59 +#define op_ldc 60 +#define op_lde 61 +#define op_ldf 62 +#define op_ldl 63 +#define op_lfr 64 +#define op_lil 65 +#define op_lim 66 +#define op_lin 67 +#define op_lni 68 +#define op_loc 69 +#define op_loe 70 +#define op_lof 71 +#define op_loi 72 +#define op_lol 73 +#define op_lor 74 +#define op_los 75 +#define op_lpb 76 +#define op_lpi 77 +#define op_lxa 78 +#define op_lxl 79 +#define op_mlf 80 +#define op_mli 81 +#define op_mlu 82 +#define op_mon 83 +#define op_ngf 84 +#define op_ngi 85 +#define op_nop 86 +#define op_rck 87 +#define op_ret 88 +#define op_rmi 89 +#define op_rmu 90 +#define op_rol 91 +#define op_ror 92 +#define op_rtt 93 +#define op_sar 94 +#define op_sbf 95 +#define op_sbi 96 +#define op_sbs 97 +#define op_sbu 98 +#define op_sde 99 +#define op_sdf 100 +#define op_sdl 101 +#define op_set 102 +#define op_sig 103 +#define op_sil 104 +#define op_sim 105 +#define op_sli 106 +#define op_slu 107 +#define op_sri 108 +#define op_sru 109 +#define op_ste 110 +#define op_stf 111 +#define op_sti 112 +#define op_stl 113 +#define op_str 114 +#define op_sts 115 +#define op_teq 116 +#define op_tge 117 +#define op_tgt 118 +#define op_tle 119 +#define op_tlt 120 +#define op_tne 121 +#define op_trp 122 +#define op_xor 123 +#define op_zeq 124 +#define op_zer 125 +#define op_zge 126 +#define op_zgt 127 +#define op_zle 128 +#define op_zlt 129 +#define op_zne 130 +#define op_zre 131 +#define op_zrf 132 +#define op_zrl 133 diff --git a/h/em_pseu.h b/h/em_pseu.h new 
file mode 100644 index 0000000..61108b6 --- /dev/null +++ b/h/em_pseu.h @@ -0,0 +1,12 @@ +#define ps_bss 150 +#define ps_con 151 +#define ps_end 152 +#define ps_exa 153 +#define ps_exc 154 +#define ps_exp 155 +#define ps_hol 156 +#define ps_ina 157 +#define ps_inp 158 +#define ps_mes 159 +#define ps_pro 160 +#define ps_rom 161 diff --git a/h/em_ptyp.h b/h/em_ptyp.h new file mode 100644 index 0000000..7bff68b --- /dev/null +++ b/h/em_ptyp.h @@ -0,0 +1,25 @@ +/* $Id: em_ptyp.h,v 1.5 1994/06/24 10:08:19 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +#define ptyp(x) (1<<(x-sp_fspec)) + +#define cst_ptyp (ptyp(sp_cst2)|ptyp(sp_cst4)) +#define nof_ptyp (ptyp(sp_dlb1)|ptyp(sp_dlb2)|ptyp(sp_doff)) +#define sof_ptyp (ptyp(sp_dnam)|ptyp(sp_doff)) +#define lab_ptyp (ptyp(sp_dlb1)|ptyp(sp_dlb2)|ptyp(sp_dnam)) +#define ico_ptyp (ptyp(sp_icon)) +#define uco_ptyp (ptyp(sp_ucon)) +#define fco_ptyp (ptyp(sp_fcon)) +#define str_ptyp (ptyp(sp_scon)) +#define con_ptyp (str_ptyp|ico_ptyp|uco_ptyp|fco_ptyp) +#define ilb_ptyp (ptyp(sp_ilb1)|ptyp(sp_ilb2)) +#define pro_ptyp (ptyp(sp_pnam)) +#define off_ptyp (ptyp(sp_doff)) +#define end_ptyp (ptyp(sp_cend)) +#define sym_ptyp (lab_ptyp) +#define arg_ptyp (nof_ptyp|cst_ptyp|sof_ptyp) +#define par_ptyp (arg_ptyp|ico_ptyp|uco_ptyp|fco_ptyp|pro_ptyp|ilb_ptyp) +#define val_ptyp (par_ptyp|str_ptyp) +#define any_ptyp (val_ptyp|end_ptyp) diff --git a/h/em_reg.h b/h/em_reg.h new file mode 100644 index 0000000..4e9012f --- /dev/null +++ b/h/em_reg.h @@ -0,0 +1,15 @@ +/* $Id: em_reg.h,v 1.4 1994/06/24 10:08:21 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* + * mes ms_reg,offset,size,type,priority + * + * Here are the defines for type + */ + +#define reg_any 0 /* Unspecified type */ +#define reg_loop 1 /* loop control variable */ +#define reg_pointer 2 /* pointer variable */ +#define reg_float 3 /* floating point variable */ diff --git a/h/em_spec.h b/h/em_spec.h new file mode 100644 index 0000000..9fc3213 --- /dev/null +++ b/h/em_spec.h @@ -0,0 +1,29 @@ +#define sp_magic 173 +#define sp_fmnem 1 +#define sp_nmnem 149 +#define sp_fpseu 150 +#define sp_npseu 30 +#define sp_filb0 180 +#define sp_nilb0 60 +#define sp_fcst0 0 +#define sp_zcst0 120 +#define sp_ncst0 240 +#define sp_fspec 240 +#define sp_nspec 16 +#define sp_ilb1 240 +#define sp_ilb2 241 +#define sp_dlb1 242 +#define sp_dlb2 243 +#define sp_dnam 244 +#define sp_cst2 245 +#define sp_cst4 246 +#define sp_cst8 247 +#define sp_doff 248 +#define sp_pnam 249 +#define sp_scon 250 +#define sp_icon 251 +#define sp_ucon 252 +#define sp_fcon 253 +#define sp_cend 255 +#define sp_lpseu 161 +#define sp_lmnem 133 diff --git a/h/ip_spec.h b/h/ip_spec.h new file mode 100644 index 0000000..a0c8fd0 --- /dev/null +++ b/h/ip_spec.h @@ -0,0 +1,40 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* Contents of flags used when describing interpreter opcodes */ + +#define RCS_IP "$Id: ip_spec.h,v 1.2 1994/06/24 10:08:24 ceriel Exp $" + +#define OPTYPE 07 /* type field in flag */ + +#define OPMINI 0 /* m MINI */ +#define OPSHORT 1 /* s SHORT */ +#define OPNO 2 /* - No operand */ +#define OP8 3 /* 1 1-byte signed operand */ +#define OP16 4 /* 2 2-byte signed operand */ +#define OP32 5 /* 4 4-byte signed operand */ +#define OP64 6 /* 8 8-byte signed operand */ +#define OP16U 7 /* u 2-byte unsigned operand */ + +#define OPESC 010 /* e escaped opcode */ +#define OPWORD 020 /* w operand is word multiple */ +#define OPNZ 040 /* o operand starts at 1 ( or wordsize if w-flag) */ + +#define OPRANGE 0300 /* Range of operands: Positive, negative, both */ + +#define OP_BOTH 0000 /* the default */ +#define OP_POS 0100 /* p Positive (>=0) operands only */ +#define OP_NEG 0200 /* n Negative (<0) operands only */ + +struct opform { + char i_opcode ; /* the opcode number */ + char i_flag ; /* the flag byte */ + char i_low ; /* the interpreter first opcode */ + char i_num ; /* the number of shorts/minis (optional) */ +}; + +/* Escape indicators */ + +#define ESC 254 /* To escape group */ +#define ESC_L 255 /* To 32 and 64 bit operands */ diff --git a/h/m2_traps.h b/h/m2_traps.h new file mode 100644 index 0000000..bf929cc --- /dev/null +++ b/h/m2_traps.h @@ -0,0 +1,16 @@ +/* $Id: m2_traps.h,v 2.7 1994/06/24 10:08:27 ceriel Exp $ */ +/* + * (c) copyright 1990 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ + +/* Modula-2 runtime errors */ + +#define M2_TOOLARGE 64 /* stack of process too large */ +#define M2_TOOMANY 65 /* too many nested traps & handlers */ +#define M2_NORESULT 66 /* no RETURN from procedure function */ +#define M2_UOVFL 67 /* cardinal overflow */ +#define M2_FORCH 68 /* FOR-loop control variable changed */ +#define M2_UUVFL 69 /* cardinal underflow */ +#define M2_INTERNAL 70 /* internal error, should not happen */ +#define M2_UNIXSIG 71 /* unix signal */ diff --git a/h/ocm_chan.h b/h/ocm_chan.h new file mode 100644 index 0000000..b52c003 --- /dev/null +++ b/h/ocm_chan.h @@ -0,0 +1,52 @@ +/* $Id: ocm_chan.h,v 1.4 1994/06/24 10:08:30 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* ocm_chan.h - channel definitions */ +#include <stdio.h> /* FILE, needed for the unix_file[] declaration in this header */ +#include "ocm_parco.h" + +typedef union channel { + struct { /* Interprocess channel */ + char _type; /* Channel type, see note */ + char synch; /* State in channel synchronization */ + long val; /* Transmitted value */ + } c; + struct { /* File channel */ + char _type; /* Dummy field, see note */ + char index; /* Index in the file array */ + char flgs; /* Status flags: in use & readahead */ + char preread; /* Possible preread character */ + } f; +} chan; +#define type c._type /* Channel type; object-like macro, so every later identifier "type" expands to c._type */ +/* Note: The channel type should not be part of each structure in chan. But + * the C alignment rules would make chan about 50% bigger if we had done it + * the right way. Note that the order of fields in a struct cannot be a problem + * as long as struct c is the largest within the union.
+ */ + +#define C_T_CHAN 0 /* Type of an interprocess channel */ +#define C_T_FILE 1 /* Type of a file channel */ + +#define C_S_FREE 0 /* IP channel is free */ +#define C_S_ANY 1 /* IP channel contains data */ +#define C_S_ACK 2 /* IP channel data is removed */ + +#define C_F_EOF (-1L) /* File channel returns EOF */ +#define C_F_TEXT (-2L) /* File channel becomes line oriented */ +#define C_F_RAW (-3L) /* File channel becomes character oriented */ + +#define C_F_INUSE 0x01 /* File channel is connected to a UNIX file */ +#define C_F_READAHEAD 0x02 /* File channel has a preread character */ + +extern chan file[20]; /* Array of file channels; same length as unix_file[] */ +extern FILE *unix_file[20]; /* Pointers to buffered UNIX files */ + +void c_init(); + +void chan_in(), cbyte_in(), c_wa_in(), c_ba_in(); +void chan_out(), c_wa_out(), c_ba_out(); + +int chan_any(); diff --git a/h/ocm_parco.h b/h/ocm_parco.h new file mode 100644 index 0000000..0cd86df --- /dev/null +++ b/h/ocm_parco.h @@ -0,0 +1,23 @@ +/* $Id: ocm_parco.h,v 1.4 1994/06/24 10:08:33 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* ocm_parco.h - Define names for simulation routines + * + * This file is to be included by users of the higher-level routines + * + */ + +void pc_begin(), resumenext(), parend(), resume(), coend(); +int pc_fork(); + +#define nullid ((int *) 0 - (int *) 0) + /* I.e. a 0 of type "pointer difference". NOTE(review): ISO C leaves subtraction of two null pointers undefined -- confirm the target compilers fold this to 0 */ + +#define parbegin(sbrk) pc_begin(sbrk, nullid) +#define parfork() pc_fork(nullid) +#define cobegin(sbrk, id) pc_begin(sbrk, id) +#define cofork(id) pc_fork(id) + +extern int deadlock; diff --git a/h/ocm_proc.h b/h/ocm_proc.h new file mode 100644 index 0000000..ceea281 --- /dev/null +++ b/h/ocm_proc.h @@ -0,0 +1,61 @@ +/* $Id: ocm_proc.h,v 1.4 1994/06/24 10:08:36 ceriel Exp $ */ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
+ * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* process.h - Define administration types and functions + * + * This file is to be included by implementors of the higher + * level routines + * + */ +#include "ocm_parco.h" + +#ifndef ptrdiff /* This type must be able to hold a pointer difference */ +#if EM_WSIZE