From fcaf807aed6def067e1aa99cf01f398533fab557 Mon Sep 17 00:00:00 2001
From: ceriel <none@none>
Date: Tue, 3 Mar 1987 10:25:25 +0000
Subject: [PATCH] Initial revision

---
 doc/ego/Makefile  |  52 +++++
 doc/ego/bo/bo1    | 151 +++++++++++++
 doc/ego/ca/ca1    |  75 +++++++
 doc/ego/cf/cf1    |  94 ++++++++
 doc/ego/cf/cf2    |  50 +++++
 doc/ego/cf/cf3    |  53 +++++
 doc/ego/cf/cf4    |  93 ++++++++
 doc/ego/cf/cf5    |  79 +++++++
 doc/ego/cf/cf6    |  21 ++
 doc/ego/refs.gen  | 111 ++++++++++
 doc/ego/refs.opt  | 546 ++++++++++++++++++++++++++++++++++++++++++++++
 doc/ego/refs.stat |  29 +++
 12 files changed, 1354 insertions(+)
 create mode 100644 doc/ego/Makefile
 create mode 100644 doc/ego/bo/bo1
 create mode 100644 doc/ego/ca/ca1
 create mode 100644 doc/ego/cf/cf1
 create mode 100644 doc/ego/cf/cf2
 create mode 100644 doc/ego/cf/cf3
 create mode 100644 doc/ego/cf/cf4
 create mode 100644 doc/ego/cf/cf5
 create mode 100644 doc/ego/cf/cf6
 create mode 100644 doc/ego/refs.gen
 create mode 100644 doc/ego/refs.opt
 create mode 100644 doc/ego/refs.stat

diff --git a/doc/ego/Makefile b/doc/ego/Makefile
new file mode 100644
index 000000000..7b5b067c8
--- /dev/null
+++ b/doc/ego/Makefile
@@ -0,0 +1,52 @@
+REFS=-p refs.opt -p refs.stat -p refs.gen
+INTRO=intro/intro?
+OV=ov/ov?
+IC=ic/ic?
+CF=cf/cf?
+IL=il/il?
+SR=sr/sr?
+CS=cs/cs?
+SP=sp/sp?
+UD=ud/ud?
+LV=lv/lv?
+CJ=cj/cj?
+BO=bo/bo?
+RA=ra/ra?
+CA=ca/ca?
+EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \
+    $(UD) $(LV) $(RA) $(CA)
+REFER=refer
+
+../ego.doc:	$(EGO)
+	 $(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail > ../ego.doc
+
+ego.f:	$(EGO)
+	 $(REFER)  -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | nroff -ms > ego.f
+intro.f:	$(INTRO)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ov/head $(INTRO) intro/tail | nroff -ms > intro.f
+ov.f:	$(OV)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ov/head $(OV) intro/tail | nroff -ms > ov.f
+ic.f:	$(IC)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ic/head $(IC) intro/tail | nroff -ms > ic.f
+cf.f:	$(CF)
+	 $(REFER)  -sA+T -l4,2 $(REFS) cf/head $(CF) intro/tail | nroff -ms > cf.f
+il.f:	$(IL)
+	 $(REFER)  -sA+T -l4,2 $(REFS) il/head $(IL) intro/tail | nroff -ms > il.f
+sr.f:	$(SR)
+	 $(REFER)  -sA+T -l4,2 $(REFS) sr/head $(SR) intro/tail | nroff -ms > sr.f
+cs.f:	$(CS)
+	 $(REFER)	-sA+T -l4,2 $(REFS) cs/head $(CS) intro/tail | nroff -ms > cs.f
+sp.f:	$(SP)
+	 $(REFER)  -sA+T -l4,2 $(REFS) sp/head $(SP) intro/tail | nroff -ms > sp.f
+cj.f:	$(CJ)
+	 $(REFER)  -sA+T -l4,2 $(REFS) cj/head $(CJ) intro/tail | nroff -ms > cj.f
+bo.f:	$(BO)
+	 $(REFER)  -sA+T -l4,2 $(REFS) bo/head $(BO) intro/tail | nroff -ms > bo.f
+ud.f:	$(UD)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ud/head $(UD) intro/tail | nroff -ms > ud.f
+lv.f:	$(LV)
+	 $(REFER)  -sA+T -l4,2 $(REFS) lv/head $(LV) intro/tail | nroff -ms > lv.f
+ra.f:	$(RA)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ra/head $(RA) intro/tail | nroff -ms > ra.f
+ca.f:	$(CA)
+	 $(REFER)  -sA+T -l4,2 $(REFS) ca/head $(CA) intro/tail | nroff -ms > ca.f
diff --git a/doc/ego/bo/bo1 b/doc/ego/bo/bo1
new file mode 100644
index 000000000..23019605e
--- /dev/null
+++ b/doc/ego/bo/bo1
@@ -0,0 +1,151 @@
+.bp
+.NH 1
+Branch Optimization
+.NH 2
+Introduction
+.PP
+The Branch Optimization phase (BO) performs two related
+(branch) optimizations.
+.NH 3
+Fusion of basic blocks
+.PP
+If two basic blocks B1 and B2 have the following properties:
+.DS
+SUCC(B1) = {B2}
+PRED(B2) = {B1}
+.DE
+then B1 and B2 can be combined into one basic block.
+If B1 ends in an unconditional jump to the beginning of B2, this
+jump can be eliminated,
+hence saving a little execution time and object code size.
+This technique can be used to eliminate some deficiencies
+introduced by the front ends (for example, the "C" front end
+translates switch statements inefficiently due to its one pass nature).
+.NH 3
+While-loop optimization
+.PP
+The straightforward way to translate a while loop is to
+put the test for loop termination at the beginning of the loop.
+.DS
+while cond loop                  LAB1: Test cond
+   body of the loop     --->           Branch On False To LAB2
+end loop                               code for body of loop
+				       Branch To LAB1
+				 LAB2:
+
+Fig. 10.1 Example of Branch Optimization
+.DE
+If the condition fails at the Nth iteration, the following code
+gets executed (dynamically):
+.DS
+N   *  conditional branch (which fails N-1 times)
+N-1 *  unconditional branch
+N-1 *  body of the loop
+.DE
+An alternative translation is:
+.DS
+     Branch To LAB2
+LAB1:
+     code for body of loop
+LAB2:
+     Test cond
+     Branch On True To LAB1
+.DE
+This translation results in the following profile:
+.DS
+N   *  conditional branch (which succeeds N-1 times)
+1   *  unconditional branch
+N-1 *  body of the loop
+.DE
+So the second translation will be significantly faster if N >> 2.
+If N=2, execution time will be slightly increased.
+On the average, the program will be speeded up.
+Note that the code sizes of the two translations will be the same.
+.NH 2
+Implementation
+.PP
+The basic block fusion technique is implemented
+by traversing the control flow graph of a procedure,
+looking for basic blocks B with only one successor (S).
+If one is found, it is checked if S has only one predecessor
+(which has to be B).
+If so, the two basic blocks can in principle be combined.
+However, as one basic block will have to be moved,
+the textual order of the basic blocks will be altered.
+This reordering causes severe problems in the presence
+of conditional jumps.
+For example, if S ends in a conditional branch,
+the basic block that comes textually next to S must stay
+in that position.
+So the transformation in Fig. 10.2 is illegal.
+.DS
+LAB1: S1              LAB1: S1
+      BRA LAB2        S2
+      ...       -->   BEQ LAB3
+LAB2: S2              ...
+      BEQ LAB3        S3
+      S3
+
+Fig. 10.2 An illegal transformation of Branch Optimization
+.DE
+If B is moved towards S the same problem occurs if the block before B
+ends in a conditional jump.
+The problem could be solved by adding one extra branch,
+but this would reduce the gains of the optimization to zero.
+Hence the optimization will only be done if the block that
+follows S (in the textual order) is not a successor of S.
+This condition assures that S does not end in a conditional branch.
+The condition always holds for the code generated by the "C"
+front end for a switch statement.
+.PP
+After the transformation has been performed,
+some attributes of the basic blocks involved (such as successor and
+predecessor sets and immediate dominator) must be recomputed.
+.PP
+The while-loop technique is applied to one loop at a time.
+The list of basic blocks of the loop is traversed to find
+a block B that satisfies the following conditions:
+.IP 1.
+the textually next block to B is not part of the loop
+.IP 2.
+the last instruction of B is an unconditional branch;
+hence B has only one successor, say S
+.IP 3.
+the textually next block of B is a successor of S
+.IP 4.
+the last instruction of S is a conditional branch
+.LP
+If such a block B is found, the control flow graph is changed
+as depicted in Fig. 10.3.
+.DS
+       |				    |
+       |				    v
+       v				    |
+       |-----<------|			    ----->-----|
+   ____|____	    |				       |
+   |	   |	    |		    |-------|	       |
+   |  S1   |	    |		    |	    v	       |
+   |  Bcc  |	    |		    |	  ....	       |
+|--|	   |	    |		    |		       |
+|  ---------	    |		    |	----|----      |
+|		    |		    |	|	|      |
+|     ....	    ^		    |	|  S2	|      |
+|		    |		    |	|	|      |
+|   ---------	    |		    |	|	|      |
+v   |	    |	    |		    ^	---------      |
+|   |  S2   |	    |		    |	    |	       |
+|   | BRA   |	    |		    |	    |-----<-----
+|   |	    |	    |		    |	    v
+|   ---------	    |		    |	____|____
+|	|	    |		    |	|	|
+|	------>------		    |	|  S1	|
+|				    |	|  Bnn  |
+|-------|			    |	|	|
+	|			    |	----|----
+	v			    |	    |
+				    |----<--|
+					    |
+					    v
+
+Fig. 10.3 Transformation of the CFG by Branch Optimization
+.DE
diff --git a/doc/ego/ca/ca1 b/doc/ego/ca/ca1
new file mode 100644
index 000000000..781710ea7
--- /dev/null
+++ b/doc/ego/ca/ca1
@@ -0,0 +1,75 @@
+.bp
+.NH 1
+Compact assembly generation
+.NH 2
+Introduction
+.PP
+The "Compact Assembly generation phase" (CA) transforms the
+intermediate code of the optimizer into EM code in
+Compact Assembly Language (CAL) format.
+In the intermediate code, all program entities
+(such as procedures, labels, global variables)
+are denoted by a unique identifying number (see 3.5).
+In the CAL output of the optimizer these numbers have to
+be replaced by normal identifiers (strings).
+The original identifiers of the input program are used whenever possible.
+Recall that the IC phase generates two files that can be
+used to map unique identifying numbers to procedure names and
+global variable names.
+For instruction labels CA always generates new names.
+The reasons for doing so are:
+.IP -
+instruction labels are only visible inside one procedure, so they can
+not be referenced in other modules
+.IP -
+the names are not very suggestive anyway, as they must be integer numbers
+.IP -
+the optimizer considerably changes the control structure of the program,
+so there is really no one to one mapping of instruction labels in
+the input and the output program.
+.LP
+As the optimizer combines all input modules into one module,
+visibility problems may occur.
+Two modules M1 and M2 can both define an identifier X (provided that
+X is not externally visible in any of these modules).
+If M1 and M2 are combined into one module M, two distinct
+entities with the same name would exist in M, which
+is not allowed.
+.[~[
+tanenbaum machine architecture
+.], section 11.1.4.3]
+In these cases, CA invents a new unique name for one of the entities.
+.NH 2
+Implementation
+.PP
+CA first reads the files containing the procedure and global variable names
+and stores the names in two tables.
+It scans these tables to make sure that all names are different.
+Subsequently it reads the EM text, one procedure at a time,
+and outputs it in CAL format.
+The major part of the code that does the latter transformation
+is adapted from the EM Peephole Optimizer.
+.PP
+The main problem of the implementation of CA is to
+assure that the visibility rules are obeyed.
+If an identifier must be externally visible (i.e.
+it was externally visible in the input program)
+and the identifier is defined (in the output program) before
+being referenced,
+an EXA or EXP pseudo must be generated for it.
+(Note that the optimizer may change the order of definitions and
+references, so some pseudos may be needed that were not
+present in the input program).
+On the other hand, an identifier may be only internally visible.
+If such an identifier is referenced before being defined,
+an INA or INP pseudo must be emitted prior to its first reference.
+.UH
+Acknowledgements
+.PP
+The author would like to thank Andy Tanenbaum for his guidance,
+Duk Bekema for implementing the Common Subexpression Elimination phase
+and writing the initial documentation of that phase,
+Dick Grune for reading the manuscript of this report
+and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren
+and the members of the S.T.W. user's group for their
+interest and assistance.
diff --git a/doc/ego/cf/cf1 b/doc/ego/cf/cf1
new file mode 100644
index 000000000..e65547458
--- /dev/null
+++ b/doc/ego/cf/cf1
@@ -0,0 +1,94 @@
+.bp
+.NH
+The Control Flow Phase
+.PP
+In the previous chapter we described the intermediate
+code of the global optimizer.
+We also specified which part of this code
+was constructed by the IC phase of the optimizer.
+The Control Flow Phase (\fICF\fR) does
+the remainder of the job,
+i.e. it determines:
+.IP -
+the control flow graphs
+.IP -
+the loop tables
+.IP -
+the calling, change and use attributes of
+the procedure table entries
+.LP
+CF operates on one procedure at a time.
+For every procedure it first reads the EM instructions
+from the EM-text file and groups them into basic blocks.
+For every basic block, its successors and
+predecessors are determined,
+resulting in the control flow graph.
+Next, the immediate dominator of every basic block
+is computed.
+Using these dominators, any loop in the
+procedure is detected.
+Finally, interprocedural analysis is done,
+after which we will know the global effects of
+every procedure call on its environment.
+.sp
+CF uses the same internal data structures
+for the procedure table and object table as IC.
+.NH 2
+Partitioning into basic blocks
+.PP
+With regard to flow of control, we distinguish
+three kinds of EM instructions:
+jump instructions, instruction label definitions and
+normal instructions.
+Jump instructions are all conditional or unconditional
+branch instructions,
+the case instructions (CSA/CSB)
+and the RET (return) instruction.
+A procedure call (CAL) is not considered to be a jump.
+A defining occurrence of an instruction label
+is regarded as an EM instruction.
+.PP
+An instruction starts
+a new basic block, in any of the following cases:
+.IP 1.
+It is the first instruction of a procedure
+.IP 2.
+It is the first of a list of instruction label
+defining occurrences
+.IP 3.
+It follows a jump
+.LP
+If there are several consecutive instruction labels
+(which is highly unusual),
+all of them are put in the same basic block.
+Note that several cases may overlap,
+e.g. a label definition at the beginning of a procedure
+or a label following a jump.
+.PP
+A simple Finite State Machine is used to model
+the above rules.
+It also recognizes the end of a procedure,
+marked by an END pseudo.
+The basic blocks are stored internally as a doubly linked
+linear list.
+The blocks are linked in textual order.
+Every node of this list has the attributes described
+in the previous chapter (see syntax rule for
+basic_block).
+Furthermore, every node contains a pointer to its
+EM instructions,
+which are represented internally
+as a linear, doubly linked list,
+just as in the IC phase.
+However, instead of one list per procedure (as in IC)
+there is now one list per basic block.
+.PP
+On the fly, a table is build that maps
+every label identifier to the label definition
+instruction.
+This table is used for computing the control flow.
+The table is stored as a dynamically allocated array.
+The length of the array is the number of labels
+of the current procedure;
+this value can be found in the procedure table,
+where it was stored by IC.
diff --git a/doc/ego/cf/cf2 b/doc/ego/cf/cf2
new file mode 100644
index 000000000..c4dd95d5d
--- /dev/null
+++ b/doc/ego/cf/cf2
@@ -0,0 +1,50 @@
+.NH 2
+Control Flow
+.PP
+A \fIsuccessor\fR of a basic block B is a block C
+that can be executed immediately after B.
+C is said to be a \fIpredecessor\fR of B.
+A block ending with a RET instruction
+has no successors.
+Such a block is called a \fIreturn block\fR.
+Any block that has no predecessors cannot be
+executed at all (i.e. it is unreachable),
+unless it is the first block of a procedure,
+called the \fIprocedure entry block\fR.
+.PP
+Internally, the successor and predecessor
+attributes of a basic block are stored as \fIsets\fR.
+Alternatively, one may regard all these
+sets of all basic blocks as a conceptual \fIgraph\fR,
+in which there is an edge from B to C if C
+is in the successor set of B.
+We call this conceptual graph
+the \fIControl Flow Graph\fR.
+.PP
+The only successor of a basic block ending on an
+unconditional branch instruction is the block that
+contains the label definition of the target of the jump.
+The target instruction can be found via the LAB_ID
+that is the operand of the jump instruction,
+by using the label-map table mentioned
+above.
+If the last instruction of a block is a
+conditional jump,
+the successors are the target block and the textually
+next block.
+The last instruction can also be a case jump
+instruction (CSA or CSB).
+We then analyze the case descriptor,
+to find all possible target instructions
+and their associated blocks.
+We require the case descriptor to be allocated in
+a ROM, so it cannot be changed dynamically.
+A case jump via an alterable descriptor could in principle
+go to any label in the program.
+In the presence of such an uncontrolled jump,
+hardly any optimization can be done.
+We do not expect any front end to generate such a descriptor,
+however, because of the controlled nature
+of case statements in high level languages.
+If the basic block does not end in a jump instruction,
+its only successor is the textually next block.
diff --git a/doc/ego/cf/cf3 b/doc/ego/cf/cf3
new file mode 100644
index 000000000..42e8827b1
--- /dev/null
+++ b/doc/ego/cf/cf3
@@ -0,0 +1,53 @@
+.NH 2
+Immediate dominators
+.PP
+A basic block B dominates a block C if every path
+in the control flow graph from the procedure entry block
+to C goes through B.
+The immediate dominator of C is the closest dominator
+of C on any path from the entry block.
+See also
+.[~[
+aho compiler design
+.], section 13.1.]
+.PP
+There are a number of algorithms to compute
+the immediate dominator relation.
+.IP 1.
+Purdom and Moore give an algorithm that is
+easy to program and easy to describe (although the
+description they give is unreadable;
+it is given in a very messy Algol60 program full of gotos).
+.[
+predominators 
+.]
+.IP 2.
+Aho and Ullman present a bitvector algorithm, which is also
+easy to program and to understand.
+(See 
+.[~[
+aho compiler design
+.], section 13.1.]).
+.IP 3
+Lengauer and Tarjan introduce a fast algorithm that is
+hard to understand, yet remarkably easy to implement.
+.[
+lengauer dominators
+.]
+.LP
+The Purdom-Moore algorithm is very slow if the
+number of basic blocks in the flow graph is large.
+The Aho-Ullman algorithm in fact computes the
+dominator relation,
+from which the immediate dominator relation can be computed
+in time quadratic to the number of basic blocks, worst case.
+The storage requirement is also quadratic to the number
+of blocks.
+The running time of the third algorithm is proportional
+to:
+.DS
+(number of edges in the graph) * log(number of blocks).
+.DE
+We have chosen this algorithm because it is fast
+(as shown by experiments done by Lengauer and Tarjan),
+it is easy to program and requires little data space.
diff --git a/doc/ego/cf/cf4 b/doc/ego/cf/cf4
new file mode 100644
index 000000000..ca19394df
--- /dev/null
+++ b/doc/ego/cf/cf4
@@ -0,0 +1,93 @@
+.NH 2
+Loop detection
+.PP
+Loops are detected by using the loop construction
+algorithm of.
+.[~[
+aho compiler design
+.], section 13.1.]
+This algorithm uses \fIback edges\fR.
+A back edge is an edge from B to C in the CFG,
+whose head (C) dominates its tail (B).
+The loop associated with this back edge
+consists of C plus all nodes in the CFG
+that can reach B without going through C.
+.PP
+As an example of how the algorithm works,
+consider the piece of program of Fig. 4.1.
+First just look at the program and think for
+yourself what part of the code constitutes the loop.
+.DS
+loop
+   if cond then                       1
+      -- lots of simple
+      -- assignment
+      -- statements              2          3
+      exit; -- exit loop
+   else
+      S; -- one statement
+   end if;
+end loop;
+
+Fig. 4.1 A misleading loop
+.DE
+Although a human being may be easily deceived
+by the brackets "loop" and "end loop",
+the loop detection algorithm will correctly
+reply that only the test for "cond" and
+the single statement in the false-part
+of the if statement are part of the loop!
+The statements in the true-part only get
+executed once, so there really is no reason at all
+to say they're part of the loop too.
+The CFG contains one back edge, "3->1".
+As node 3 cannot be reached from node 2,
+the latter node is not part of the loop.
+.PP
+A source of problems with the algorithm is the fact
+that different back edges may result in
+the same loop.
+Such an ill-structured loop is
+called a \fImessy\fR loop.
+After a loop has been constructed, it is checked
+if it is really a new loop.
+.PP
+Loops can partly overlap, without one being nested
+inside the other.
+This is the case in the program of Fig. 4.2.
+.DS
+1:                              1
+   S1;
+2:
+   S2;                          2
+   if cond then
+      goto 4;
+   S3;                     3         4
+   goto 1;
+4:
+   S4;
+   goto 1;
+
+Fig. 4.2 Partly overlapping loops
+.DE
+There are two back edges "3->1" and "4->1",
+resulting in the loops {1,2,3} and {1,2,4}.
+With every basic block we associate a set of
+all loops it is part of.
+It is not sufficient just to record its
+most enclosing loop.
+.PP
+After all loops of a procedure are detected, we determine
+the nesting level of every loop.
+Finally, we find all strong and firm blocks of the loop.
+If the loop has only one back edge (i.e. it is not messy),
+the set of firm blocks consists of the
+head of this back edge and its dominators
+in the loop (including the loop entry block).
+A firm block is also strong if it is not a
+successor of a block that may exit the loop;
+a block may exit a loop if it has an (immediate) successor
+that is not part of the loop.
+For messy loops we do not determine the strong
+and firm blocks. These loops are expected
+to occur very rarely.
diff --git a/doc/ego/cf/cf5 b/doc/ego/cf/cf5
new file mode 100644
index 000000000..8ef4d11c3
--- /dev/null
+++ b/doc/ego/cf/cf5
@@ -0,0 +1,79 @@
+.NH 2
+Interprocedural analysis
+.PP
+It is often desirable to know the effects
+a procedure call may have.
+The optimization below is only possible if
+we know for sure that the call to P cannot
+change A.
+.DS
+A := 10;                        A:= 10;
+P;  -- procedure call    -->    P;
+B := A + 2;                     B := 12;
+.DE
+Although it is not possible to predict exactly
+all the effects a procedure call has, we may
+determine a kind of upper bound for it.
+So we compute all variables that may be
+changed by P, although they need not be
+changed at every invocation of P.
+We can get hold of this set by just looking
+at all assignment (store) instructions
+in the body of P.
+EM also has a set of \fIindirect\fR assignment
+instructions,
+i.e. assignment through a pointer variable.
+In general, it is not possible to determine
+which variable is affected by such an assignment.
+In these cases, we just record the fact that P
+does an indirect assignment.
+Note that this does not mean that all variables
+are potentially affected, as the front ends
+may generate messages telling that certain
+variables can never be accessed indirectly.
+We also set a flag if P does a use (load) indirect.
+Note that we only have to look at \fIglobal\fR
+variables.
+If P changes or uses any of its locals,
+this has no effect on its environment.
+Local variables of a lexically enclosing
+procedure can only be accessed indirectly.
+.PP
+A procedure P may of course call another procedure.
+To determine the effects of a call to P,
+we also must know the effects of a call to the second procedure.
+This second one may call a third one, and so on.
+Effectively, we need to compute the \fItransitive closure\fR
+of the effects.
+To do this, we determine for every procedure
+which other procedures it calls.
+This set is the "calling" attribute of a procedure.
+One may regard all these sets as a conceptual graph,
+in which there is an edge from P to Q
+if Q is in the calling set of P. This graph will
+be referred to as the \fIcall graph\fR.
+(Note the resemblance with the control flow graph).
+.PP
+We can detect which procedures are called by P
+by looking at all CAL instructions in its body.
+Unfortunately, a procedure may also be
+called indirectly, via a CAI instruction.
+Yet, only procedures that are used as operand of an LPI
+instruction can be called indirect,
+because this is the only way to take the address of a procedure.
+We determine for every procedure whether it does
+a CAI instruction.
+We also build a set of all procedures used as
+operand of an LPI.
+.sp
+After all procedures have been processed (i.e. all CFGs
+are constructed, all loops are detected,
+all procedures are analyzed to see which variables
+they may change, which procedures they call,
+whether they do a CAI or are used in an LPI) the
+transitive closure of all interprocedural
+information is computed.
+During the same process,
+the calling set of every procedure that uses a CAI
+is extended with the above mentioned set of all
+procedures that can be called indirect.
diff --git a/doc/ego/cf/cf6 b/doc/ego/cf/cf6
new file mode 100644
index 000000000..a560b48e3
--- /dev/null
+++ b/doc/ego/cf/cf6
@@ -0,0 +1,21 @@
+.NH 2
+Source files
+.PP
+The sources of CF are in the following files and packages:
+.IP cf.h: 14
+declarations of global variables and data structures
+.IP cf.c:
+the routine main; interprocedural analysis;
+transitive closure
+.IP succ:
+control flow (successor and predecessor)
+.IP idom:
+immediate dominators
+.IP loop:
+loop detection
+.IP get:
+read object and procedure table;
+read EM text and partition it into basic blocks
+.IP put:
+write tables, CFGs and EM text
+.LP
diff --git a/doc/ego/refs.gen b/doc/ego/refs.gen
new file mode 100644
index 000000000..b13d57fee
--- /dev/null
+++ b/doc/ego/refs.gen
@@ -0,0 +1,111 @@
+%T A Practical Toolkit for Making Portable Compilers
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%I Vrije Universiteit, Amsterdam
+%R Rapport nr IR-74
+%D October 1981
+
+%T A Practical Toolkit for Making Portable Compilers
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%J CACM
+%V 26
+%N 9
+%P 654-660
+%D September 1983
+
+%T A Unix Toolkit for Making Portable Compilers
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%J Proceedings USENIX conf.
+%C Toronto, Canada
+%V 26
+%D July 1983
+%P 255-261
+
+%T Using Peephole Optimization on Intermediate Code
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A J.W. Stevenson
+%J TOPLAS
+%V 4
+%N 1
+%P 21-36
+%D January 1982
+
+%T Description of a machine architecture for use with
+block structured languages
+%A A.S. Tanenbaum
+%A H. van Staveren
+%A E.G. Keizer
+%A J.W. Stevenson
+%I Vrije Universiteit, Amsterdam
+%R Rapport nr IR-81
+%D August 1983
+
+%T Amsterdam Compiler Kit documentation
+%A A.S. Tanenbaum et. al.
+%I Vrije Universiteit, Amsterdam
+%R Rapport nr IR-90
+%D June 1984
+
+%T The C Programming Language - Reference Manual
+%A D.M. Ritchie
+%I Bell Laboratories
+%C Murray Hill, New Jersey
+%D 1978
+
+%T Unix programmer's manual, Seventh Edition
+%A B.W. Kernighan
+%A M.D. McIlroy
+%I Bell Laboratories
+%C Murray Hill, New Jersey
+%V 1
+%D January 1979
+
+%T A Tour Through the Portable C Compiler
+%A S.C. Johnson
+%I Bell Laboratories
+%B Unix programmer's manual, Seventh Edition
+%C Murray Hill, New Jersey
+%D January 1979
+
+
+%T Ada Programming Language - MILITARY STANDARD 
+%A J.D. Ichbiah
+%I U.S. Department of Defense
+%R ANSI/MIL-STD-1815A
+%D 22 January 1983
+
+%T Rationale for the Design of the Ada Programming Language
+%A J.D. Ichbiah
+%J SIGPLAN Notices
+%V 14
+%N 6
+%D June 1979
+
+%T The Programming Languages LISP and TRAC
+%A W.L. van der Poel
+%I Technische Hogeschool Delft
+%C Delft
+%D 1972
+
+%T Compiler construction
+%A W.M. Waite
+%A G. Goos
+%I Springer-Verlag
+%C New York
+%D 1984
+
+%T The C Programming Language
+%A B.W. Kernighan
+%A D.M. Ritchie
+%I Prentice-Hall, Inc
+%C Englewood Cliffs,NJ
+%D 1978
diff --git a/doc/ego/refs.opt b/doc/ego/refs.opt
new file mode 100644
index 000000000..6029c7bcf
--- /dev/null
+++ b/doc/ego/refs.opt
@@ -0,0 +1,546 @@
+%T Principles of compiler design
+%A A.V. Aho
+%A J.D. Ullman
+%I Addison-Wesley
+%C Reading, Massachusetts
+%D 1978
+
+%T The Design and Analysis of Computer Algorithms
+%A A.V. Aho
+%A J.E. Hopcroft
+%A J.D. Ullman
+%I Addison-Wesley
+%C Reading, Massachusetts
+%D 1974
+
+%T Code generation in a machine-independent compiler
+%A R.G.G. Cattell
+%A J.M. Newcomer
+%A B.W. Leverett
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 65-75
+%D August 1979
+
+%T An algorithm for Reduction of Operator Strength
+%A J. Cocke
+%A K. Kennedy
+%J CACM
+%V 20
+%N 11
+%P 850-856
+%D November 1977
+
+%T Reduction of Operator Strength
+%A F.E. Allen
+%A J. Cocke
+%A K. Kennedy
+%B Program Flow Analysis
+%E S.S. Muchnick and  D. Jones
+%I Prentice-Hall
+%C Englewood Cliffs, N.J.
+%D 1981
+
+%T Simplifying Code Generation Through Peephole Optimization
+%A J.W. Davidson
+%R Ph.D. thesis
+%I Dept. of Computer Science
+%C Univ. of Arizona
+%D December 1981
+
+%T A study of selective optimization techniques
+%A G.R. Katkus
+%R Ph.D. Thesis
+%C University of Southern California
+%D 1973
+
+%T Automatic subroutine generation in an optimizing compiler
+%A J.B. Shaffer
+%R Ph.D. Thesis
+%C University of Maryland
+%D 1978
+
+%T Optimal mixed code generation for microcomputers
+%A D.S. Photopoulos
+%R Ph.D. Thesis
+%C Northeastern University
+%D 1981
+
+%T The Design of an Optimizing Compiler
+%A W.A. Wulf
+%A R.K. Johnsson
+%A C.B. Weinstock
+%A S.O. Hobbs
+%A C.M. Geschke
+%I American Elsevier Publishing Company
+%C New York
+%D 1975
+
+%T Retargetable Compiler Code Generation
+%A M. Ganapathi
+%A C.N. Fischer
+%A J.L. Hennessy
+%J ACM Computing Surveys
+%V 14
+%N 4
+%P 573-592
+%D December 1982
+
+%T An Optimizing Pascal Compiler
+%A R.N. Faiman
+%A A.A. Kortesoja
+%J IEEE Trans. on Softw. Eng.
+%V 6
+%N 6
+%P 512-518
+%D November 1980
+
+%T Experience with the SETL Optimizer
+%A S.M. Freudenberger
+%A J.T. Schwartz
+%J TOPLAS
+%V 5
+%N 1
+%P 26-45
+%D Januari 1983
+
+%T An Optimizing Ada Compiler
+%A W. Kirchgaesner
+%A J. Uhl
+%A G. Winterstein
+%A G. Goos
+%A M. Dausmann
+%A S. Drossopoulou
+%I Institut fur Informatik II, Universitat Karlsruhe
+%D February 1983
+
+%T A Fast Algorithm for Finding Dominators
+in a Flowgraph
+%A T. Lengauer
+%A R.E. Tarjan
+%J TOPLAS
+%V 1
+%N 1
+%P 121-141
+%D July 1979
+
+%T Optimization of hierarchical directed graphs
+%A M.T. Lepage
+%A D.T. Barnard
+%A A. Rudmik
+%J Computer Languages
+%V 6
+%N 1
+%P 19-34
+%D Januari 1981
+
+%T Object Code Optimization
+%A E.S. Lowry
+%A C.W. Medlock
+%J CACM
+%V 12
+%N 1
+%P 13-22
+%D Januari 1969
+
+%T Automatic Program Improvement:
+Variable Usage Transformations
+%A B. Maher
+%A D.H. Sleeman
+%J TOPLAS
+%V 5
+%N 2
+%P 236-264
+%D April 1983
+
+%T The design of a global optimizer
+%A R.J. Mintz
+%A G.A. Fisher
+%A M. Sharir
+%J SIGPLAN Notices
+%V 14
+%N 9
+%P 226-234
+%D September 1979
+
+%T Global Optimization by Suppression of Partial Redundancies
+%A E. Morel
+%A C. Renvoise
+%J CACM
+%V 22
+%N 2
+%P 96-103
+%D February 1979
+
+%T Efficient Computation of Expressions with Common Subexpressions
+%A B. Prabhala
+%A R. Sethi
+%J JACM
+%V 27
+%N 1
+%P 146-163
+%D Januari 1980
+
+%T An Analysis of Inline Substitution for a Structured
+Programming Language
+%A R.W. Scheifler
+%J CACM
+%V 20
+%N 9
+%P 647-654
+%D September 1977
+
+%T Immediate Predominators in a Directed Graph
+%A P.W. Purdom
+%A E.F. Moore
+%J CACM
+%V 15
+%N 8
+%P 777-778
+%D August 1972
+
+%T The Generation of Optimal Code for Arithmetic Expressions
+%A R. Sethi
+%A J.D. Ullman
+%J JACM
+%V 17
+%N 4
+%P 715-728
+%D October 1970
+
+%T Exposing side-effects in a PL/I optimizing compiler
+%A T.C. Spillman
+%B Information Processing 1971
+%I North-Holland Publishing Company
+%C Amsterdam
+%P 376-381
+%D 1971
+
+%T Inner Loops in Flowgraphs and Code Optimization
+%A S. Vasudevan
+%J Acta Informatica
+%N 17
+%P 143-155
+%D 1982
+
+%T A New Strategy for Code Generation - the General-Purpose
+Optimizing Compiler
+%A W.H. Harrison
+%J IEEE Trans. on Softw. Eng.
+%V 5
+%N 4
+%P 367-373
+%D July 1979
+
+%T PQCC: A Machine-Relative Compiler Technology
+%A W.M. Wulf
+%R CMU-CS-80-144
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 25 september 1980
+
+%T Machine-independent Pascal code optimization
+%A D.R. Perkins
+%A R.L. Sites
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 201-207
+%D August 1979
+
+%T A Case Study of a New Code Generation Technique for Compilers
+%A J.L. Carter
+%J CACM
+%V 20
+%N 12
+%P 914-920
+%D December 1977
+
+%T Table-driven Code Generation
+%A S.L. Graham
+%J IEEE Computer
+%V 13
+%N 8
+%P 25-33
+%D August 1980
+
+%T Register Allocation in Optimizing Compilers
+%A B.W. Leverett
+%R Ph.D. Thesis, CMU-CS-81-103
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D February 1981
+
+%T Register Allocation via Coloring
+%A G.J. Chaitin
+%A M.A. Auslander
+%A A.K. Chandra
+%A J. Cocke
+%A M.E. Hopkins
+%A P.W. Markstein
+%J Computer Languages
+%V 6
+%N 1
+%P 47-57
+%D January 1981
+
+%T How to Call Procedures, or Second Thoughts on
+Ackermann's Function
+%A B.A. Wichmann
+%J Software - Practice and Experience
+%V 7
+%P 317-329
+%D 1977
+
+%T Register Allocation Via Usage Counts
+%A R.A. Freiburghouse
+%J CACM
+%V 17
+%N 11
+%P 638-642
+%D November 1974
+
+%T Machine-independent register allocation
+%A R.L. Sites
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 221-225
+%D August 1979
+
+%T An Overview of the Production-Quality Compiler-Compiler Project
+%A B.W. Leverett
+%A R.G.G Cattell
+%A S.O. Hobbs
+%A J.M. Newcomer
+%A A.H. Reiner
+%A B.R. Schatz
+%A W.A. Wulf
+%J IEEE Computer
+%V 13
+%N 8
+%P 38-49
+%D August 1980
+
+%T An Overview of the Production-Quality Compiler-Compiler Project
+%A B.W. Leverett
+%A R.G.G Cattell
+%A S.O. Hobbs
+%A J.M. Newcomer
+%A A.H. Reiner
+%A B.R. Schatz
+%A W.A. Wulf
+%R CMU-CS-79-105
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 1979
+
+%T Topics in Code Generation and Register Allocation
+%A B.W. Leverett
+%R CMU-CS-82-130
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 28 July 1982
+
+%T Predicting the Effects of Optimization on a Procedure Body
+%A J.E. Ball
+%J SIGPLAN Notices
+%V 14
+%N 8
+%P 214-220
+%D August 1979
+
+%T The C Language Calling Sequence
+%A S.C. Johnson
+%A D.M. Ritchie
+%I Bell Laboratories
+%C Murray Hill, New Jersey
+%D September 1981
+
+%T A Generalization of Two Code Ordering Optimizations
+%A C.W. Fraser
+%R TR 82-11
+%I Department of Computer Science
+%C The University of Arizona, Tucson
+%D October 1982
+
+%T A Survey of Data Flow Analysis Techniques
+%A K. Kennedy
+%B Program Flow Analysis
+%E S.S. Muchnick and  D. Jones
+%I Prentice-Hall
+%C Englewood Cliffs
+%D 1981
+
+%T Delayed Binding in PQCC Generated Compilers
+%A W.A. Wulf
+%A K.V. Nori
+%R CMU-CS-82-138
+%I Carnegie-Mellon University
+%C Pittsburgh
+%D 1982
+
+%T Interprocedural Data Flow Analysis in the presence
+of Pointers, Procedure Variables, and Label Variables
+%A W.E. Weihl
+%J Conf. Rec. of the 7th ACM Symp. on Principles of
+Programming Languages
+%C Las Vegas, Nevada
+%P 83-94
+%D 1980
+
+%T Low-Cost, High-Yield Code Optimization
+%A D.R. Hanson
+%R TR 82-17
+%I Department of Computer Science
+%C The University of Arizona, Tucson
+%D November 1982
+
+%T Program Flow Analysis
+%E S.S. Muchnick and  D. Jones
+%I Prentice-Hall
+%C Englewood Cliffs
+%D 1981
+
+%T A machine independent algorithm for code generation and its
+use in retargetable compilers
+%A R. Glanville
+%R Ph.D. thesis
+%C University of California, Berkeley
+%D December 1977
+
+%T A formal framework for the derivation of machine-specific optimizers
+%A R. Giegerich
+%J TOPLAS
+%V 5
+%N 3
+%P 478-498
+%D July 1983
+
+%T Engineering a compiler: Vax-11 code generation and optimization
+%A P. Anklam
+%A D. Cutler
+%A R. Heinen
+%A M. MacLaren
+%I Digital Equipment Corporation
+%D 1982
+
+%T Analyzing exotic instructions for a retargetable code generator
+%A T.M. Morgan
+%A L.A. Rowe
+%J SIGPLAN Notices
+%V 17
+%N 6
+%P 197-204
+%D June 1982
+
+%T TCOLAda and the Middle End of the PQCC Ada Compiler
+%A B.M. Brosgol
+%J SIGPLAN Notices
+%V 15
+%N 11
+%P 101-112
+%D November 1980
+
+%T Implementation Implications of Ada Generics
+%A G. Bray
+%J Ada Letters
+%V III
+%N 2
+%P 62-71
+%D September 1983
+
+%T Attributed Linear Intermediate Representations for Retargetable
+Code Generators
+%A M. Ganapathi
+%A C.N. Fischer
+%J Software-Practice and Experience
+%V 14
+%N 4
+%P 347-364
+%D April 1984
+
+%T UNCOL: The myth and the fact
+%A T.B. Steel
+%J Annu. Rev. Autom. Program.
+%V 2
+%D 1960
+%P 325-344
+
+%T Experience with a Graham-Glanville Style Code Generator
+%A P. Aigrain
+%A S.L. Graham
+%A R.R. Henry
+%A M.K. McKusick
+%A E.P. Llopart
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 13-24
+
+%T Using Dynamic Programming to generate Optimized Code in a
+Graham-Glanville Style Code Generator
+%A T.W. Christopher
+%A P.J. Hatcher
+%A R.C. Kukuk
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 25-36
+
+%T Peep - An Architectural Description Driven Peephole Optimizer
+%A R.R. Kessler
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 106-110
+
+%T Automatic Generation of Peephole Optimizations
+%A J.W. Davidson
+%A C.W. Fraser
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 111-116
+
+%T Analysing and Compressing Assembly Code
+%A C.W. Fraser
+%A E.W. Myers
+%A A.L. Wendt
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 117-121
+
+%T Register Allocation by Priority-based Coloring
+%A F. Chow
+%A J. Hennessy
+%J SIGPLAN Notices
+%V 19
+%N 6
+%D June 1984
+%P 222-232
+%V 19
+%N 6
+%D June 1984
+%P 117-121
+
+%T Code Selection through Object Code Optimization
+%A J.W. Davidson
+%A C.W. Fraser
+%I Dept. of Computer Science
+%C Univ. of Arizona
+%D November 1981
+
+%T A Portable Machine-Independent Global Optimizer - Design
+and Measurements
+%A F.C. Chow
+%I Computer Systems Laboratory
+%C Stanford University
+%D December 1983
diff --git a/doc/ego/refs.stat b/doc/ego/refs.stat
new file mode 100644
index 000000000..56fcd7fd1
--- /dev/null
+++ b/doc/ego/refs.stat
@@ -0,0 +1,29 @@
+%T An analysis of Pascal Programs
+%A L.R. Carter
+%I UMI Research Press
+%C Ann Arbor, Michigan
+%D 1982
+
+%T An Emperical Study of FORTRAN Programs
+%A D.E. Knuth
+%J Software - Practice and Experience
+%V 1
+%P 105-133
+%D 1971
+
+%T F77 Performance
+%A D.A. Mosher
+%A R.P. Corbett
+%J ;login:
+%V 7
+%N 3
+%D June 1982
+
+%T Ada Language Statistics for the iMAX 432 Operating System
+%A S.F. Zeigler
+%A R.P. Weicker
+%J Ada LETTERS
+%V 2
+%N 6
+%P 63-67
+%D May 1983
-- 
2.34.1