Get precedences working in the YACC/Bison way where both terminals and productions...
authorNick Downing <downing.nick@gmail.com>
Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
committerNick Downing <downing.nick@gmail.com>
Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
ast.py
bison_lr1dfa.py
lr1.py

diff --git a/ast.py b/ast.py
index 596c722..0fcc13c 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -653,7 +653,7 @@ class PYACC(element.Element):
         else:
           assert False
         if self.symbol >= 0:
-          self.last_terminal = self.symbol
+          production.last_terminal = self.symbol
       def add_to_symbols(self, pyacc, last_action, symbols):
         assert last_action is None
         symbols.append(
@@ -776,13 +776,6 @@ class PYACC(element.Element):
         last_action = i.add_to_symbols(pyacc, last_action, symbols)
       _lr1.productions.append(
         (
-          # precedence
-          (
-            (pyacc.terminal_symbols[self.precedence_terminal].precedence << 2) |
-            (pyacc.terminal_symbols[self.precedence_terminal].associativity & 3)
-          if self.precedence_terminal != -1 else
-            -1
-          ),
           # symbols (list of terminal_set, nonterminal_set)
           symbols,
           # lookaheads (list of initial_set, can_be_empty)
@@ -791,6 +784,19 @@ class PYACC(element.Element):
           last_action if last_action is not None else PYACC.BracedCode()
         )
       )
+      precedence = (
+        (-1, -1)
+      if self.precedence_terminal == -1 else
+        (
+          pyacc.terminal_symbols[self.precedence_terminal].precedence,
+          pyacc.terminal_symbols[self.precedence_terminal].associativity
+        )
+      )
+      if len(_lr1.precedences[3]) and _lr1.precedences[3][-1] == precedence:
+        _lr1.precedences[2][-1] = len(_lr1.productions)
+      else:
+        _lr1.precedences[2].append(len(_lr1.productions))
+        _lr1.precedences[3].append(precedence)
 
   class Section(element.Element):
     # GENERATE ELEMENT() BEGIN
@@ -2633,10 +2639,9 @@ class PYACC(element.Element):
 
   def to_lr1(self):
     _lr1 = lr1.LR1(
+      # productions
       [
         (
-          # precedence
-          -1,
           # symbols (list of terminal_set, nonterminal_set)
           [
             (
@@ -2650,9 +2655,36 @@ class PYACC(element.Element):
           PYACC.BracedCode()
         )
       ],
-      max([0] + [i.character_set[1] for i in self.terminal_symbols]),
+      # precedences
+      ([], [], [1], [(-1, -1)]),
+      # n_terminals
+      max([0] + [i.character_set[-1] for i in self.terminal_symbols]),
+      # eof_terminal
       0
     )
+
+    # compute terminals precedence table, it is out of order
+    character0 = 0
+    for character1, _, precedence in sorted(
+      [
+        j
+        for i in self.terminal_symbols
+        for j in [
+          (i.character_set[0], True, (-1, -1)),
+          (i.character_set[1], False, (i.precedence, i.associativity))
+        ]
+      ] +
+      [(_lr1.n_terminals, True, (-1, -1))]
+    ):
+      if character1 > character0:
+        if len(_lr1.precedences[1]) and _lr1.precedences[1][-1] == precedence:
+          _lr1.precedences[0][-1] = character1
+        else:
+          _lr1.precedences[0].append(character1)
+          _lr1.precedences[1].append(precedence)
+        character0 = character1
+
+    # compute productions and nonterminals precedence table
     for i in self.productions:
       i.add_to_lr1(self, _lr1)
 
@@ -2660,13 +2692,13 @@ class PYACC(element.Element):
     modified = True
     while modified:
       modified = False
-      for _, symbols, lookaheads, _ in _lr1.productions:
+      for symbols, lookaheads, _ in _lr1.productions:
         for i in range(len(symbols) - 1, -1, -1):
           initial_set, nonterminal_set = symbols[i]
           can_be_empty = False
           for j in range(0, len(nonterminal_set), 2):
             for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
-              child_initial_set, child_can_be_empty = _lr1.productions[k][2][0]
+              child_initial_set, child_can_be_empty = _lr1.productions[k][1][0]
               initial_set = bisect_set.bisect_set_or(
                 initial_set,
                 child_initial_set
index 253b81f..8130764 100644 (file)
@@ -66,7 +66,7 @@ class BisonLR1DFA:
       numpy.int16
     )
     new_action_table[:, translate_terminals] = action_table
-    action_table = new_action_table 
+    action_table = new_action_table
     new_goto_table = numpy.zeros(
       (n_nonterminals, len(lr1dfa.states)),
       numpy.int16
@@ -218,7 +218,7 @@ class BisonLR1DFA:
             )
             new_entry_used[:entry_size, :] = entry_used[:entry_size, :]
             entry_used = new_entry_used
-   
+
           # find a suitable spot and store differences from default
           start_index = self.entry_base - indices[0]
           while start_index < entry_size:
@@ -366,7 +366,7 @@ def generate(pyacc, skel_file, out_file):
                 if i.character_set[0] >= 0x20 else
                   '\\\\x{0:02x}'.format(i.character_set[0])
                 )
-              ) 
+              )
               for i in pyacc.terminal_symbols
             ] +
             ['"{0:s}"'.format(i.name) for i in pyacc.nonterminal_symbols] +
diff --git a/lr1.py b/lr1.py
index 1e107ad..2e5dc67 100644 (file)
--- a/lr1.py
+++ b/lr1.py
@@ -12,17 +12,16 @@ class LR1:
   def __init__(
     self,
     productions = [],
+    precedences = ([], [], [], []),
     n_terminals = n_characters + 1,
     eof_terminal = n_characters
   ):
     # productions: list of production
     # production: (
-    #   priority,
     #   symbols,
     #   lookaheads,
     #   ref_data
     # )
-    # priority: bit 0 = right to left, bits 1: = numeric priority
     # symbols: list of symbol_desc
     # symbol_desc: (terminal_set, nonterminal_set)
     # terminal_set: similar to character_set, even length list of pairs of breaks
@@ -42,10 +41,14 @@ class LR1:
     #   where start_index, end_index are indices into list of character_set,
     #   and tag, kwargs will be passed to apply_markup() hence factory(),
     #   noting that markup has to be applied in reverse order of the list
+    # precedences:
+    #   (terminal_breaks, terminal_precs, nonterminal_breaks, nonterminal_precs)
+    #   similar to action/goto tables but *_precs is list of (prec, assoc) pairs
     # n_terminals: offset to apply to productions[] index to get symbol
     #   (character set code), also symbol for productions[0] = start production
     # eof_terminal: usually == n_terminals - 1 (must be valid terminal value)
     self.productions = productions
+    self.precedences = precedences
     self.n_terminals = n_terminals
     self.eof_terminal = eof_terminal
 
@@ -58,7 +61,7 @@ class LR1:
       i = queue[qhead]
       in_queue[i] = False
       j, k, lookahead_set = items[i]
-      _, symbols, lookaheads, _ = self.productions[j]
+      symbols, lookaheads, _ = self.productions[j]
       if k < len(symbols):
         _, nonterminal_set = symbols[k]
         if len(nonterminal_set):
@@ -82,7 +85,7 @@ class LR1:
                   if not in_queue[n]:
                     # optimization: do not re-queue unless it is transparent
                     # to changes in the lookahead set wrt. further propagation
-                    _, child_symbols, child_lookaheads, _ = self.productions[m]
+                    child_symbols, child_lookaheads, _ = self.productions[m]
                     if len(child_symbols) and child_lookaheads[1][1]:
                       in_queue[n] = True
                       queue.append(n)
@@ -101,7 +104,7 @@ class LR1:
     terminal0 = 0
     terminal1 = self.n_terminals
     for i, j, lookahead_set in items:
-      _, symbols, _, _ = self.productions[i]
+      symbols, _, _ = self.productions[i]
       if j < len(symbols):
         terminal_set, _ = symbols[j]
         k = bisect.bisect_right(terminal_set, terminal)
@@ -129,7 +132,7 @@ class LR1:
     nonterminal0 = 0
     nonterminal1 = len(self.productions)
     for i, j, lookahead_set in items:
-      _, symbols, _, _ = self.productions[i]
+      symbols, _, _ = self.productions[i]
       if j < len(symbols):
         _, nonterminal_set = symbols[j]
         k = bisect.bisect_right(nonterminal_set, nonterminal)
@@ -173,7 +176,7 @@ class LR1:
   #          )
   #        )
   #      reduce = min(reductions)
-  #      _, symbols, _, ref_data = self.productions[reduce]
+  #      symbols, _, ref_data = self.productions[reduce]
   #      base = len(value_stack) - len(symbols) - 1
   #      for j in range(len(ref_data) - 1, -1, -1):
   #        k, l, tag, _ = ref_data[j]
@@ -259,7 +262,7 @@ class LR1:
   #          )
   #        )
   #      reduce = min(reductions)
-  #      _, symbols, _, ref_data = self.productions[reduce]
+  #      symbols, _, ref_data = self.productions[reduce]
   #      base = len(value_stack) - len(symbols) - 1
   #      end_relative = len(value_stack)
   #      for j in range(len(ref_data) - 1, -1, -1):
@@ -310,7 +313,7 @@ class LR1:
       [],
       [
         (len(symbols), ref_data)
-        for _, symbols, _, ref_data in self.productions
+        for symbols, _, ref_data in self.productions
       ],
       self.n_terminals,
       self.eof_terminal
@@ -347,25 +350,53 @@ class LR1:
           self.lookahead_item_set_action(items, terminal)
         )
         assert terminal0 == terminal and terminal1 > terminal
-        if len(next_items) != 0:
-          if len(reductions) != 0:
-            sys.stderr.write(
-              'state {0:d} shift/reduce conflict: {1:s} vs {2:s}\n'.format(
-                len(_lr1dfa.states),
-                ','.join([str(i) for i, _, _ in next_items]),
-                ','.join([str(i) for i in reductions])
-              )
-            )
-          action = add_state(next_items, next_item_to_index) * 2
-        elif len(reductions) != 0:
+        if len(reductions) != 0:
           if len(reductions) != 1:
             sys.stderr.write(
               'state {0:d} reduce/reduce conflict: {1:s}\n'.format(
                 len(_lr1dfa.states),
-                ','.join([str(i) for i in reductions])
+                ', '.join([str(i) for i in sorted(reductions)])
               )
             )
-          action = min(reductions) * 2 + 1 # odd means reduce
+          reduction = min(reductions)
+          if len(next_items) != 0:
+            j = bisect.bisect_right(self.precedences[0], terminal)
+            if j > 0 and terminal0 < self.precedences[0][j - 1]:
+              terminal0 = self.precedences[0][j - 1]
+            assert j < len(self.precedences[0])
+            if terminal1 > self.precedences[0][j]:
+              terminal1 = self.precedences[0][j]
+            shift_precedence, shift_associativity = self.precedences[1][j]
+            reduce_precedence, reduce_associativity = self.precedences[3][
+              bisect.bisect_right(self.precedences[2], reduction)
+            ]
+            if shift_precedence == -1 or reduce_precedence == -1:
+              sys.stderr.write(
+                'state {0:d} shift/reduce conflict: {1:d} vs {2:d}\n'.format(
+                  len(_lr1dfa.states),
+                  terminal,
+                  reduction
+                )
+              )
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence > reduce_precedence:
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence < reduce_precedence:
+              action = reduction * 2 + 1 # odd means reduce
+            else:
+              assert shift_associativity == reduce_associativity
+              if shift_associativity == -1: # precedence
+                assert False
+              elif shift_associativity == 0: # right
+                action = add_state(next_items, next_item_to_index) * 2
+              elif shift_associativity == 1: # left
+                action = reduction * 2 + 1 # odd means reduce
+              else: # nonassoc
+                action = -1
+          else:
+            action = reduction * 2 + 1 # odd means reduce
+        elif len(next_items) != 0:
+          action = add_state(next_items, next_item_to_index) * 2
         else:
           action = -1
         state_desc[0].append(terminal1)
@@ -394,7 +425,7 @@ class LR1:
       [],
       [
         (len(symbols), ref_data)
-        for _, symbols, _, ref_data in self.productions
+        for symbols, _, ref_data in self.productions
       ],
       self.n_terminals,
       self.eof_terminal
@@ -448,25 +479,53 @@ class LR1:
           self.lookahead_item_set_action(items, terminal)
         )
         assert terminal0 == terminal and terminal1 > terminal
-        if len(next_items) != 0:
-          if len(reductions) != 0:
-            sys.stderr.write(
-              'state {0:d} shift/reduce conflict: {1:s} vs {2:s}\n'.format(
-                i,
-                ','.join([str(j) for j, _, _ in next_items]),
-                ','.join([str(j) for j in reductions])
-              )
-            )
-          action = add_state(next_items, next_item_to_index) * 2
-        elif len(reductions) != 0:
+        if len(reductions) != 0:
           if len(reductions) != 1:
             sys.stderr.write(
               'state {0:d} reduce/reduce conflict: {1:s}\n'.format(
-                i,
-                ','.join([str(j) for j in reductions])
+                len(_lr1dfa.states),
+                ', '.join([str(i) for i in sorted(reductions)])
               )
             )
-          action = min(reductions) * 2 + 1 # odd means reduce
+          reduction = min(reductions)
+          if len(next_items) != 0:
+            j = bisect.bisect_right(self.precedences[0], terminal)
+            if j > 0 and terminal0 < self.precedences[0][j - 1]:
+              terminal0 = self.precedences[0][j - 1]
+            assert j < len(self.precedences[0])
+            if terminal1 > self.precedences[0][j]:
+              terminal1 = self.precedences[0][j]
+            shift_precedence, shift_associativity = self.precedences[1][j]
+            reduce_precedence, reduce_associativity = self.precedences[3][
+              bisect.bisect_right(self.precedences[2], reduction)
+            ]
+            if shift_precedence == -1 or reduce_precedence == -1:
+              sys.stderr.write(
+                'state {0:d} shift/reduce conflict: {1:d} vs {2:d}\n'.format(
+                  len(_lr1dfa.states),
+                  terminal,
+                  reduction
+                )
+              )
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence > reduce_precedence:
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence < reduce_precedence:
+              action = reduction * 2 + 1 # odd means reduce
+            else:
+              assert shift_associativity == reduce_associativity
+              if shift_associativity == -1: # precedence
+                assert False
+              elif shift_associativity == 0: # right
+                action = add_state(next_items, next_item_to_index) * 2
+              elif shift_associativity == 1: # left
+                action = reduction * 2 + 1 # odd means reduce
+              else: # nonassoc
+                action = -1
+          else:
+            action = reduction * 2 + 1 # odd means reduce
+        elif len(next_items) != 0:
+          action = add_state(next_items, next_item_to_index) * 2
         else:
           action = -1
         state_desc[0].append(terminal1)
@@ -495,8 +554,9 @@ class LR1:
     return _lr1dfa
 
   def __repr__(self):
-    return 'lr1.LR1({0:s}, {1:d}, {2:d})'.format(
+    return 'lr1.LR1({0:s}, {1:s}, {2:d}, {3:d})'.format(
       repr(self.productions),
+      repr(self.precedences),
       self.n_terminals,
       self.eof_terminal
     )