Get precedences working in the YACC/Bison way where both terminals and productions...

author Nick Downing <downing.nick@gmail.com>

Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)

committer Nick Downing <downing.nick@gmail.com>

Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
author Nick Downing <downing.nick@gmail.com>
Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
committer Nick Downing <downing.nick@gmail.com>
Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
diff --git a/ast.py b/ast.py

index 596c722..0fcc13c 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -653,7 +653,7 @@ class PYACC(element.Element):
          else:
            assert False
          if self.symbol >= 0:
-          self.last_terminal = self.symbol
+          production.last_terminal = self.symbol
        def add_to_symbols(self, pyacc, last_action, symbols):
          assert last_action is None
          symbols.append(
@@ -776,13 +776,6 @@ class PYACC(element.Element):
          last_action = i.add_to_symbols(pyacc, last_action, symbols)
        _lr1.productions.append(
          (
-          # precedence
-          (
-            (pyacc.terminal_symbols[self.precedence_terminal].precedence << 2) |
-            (pyacc.terminal_symbols[self.precedence_terminal].associativity & 3)
-          if self.precedence_terminal != -1 else
-            -1
-          ),
            # symbols (list of terminal_set, nonterminal_set)
            symbols,
            # lookaheads (list of initial_set, can_be_empty)
@@ -791,6 +784,19 @@ class PYACC(element.Element):
            last_action if last_action is not None else PYACC.BracedCode()
          )
        )
+      precedence = (
+        (-1, -1)
+      if self.precedence_terminal == -1 else
+        (
+          pyacc.terminal_symbols[self.precedence_terminal].precedence,
+          pyacc.terminal_symbols[self.precedence_terminal].associativity
+        )
+      )
+      if len(_lr1.precedences[3]) and _lr1.precedences[3][-1] == precedence:
+        _lr1.precedences[2][-1] = len(_lr1.productions)
+      else:
+        _lr1.precedences[2].append(len(_lr1.productions))
+        _lr1.precedences[3].append(precedence)
  
    class Section(element.Element):
      # GENERATE ELEMENT() BEGIN
@@ -2633,10 +2639,9 @@ class PYACC(element.Element):
  
    def to_lr1(self):
      _lr1 = lr1.LR1(
+      # productions
        [
          (
-          # precedence
-          -1,
            # symbols (list of terminal_set, nonterminal_set)
            [
              (
@@ -2650,9 +2655,36 @@ class PYACC(element.Element):
            PYACC.BracedCode()
          )
        ],
-      max([0] + [i.character_set[1] for i in self.terminal_symbols]),
+      # precedences
+      ([], [], [1], [(-1, -1)]),
+      # n_terminals
+      max([0] + [i.character_set[-1] for i in self.terminal_symbols]),
+      # eof_terminal
        0
      )
+
+    # compute terminals precedence table, it is out of order
+    character0 = 0
+    for character1, _, precedence in sorted(
+      [
+        j
+        for i in self.terminal_symbols
+        for j in [
+          (i.character_set[0], True, (-1, -1)),
+          (i.character_set[1], False, (i.precedence, i.associativity))
+        ]
+      ] +
+      [(_lr1.n_terminals, True, (-1, -1))]
+    ):
+      if character1 > character0:
+        if len(_lr1.precedences[1]) and _lr1.precedences[1][-1] == precedence:
+          _lr1.precedences[0][-1] = character1
+        else:
+          _lr1.precedences[0].append(character1)
+          _lr1.precedences[1].append(precedence)
+        character0 = character1
+
+    # compute productions and nonterminals precedence table
      for i in self.productions:
        i.add_to_lr1(self, _lr1)
  
@@ -2660,13 +2692,13 @@ class PYACC(element.Element):
      modified = True
      while modified:
        modified = False
-      for _, symbols, lookaheads, _ in _lr1.productions:
+      for symbols, lookaheads, _ in _lr1.productions:
          for i in range(len(symbols) - 1, -1, -1):
            initial_set, nonterminal_set = symbols[i]
            can_be_empty = False
            for j in range(0, len(nonterminal_set), 2):
              for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
-              child_initial_set, child_can_be_empty = _lr1.productions[k][2][0]
+              child_initial_set, child_can_be_empty = _lr1.productions[k][1][0]
                initial_set = bisect_set.bisect_set_or(
                  initial_set,
                  child_initial_set
diff --git a/bison_lr1dfa.py b/bison_lr1dfa.py

index 253b81f..8130764 100644 (file)
--- a/bison_lr1dfa.py
+++ b/bison_lr1dfa.py
@@ -66,7 +66,7 @@ class BisonLR1DFA:
        numpy.int16
      )
      new_action_table[:, translate_terminals] = action_table
-    action_table = new_action_table 
+    action_table = new_action_table
      new_goto_table = numpy.zeros(
        (n_nonterminals, len(lr1dfa.states)),
        numpy.int16
@@ -218,7 +218,7 @@ class BisonLR1DFA:
              )
              new_entry_used[:entry_size, :] = entry_used[:entry_size, :]
              entry_used = new_entry_used
-   
+
            # find a suitable spot and store differences from default
            start_index = self.entry_base - indices[0]
            while start_index < entry_size:
@@ -366,7 +366,7 @@ def generate(pyacc, skel_file, out_file):
                  if i.character_set[0] >= 0x20 else
                    '\\\\x{0:02x}'.format(i.character_set[0])
                  )
-              ) 
+              )
                for i in pyacc.terminal_symbols
              ] +
              ['"{0:s}"'.format(i.name) for i in pyacc.nonterminal_symbols] +
diff --git a/lr1.py b/lr1.py

index 1e107ad..2e5dc67 100644 (file)
--- a/lr1.py
+++ b/lr1.py
@@ -12,17 +12,16 @@ class LR1:
    def __init__(
      self,
      productions = [],
+    precedences = ([], [], [], []),
      n_terminals = n_characters + 1,
      eof_terminal = n_characters
    ):
      # productions: list of production
      # production: (
-    #   priority,
      #   symbols,
      #   lookaheads,
      #   ref_data
      # )
-    # priority: bit 0 = right to left, bits 1: = numeric priority
      # symbols: list of symbol_desc
      # symbol_desc: (terminal_set, nonterminal_set)
      # terminal_set: similar to character_set, even length list of pairs of breaks
@@ -42,10 +41,14 @@ class LR1:
      #   where start_index, end_index are indices into list of character_set,
      #   and tag, kwargs will be passed to apply_markup() hence factory(),
      #   noting that markup has to be applied in reverse order of the list
+    # precedences:
+    #   (terminal_breaks, terminal_precs, nonterminal_breaks, nonterminal_precs)
+    #   similar to action/goto tables but *_precs is list of (prec, assoc) pairs
      # n_terminals: offset to apply to productions[] index to get symbol
      #   (character set code), also symbol for productions[0] = start production
      # eof_terminal: usually == n_terminals - 1 (must be valid terminal value)
      self.productions = productions
+    self.precedences = precedences
      self.n_terminals = n_terminals
      self.eof_terminal = eof_terminal
  
@@ -58,7 +61,7 @@ class LR1:
        i = queue[qhead]
        in_queue[i] = False
        j, k, lookahead_set = items[i]
-      _, symbols, lookaheads, _ = self.productions[j]
+      symbols, lookaheads, _ = self.productions[j]
        if k < len(symbols):
          _, nonterminal_set = symbols[k]
          if len(nonterminal_set):
@@ -82,7 +85,7 @@ class LR1:
                    if not in_queue[n]:
                      # optimization: do not re-queue unless it is transparent
                      # to changes in the lookahead set wrt. further propagation
-                    _, child_symbols, child_lookaheads, _ = self.productions[m]
+                    child_symbols, child_lookaheads, _ = self.productions[m]
                      if len(child_symbols) and child_lookaheads[1][1]:
                        in_queue[n] = True
                        queue.append(n)
@@ -101,7 +104,7 @@ class LR1:
      terminal0 = 0
      terminal1 = self.n_terminals
      for i, j, lookahead_set in items:
-      _, symbols, _, _ = self.productions[i]
+      symbols, _, _ = self.productions[i]
        if j < len(symbols):
          terminal_set, _ = symbols[j]
          k = bisect.bisect_right(terminal_set, terminal)
@@ -129,7 +132,7 @@ class LR1:
      nonterminal0 = 0
      nonterminal1 = len(self.productions)
      for i, j, lookahead_set in items:
-      _, symbols, _, _ = self.productions[i]
+      symbols, _, _ = self.productions[i]
        if j < len(symbols):
          _, nonterminal_set = symbols[j]
          k = bisect.bisect_right(nonterminal_set, nonterminal)
@@ -173,7 +176,7 @@ class LR1:
    #          )
    #        )
    #      reduce = min(reductions)
-  #      _, symbols, _, ref_data = self.productions[reduce]
+  #      symbols, _, ref_data = self.productions[reduce]
    #      base = len(value_stack) - len(symbols) - 1
    #      for j in range(len(ref_data) - 1, -1, -1):
    #        k, l, tag, _ = ref_data[j]
@@ -259,7 +262,7 @@ class LR1:
    #          )
    #        )
    #      reduce = min(reductions)
-  #      _, symbols, _, ref_data = self.productions[reduce]
+  #      symbols, _, ref_data = self.productions[reduce]
    #      base = len(value_stack) - len(symbols) - 1
    #      end_relative = len(value_stack)
    #      for j in range(len(ref_data) - 1, -1, -1):
@@ -310,7 +313,7 @@ class LR1:
        [],
        [
          (len(symbols), ref_data)
-        for _, symbols, _, ref_data in self.productions
+        for symbols, _, ref_data in self.productions
        ],
        self.n_terminals,
        self.eof_terminal
@@ -347,25 +350,53 @@ class LR1:
            self.lookahead_item_set_action(items, terminal)
          )
          assert terminal0 == terminal and terminal1 > terminal
-        if len(next_items) != 0:
-          if len(reductions) != 0:
-            sys.stderr.write(
-              'state {0:d} shift/reduce conflict: {1:s} vs {2:s}\n'.format(
-                len(_lr1dfa.states),
-                ','.join([str(i) for i, _, _ in next_items]),
-                ','.join([str(i) for i in reductions])
-              )
-            )
-          action = add_state(next_items, next_item_to_index) * 2
-        elif len(reductions) != 0:
+        if len(reductions) != 0:
            if len(reductions) != 1:
              sys.stderr.write(
                'state {0:d} reduce/reduce conflict: {1:s}\n'.format(
                  len(_lr1dfa.states),
-                ','.join([str(i) for i in reductions])
+                ', '.join([str(i) for i in sorted(reductions)])
                )
              )
-          action = min(reductions) * 2 + 1 # odd means reduce
+          reduction = min(reductions)
+          if len(next_items) != 0:
+            j = bisect.bisect_right(self.precedences[0], terminal)
+            if j > 0 and terminal0 < self.precedences[0][j - 1]:
+              terminal0 = self.precedences[0][j - 1]
+            assert j < len(self.precedences[0])
+            if terminal1 > self.precedences[0][j]:
+              terminal1 = self.precedences[0][j]
+            shift_precedence, shift_associativity = self.precedences[1][j]
+            reduce_precedence, reduce_associativity = self.precedences[3][
+              bisect.bisect_right(self.precedences[2], reduction)
+            ]
+            if shift_precedence == -1 or reduce_precedence == -1:
+              sys.stderr.write(
+                'state {0:d} shift/reduce conflict: {1:d} vs {2:d}\n'.format(
+                  len(_lr1dfa.states),
+                  terminal,
+                  reduction
+                )
+              )
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence > reduce_precedence:
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence < reduce_precedence:
+              action = reduction * 2 + 1 # odd means reduce
+            else:
+              assert shift_associativity == reduce_associativity
+              if shift_associativity == -1: # precedence
+                assert False
+              elif shift_associativity == 0: # right
+                action = add_state(next_items, next_item_to_index) * 2
+              elif shift_associativity == 1: # left
+                action = reduction * 2 + 1 # odd means reduce
+              else: # nonassoc
+                action = -1
+          else:
+            action = reduction * 2 + 1 # odd means reduce
+        elif len(next_items) != 0:
+          action = add_state(next_items, next_item_to_index) * 2
          else:
            action = -1
          state_desc[0].append(terminal1)
@@ -394,7 +425,7 @@ class LR1:
        [],
        [
          (len(symbols), ref_data)
-        for _, symbols, _, ref_data in self.productions
+        for symbols, _, ref_data in self.productions
        ],
        self.n_terminals,
        self.eof_terminal
@@ -448,25 +479,53 @@ class LR1:
            self.lookahead_item_set_action(items, terminal)
          )
          assert terminal0 == terminal and terminal1 > terminal
-        if len(next_items) != 0:
-          if len(reductions) != 0:
-            sys.stderr.write(
-              'state {0:d} shift/reduce conflict: {1:s} vs {2:s}\n'.format(
-                i,
-                ','.join([str(j) for j, _, _ in next_items]),
-                ','.join([str(j) for j in reductions])
-              )
-            )
-          action = add_state(next_items, next_item_to_index) * 2
-        elif len(reductions) != 0:
+        if len(reductions) != 0:
            if len(reductions) != 1:
              sys.stderr.write(
                'state {0:d} reduce/reduce conflict: {1:s}\n'.format(
-                i,
-                ','.join([str(j) for j in reductions])
+                len(_lr1dfa.states),
+                ', '.join([str(i) for i in sorted(reductions)])
                )
              )
-          action = min(reductions) * 2 + 1 # odd means reduce
+          reduction = min(reductions)
+          if len(next_items) != 0:
+            j = bisect.bisect_right(self.precedences[0], terminal)
+            if j > 0 and terminal0 < self.precedences[0][j - 1]:
+              terminal0 = self.precedences[0][j - 1]
+            assert j < len(self.precedences[0])
+            if terminal1 > self.precedences[0][j]:
+              terminal1 = self.precedences[0][j]
+            shift_precedence, shift_associativity = self.precedences[1][j]
+            reduce_precedence, reduce_associativity = self.precedences[3][
+              bisect.bisect_right(self.precedences[2], reduction)
+            ]
+            if shift_precedence == -1 or reduce_precedence == -1:
+              sys.stderr.write(
+                'state {0:d} shift/reduce conflict: {1:d} vs {2:d}\n'.format(
+                  len(_lr1dfa.states),
+                  terminal,
+                  reduction
+                )
+              )
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence > reduce_precedence:
+              action = add_state(next_items, next_item_to_index) * 2
+            elif shift_precedence < reduce_precedence:
+              action = reduction * 2 + 1 # odd means reduce
+            else:
+              assert shift_associativity == reduce_associativity
+              if shift_associativity == -1: # precedence
+                assert False
+              elif shift_associativity == 0: # right
+                action = add_state(next_items, next_item_to_index) * 2
+              elif shift_associativity == 1: # left
+                action = reduction * 2 + 1 # odd means reduce
+              else: # nonassoc
+                action = -1
+          else:
+            action = reduction * 2 + 1 # odd means reduce
+        elif len(next_items) != 0:
+          action = add_state(next_items, next_item_to_index) * 2
          else:
            action = -1
          state_desc[0].append(terminal1)
@@ -495,8 +554,9 @@ class LR1:
      return _lr1dfa
  
    def __repr__(self):
-    return 'lr1.LR1({0:s}, {1:d}, {2:d})'.format(
+    return 'lr1.LR1({0:s}, {1:s}, {2:d}, {3:d})'.format(
        repr(self.productions),
+      repr(self.precedences),
        self.n_terminals,
        self.eof_terminal
      )
author	Nick Downing <downing.nick@gmail.com>
	Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Wed, 18 Jul 2018 23:43:31 +0000 (09:43 +1000)
ast.py		patch \| blob \| history
bison_lr1dfa.py		patch \| blob \| history
lr1.py		patch \| blob \| history