Tidy up handling of associativities by adding an extra table which is indexed by...
authorNick Downing <downing.nick@gmail.com>
Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)
committerNick Downing <downing.nick@gmail.com>
Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)
ast.py
lr1.py

diff --git a/ast.py b/ast.py
index 0fcc13c..181f3ab 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -41,7 +41,7 @@ class Item(element.Element):
 class PYACC(element.Element):
   # internal classes
   class Symbol(element.Element):
-    # GENERATE ELEMENT(str name, list(int) character_set, int precedence, int associativity) BEGIN
+    # GENERATE ELEMENT(str name, list(int) character_set, int precedence) BEGIN
     def __init__(
       self,
       tag = 'PYACC_Symbol',
@@ -50,8 +50,7 @@ class PYACC(element.Element):
       children = [],
       name = '',
       character_set = [],
-      precedence = -1,
-      associativity = -1
+      precedence = -1
     ):
       element.Element.__init__(
         self,
@@ -71,11 +70,6 @@ class PYACC(element.Element):
       if isinstance(precedence, str) else
         precedence
       )
-      self.associativity = (
-        element.deserialize_int(associativity)
-      if isinstance(associativity, str) else
-        associativity
-      )
     def serialize(self, ref_list, indent = 0):
       element.Element.serialize(self, ref_list, indent)
       self.set('name', element.serialize_str(self.name))
@@ -84,7 +78,6 @@ class PYACC(element.Element):
         ' '.join([element.serialize_int(i) for i in self.character_set])
       )
       self.set('precedence', element.serialize_int(self.precedence))
-      self.set('associativity', element.serialize_int(self.associativity))
     def deserialize(self, ref_list):
       element.Element.deserialize(self, ref_list)
       self.name = element.deserialize_str(self.get('name', ''))
@@ -93,7 +86,6 @@ class PYACC(element.Element):
         for i in self.get('character_set', '').split()
       ]
       self.precedence = element.deserialize_int(self.get('precedence', '-1'))
-      self.associativity = element.deserialize_int(self.get('associativity', '-1'))
     def copy(self, factory = None):
       result = element.Element.copy(
         self,
@@ -102,7 +94,6 @@ class PYACC(element.Element):
       result.name = self.name
       result.character_set = self.character_set
       result.precedence = self.precedence
-      result.associativity = self.associativity
       return result
     def repr_serialize(self, params):
       element.Element.repr_serialize(self, params)
@@ -120,10 +111,6 @@ class PYACC(element.Element):
         params.append(
           'precedence = {0:s}'.format(repr(self.precedence))
         )
-      if self.associativity != -1:
-        params.append(
-          'associativity = {0:s}'.format(repr(self.associativity))
-        )
     def __repr__(self):
       params = []
       self.repr_serialize(params)
@@ -568,7 +555,6 @@ class PYACC(element.Element):
           character_to_symbol,
           name_to_symbol,
           -1, # precedence
-          -1, # associativity
           None # tag
         )
         assert production.precedence_terminal == -1
@@ -785,12 +771,9 @@ class PYACC(element.Element):
         )
       )
       precedence = (
-        (-1, -1)
+        -1
       if self.precedence_terminal == -1 else
-        (
-          pyacc.terminal_symbols[self.precedence_terminal].precedence,
-          pyacc.terminal_symbols[self.precedence_terminal].associativity
-        )
+        pyacc.terminal_symbols[self.precedence_terminal].precedence
       )
       if len(_lr1.precedences[3]) and _lr1.precedences[3][-1] == precedence:
         _lr1.precedences[2][-1] = len(_lr1.productions)
@@ -1000,7 +983,6 @@ class PYACC(element.Element):
       character_to_symbol,
       name_to_symbol,
       precedence,
-      associativity,
       tag
     ):
       raise NotImplementedException
@@ -1058,7 +1040,6 @@ class PYACC(element.Element):
       character_to_symbol,
       name_to_symbol,
       precedence,
-      associativity,
       tag
     ):
       return self
@@ -1129,7 +1110,6 @@ class PYACC(element.Element):
       character_to_symbol,
       name_to_symbol,
       precedence,
-      associativity,
       tag
     ):
       if isinstance(self[0], PYACC.Char):
@@ -1165,9 +1145,6 @@ class PYACC(element.Element):
       if precedence != -1:
         assert pyacc.terminal_symbols[self.terminal].precedence == -1
         pyacc.terminal_symbols[self.terminal].precedence = precedence
-      if associativity != -1:
-        assert pyacc.terminal_symbols[self.terminal].associativity == -1
-        pyacc.terminal_symbols[self.terminal].associativity = associativity
       return tag
 
   class NonterminalRef(TagOrSymbolRef):
@@ -1236,7 +1213,6 @@ class PYACC(element.Element):
       character_to_symbol,
       name_to_symbol,
       precedence,
-      associativity,
       tag
     ):
       if isinstance(self[0], PYACC.ID):
@@ -1253,6 +1229,8 @@ class PYACC(element.Element):
           )
       else:
         assert False
+      assert self.user_token == -1
+      assert precedence == -1
       return tag
 
   class Text(element.Element):
@@ -1454,10 +1432,9 @@ class PYACC(element.Element):
           section,
           character_to_symbol,
           name_to_symbol,
-          pyacc.precedences, # precedence
-          self.type # associativity
+          len(pyacc.associativities) # precedence
         )
-        pyacc.precedences += 1
+        pyacc.associativities.append(self.type)
 
     class Start(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1498,8 +1475,7 @@ class PYACC(element.Element):
           section,
           character_to_symbol,
           name_to_symbol,
-          -1, # precedence
-          -1 # associativity
+          -1 # precedence
         )
         pyacc.start_nonterminal = self[0].nonterminal
 
@@ -1536,8 +1512,7 @@ class PYACC(element.Element):
         section,
         character_to_symbol,
         name_to_symbol,
-        precedence,
-        associativity
+        precedence
       ):
         tag = None
         for i in self:
@@ -1547,7 +1522,6 @@ class PYACC(element.Element):
             character_to_symbol,
             name_to_symbol,
             precedence,
-            associativity,
             tag
           )
 
@@ -1590,8 +1564,7 @@ class PYACC(element.Element):
           section,
           character_to_symbol,
           name_to_symbol,
-          -1, # precedence
-          -1 # associativity
+          -1 # precedence
         )
 
     class Type(Item):
@@ -2328,7 +2301,6 @@ class PYACC(element.Element):
           character_to_symbol,
           name_to_symbol,
           -1, # precedence
-          -1, # associativity
           None # tag
         )
         if pyacc.first_nonterminal == -1:
@@ -2460,7 +2432,7 @@ class PYACC(element.Element):
       return 'ast.PYACC.ValueReference({0:s})'.format(', '.join(params))
     # GENERATE END
 
-  # GENERATE ELEMENT(list(ref) prologue_text, int precedences, list(ref) terminal_symbols, list(ref) nonterminal_symbols, list(ref) productions, int first_nonterminal, int start_nonterminal) BEGIN
+  # GENERATE ELEMENT(list(ref) prologue_text, list(ref) terminal_symbols, list(ref) nonterminal_symbols, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
@@ -2468,12 +2440,12 @@ class PYACC(element.Element):
     text = '',
     children = [],
     prologue_text = [],
-    precedences = -1,
     terminal_symbols = [],
     nonterminal_symbols = [],
     productions = [],
     first_nonterminal = -1,
-    start_nonterminal = -1
+    start_nonterminal = -1,
+    associativities = []
   ):
     element.Element.__init__(
       self,
@@ -2483,11 +2455,6 @@ class PYACC(element.Element):
       children
     )
     self.prologue_text = prologue_text
-    self.precedences = (
-      element.deserialize_int(precedences)
-    if isinstance(precedences, str) else
-      precedences
-    )
     self.terminal_symbols = terminal_symbols
     self.nonterminal_symbols = nonterminal_symbols
     self.productions = productions
@@ -2501,13 +2468,17 @@ class PYACC(element.Element):
     if isinstance(start_nonterminal, str) else
       start_nonterminal
     )
+    self.associativities = (
+      [element.deserialize_int(i) for i in associativities.split()]
+    if isinstance(associativities, str) else
+      associativities
+    )
   def serialize(self, ref_list, indent = 0):
     element.Element.serialize(self, ref_list, indent)
     self.set(
       'prologue_text',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.prologue_text])
     )
-    self.set('precedences', element.serialize_int(self.precedences))
     self.set(
       'terminal_symbols',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.terminal_symbols])
@@ -2522,13 +2493,16 @@ class PYACC(element.Element):
     )
     self.set('first_nonterminal', element.serialize_int(self.first_nonterminal))
     self.set('start_nonterminal', element.serialize_int(self.start_nonterminal))
+    self.set(
+      'associativities',
+      ' '.join([element.serialize_int(i) for i in self.associativities])
+    )
   def deserialize(self, ref_list):
     element.Element.deserialize(self, ref_list)
     self.prologue_text = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('prologue_text', '').split()
     ]
-    self.precedences = element.deserialize_int(self.get('precedences', '-1'))
     self.terminal_symbols = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('terminal_symbols', '').split()
@@ -2543,18 +2517,22 @@ class PYACC(element.Element):
     ]
     self.first_nonterminal = element.deserialize_int(self.get('first_nonterminal', '-1'))
     self.start_nonterminal = element.deserialize_int(self.get('start_nonterminal', '-1'))
+    self.associativities = [
+      element.deserialize_int(i)
+      for i in self.get('associativities', '').split()
+    ]
   def copy(self, factory = None):
     result = element.Element.copy(
       self,
       PYACC if factory is None else factory
     )
     result.prologue_text = self.prologue_text
-    result.precedences = self.precedences
     result.terminal_symbols = self.terminal_symbols
     result.nonterminal_symbols = self.nonterminal_symbols
     result.productions = self.productions
     result.first_nonterminal = self.first_nonterminal
     result.start_nonterminal = self.start_nonterminal
+    result.associativities = self.associativities
     return result
   def repr_serialize(self, params):
     element.Element.repr_serialize(self, params)
@@ -2564,10 +2542,6 @@ class PYACC(element.Element):
           ', '.join([repr(i) for i in self.prologue_text])
         )
       )
-    if self.precedences != -1:
-      params.append(
-        'precedences = {0:s}'.format(repr(self.precedences))
-      )
     if len(self.terminal_symbols):
       params.append(
         'terminal_symbols = [{0:s}]'.format(
@@ -2594,6 +2568,12 @@ class PYACC(element.Element):
       params.append(
         'start_nonterminal = {0:s}'.format(repr(self.start_nonterminal))
       )
+    if len(self.associativities):
+      params.append(
+        'associativities = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.associativities])
+        )
+      )
   def __repr__(self):
     params = []
     self.repr_serialize(params)
@@ -2656,7 +2636,10 @@ class PYACC(element.Element):
         )
       ],
       # precedences
-      ([], [], [1], [(-1, -1)]),
+      # (terminal_breaks, terminal_prec, nonterminal_breaks, nonterminal_prec)
+      ([], [], [1], [-1]),
+      # associativities (indexed by *_prec value)
+      self.associativities,
       # n_terminals
       max([0] + [i.character_set[-1] for i in self.terminal_symbols]),
       # eof_terminal
@@ -2667,14 +2650,15 @@ class PYACC(element.Element):
     character0 = 0
     for character1, _, precedence in sorted(
       [
-        j
+        k
         for i in self.terminal_symbols
-        for j in [
-          (i.character_set[0], True, (-1, -1)),
-          (i.character_set[1], False, (i.precedence, i.associativity))
+        for j in range(0, len(i.character_set), 2)
+        for k in [
+          (i.character_set[j], True, -1),
+          (i.character_set[j + 1], False, i.precedence)
         ]
       ] +
-      [(_lr1.n_terminals, True, (-1, -1))]
+      [(_lr1.n_terminals, True, -1)]
     ):
       if character1 > character0:
         if len(_lr1.precedences[1]) and _lr1.precedences[1][-1] == precedence:
diff --git a/lr1.py b/lr1.py
index 2e5dc67..7fba214 100644 (file)
--- a/lr1.py
+++ b/lr1.py
@@ -9,10 +9,15 @@ import sys
 n_characters = 0x100
 
 class LR1:
+  ASSOCIATIVITY_RIGHT = 0
+  ASSOCIATIVITY_LEFT = 1
+  ASSOCIATIVITY_NON = 2
+
   def __init__(
     self,
     productions = [],
     precedences = ([], [], [], []),
+    associativities = [],
     n_terminals = n_characters + 1,
     eof_terminal = n_characters
   ):
@@ -41,14 +46,17 @@ class LR1:
     #   where start_index, end_index are indices into list of character_set,
     #   and tag, kwargs will be passed to apply_markup() hence factory(),
     #   noting that markup has to be applied in reverse order of the list
-    # precedences:
-    #   (terminal_breaks, terminal_precs, nonterminal_breaks, nonterminal_precs)
-    #   similar to action/goto tables but *_precs is list of (prec, assoc) pairs
+    # precedences: (terminal_breaks, terminal_precs, nonterminal_breaks,
+    #   nonterminal_precs) encoded similarly to the action and goto tables
+    # associativities: indexed by value from (non)terminal_precs, contains
+    #   -1 for not specified (equal precedence is a compile time error) or an
+    #   ASSOCIATIVITY_* value (NON means equal precedence is run time error)
     # n_terminals: offset to apply to productions[] index to get symbol
     #   (character set code), also symbol for productions[0] = start production
     # eof_terminal: usually == n_terminals - 1 (must be valid terminal value)
     self.productions = productions
     self.precedences = precedences
+    self.associativities = associativities
     self.n_terminals = n_terminals
     self.eof_terminal = eof_terminal
 
@@ -366,8 +374,8 @@ class LR1:
             assert j < len(self.precedences[0])
             if terminal1 > self.precedences[0][j]:
               terminal1 = self.precedences[0][j]
-            shift_precedence, shift_associativity = self.precedences[1][j]
-            reduce_precedence, reduce_associativity = self.precedences[3][
+            shift_precedence = self.precedences[1][j]
+            reduce_precedence = self.precedences[3][
               bisect.bisect_right(self.precedences[2], reduction)
             ]
             if shift_precedence == -1 or reduce_precedence == -1:
@@ -378,25 +386,25 @@ class LR1:
                   reduction
                 )
               )
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
             elif shift_precedence > reduce_precedence:
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
             elif shift_precedence < reduce_precedence:
-              action = reduction * 2 + 1 # odd means reduce
+              action = reduction * 2 + 1 # reduce
             else:
-              assert shift_associativity == reduce_associativity
-              if shift_associativity == -1: # precedence
-                assert False
-              elif shift_associativity == 0: # right
-                action = add_state(next_items, next_item_to_index) * 2
-              elif shift_associativity == 1: # left
-                action = reduction * 2 + 1 # odd means reduce
-              else: # nonassoc
+              associativity = self.associativities[shift_precedence]
+              if associativity == LR1.ASSOCIATIVITY_RIGHT:
+                action = add_state(next_items, next_item_to_index) * 2 # shift
+              elif associativity == LR1.ASSOCIATIVITY_LEFT:
+                action = reduction * 2 + 1 # reduce
+              elif associativity == LR1.ASSOCIATIVITY_NON: # run time error
                 action = -1
+              else:
+                assert False # not specified (compile time error)
           else:
-            action = reduction * 2 + 1 # odd means reduce
+            action = reduction * 2 + 1 # reduce
         elif len(next_items) != 0:
-          action = add_state(next_items, next_item_to_index) * 2
+          action = add_state(next_items, next_item_to_index) * 2 # shift
         else:
           action = -1
         state_desc[0].append(terminal1)
@@ -495,8 +503,8 @@ class LR1:
             assert j < len(self.precedences[0])
             if terminal1 > self.precedences[0][j]:
               terminal1 = self.precedences[0][j]
-            shift_precedence, shift_associativity = self.precedences[1][j]
-            reduce_precedence, reduce_associativity = self.precedences[3][
+            shift_precedence = self.precedences[1][j]
+            reduce_precedence = self.precedences[3][
               bisect.bisect_right(self.precedences[2], reduction)
             ]
             if shift_precedence == -1 or reduce_precedence == -1:
@@ -507,25 +515,25 @@ class LR1:
                   reduction
                 )
               )
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
             elif shift_precedence > reduce_precedence:
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
             elif shift_precedence < reduce_precedence:
-              action = reduction * 2 + 1 # odd means reduce
+              action = reduction * 2 + 1 # reduce
             else:
-              assert shift_associativity == reduce_associativity
-              if shift_associativity == -1: # precedence
-                assert False
-              elif shift_associativity == 0: # right
-                action = add_state(next_items, next_item_to_index) * 2
-              elif shift_associativity == 1: # left
-                action = reduction * 2 + 1 # odd means reduce
-              else: # nonassoc
+              associativity = self.associativities[shift_precedence]
+              if associativity == LR1.ASSOCIATIVITY_RIGHT:
+                action = add_state(next_items, next_item_to_index) * 2 # shift
+              elif associativity == LR1.ASSOCIATIVITY_LEFT:
+                action = reduction * 2 + 1 # reduce
+              elif associativity == LR1.ASSOCIATIVITY_NON: # run time error
                 action = -1
+              else:
+                assert False # not specified (compile time error)
           else:
-            action = reduction * 2 + 1 # odd means reduce
+            action = reduction * 2 + 1 # reduce
         elif len(next_items) != 0:
-          action = add_state(next_items, next_item_to_index) * 2
+          action = add_state(next_items, next_item_to_index) * 2 # shift
         else:
           action = -1
         state_desc[0].append(terminal1)
@@ -554,9 +562,10 @@ class LR1:
     return _lr1dfa
 
   def __repr__(self):
-    return 'lr1.LR1({0:s}, {1:s}, {2:d}, {3:d})'.format(
+    return 'lr1.LR1({0:s}, {1:s}, {2:s}, {3:d}, {4:d})'.format(
       repr(self.productions),
       repr(self.precedences),
+      repr(self.associativities),
       self.n_terminals,
       self.eof_terminal
     )