Tidy up handling of associativities by adding an extra table which is indexed by...

author Nick Downing <downing.nick@gmail.com>

Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)

committer Nick Downing <downing.nick@gmail.com>

Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)
author Nick Downing <downing.nick@gmail.com>
Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)
committer Nick Downing <downing.nick@gmail.com>
Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)
diff --git a/ast.py b/ast.py

index 0fcc13c..181f3ab 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -41,7 +41,7 @@ class Item(element.Element):
  class PYACC(element.Element):
    # internal classes
    class Symbol(element.Element):
-    # GENERATE ELEMENT(str name, list(int) character_set, int precedence, int associativity) BEGIN
+    # GENERATE ELEMENT(str name, list(int) character_set, int precedence) BEGIN
      def __init__(
        self,
        tag = 'PYACC_Symbol',
@@ -50,8 +50,7 @@ class PYACC(element.Element):
        children = [],
        name = '',
        character_set = [],
-      precedence = -1,
-      associativity = -1
+      precedence = -1
      ):
        element.Element.__init__(
          self,
@@ -71,11 +70,6 @@ class PYACC(element.Element):
        if isinstance(precedence, str) else
          precedence
        )
-      self.associativity = (
-        element.deserialize_int(associativity)
-      if isinstance(associativity, str) else
-        associativity
-      )
      def serialize(self, ref_list, indent = 0):
        element.Element.serialize(self, ref_list, indent)
        self.set('name', element.serialize_str(self.name))
@@ -84,7 +78,6 @@ class PYACC(element.Element):
          ' '.join([element.serialize_int(i) for i in self.character_set])
        )
        self.set('precedence', element.serialize_int(self.precedence))
-      self.set('associativity', element.serialize_int(self.associativity))
      def deserialize(self, ref_list):
        element.Element.deserialize(self, ref_list)
        self.name = element.deserialize_str(self.get('name', ''))
@@ -93,7 +86,6 @@ class PYACC(element.Element):
          for i in self.get('character_set', '').split()
        ]
        self.precedence = element.deserialize_int(self.get('precedence', '-1'))
-      self.associativity = element.deserialize_int(self.get('associativity', '-1'))
      def copy(self, factory = None):
        result = element.Element.copy(
          self,
@@ -102,7 +94,6 @@ class PYACC(element.Element):
        result.name = self.name
        result.character_set = self.character_set
        result.precedence = self.precedence
-      result.associativity = self.associativity
        return result
      def repr_serialize(self, params):
        element.Element.repr_serialize(self, params)
@@ -120,10 +111,6 @@ class PYACC(element.Element):
          params.append(
            'precedence = {0:s}'.format(repr(self.precedence))
          )
-      if self.associativity != -1:
-        params.append(
-          'associativity = {0:s}'.format(repr(self.associativity))
-        )
      def __repr__(self):
        params = []
        self.repr_serialize(params)
@@ -568,7 +555,6 @@ class PYACC(element.Element):
            character_to_symbol,
            name_to_symbol,
            -1, # precedence
-          -1, # associativity
            None # tag
          )
          assert production.precedence_terminal == -1
@@ -785,12 +771,9 @@ class PYACC(element.Element):
          )
        )
        precedence = (
-        (-1, -1)
+        -1
        if self.precedence_terminal == -1 else
-        (
-          pyacc.terminal_symbols[self.precedence_terminal].precedence,
-          pyacc.terminal_symbols[self.precedence_terminal].associativity
-        )
+        pyacc.terminal_symbols[self.precedence_terminal].precedence
        )
        if len(_lr1.precedences[3]) and _lr1.precedences[3][-1] == precedence:
          _lr1.precedences[2][-1] = len(_lr1.productions)
@@ -1000,7 +983,6 @@ class PYACC(element.Element):
        character_to_symbol,
        name_to_symbol,
        precedence,
-      associativity,
        tag
      ):
        raise NotImplementedException
@@ -1058,7 +1040,6 @@ class PYACC(element.Element):
        character_to_symbol,
        name_to_symbol,
        precedence,
-      associativity,
        tag
      ):
        return self
@@ -1129,7 +1110,6 @@ class PYACC(element.Element):
        character_to_symbol,
        name_to_symbol,
        precedence,
-      associativity,
        tag
      ):
        if isinstance(self[0], PYACC.Char):
@@ -1165,9 +1145,6 @@ class PYACC(element.Element):
        if precedence != -1:
          assert pyacc.terminal_symbols[self.terminal].precedence == -1
          pyacc.terminal_symbols[self.terminal].precedence = precedence
-      if associativity != -1:
-        assert pyacc.terminal_symbols[self.terminal].associativity == -1
-        pyacc.terminal_symbols[self.terminal].associativity = associativity
        return tag
  
    class NonterminalRef(TagOrSymbolRef):
@@ -1236,7 +1213,6 @@ class PYACC(element.Element):
        character_to_symbol,
        name_to_symbol,
        precedence,
-      associativity,
        tag
      ):
        if isinstance(self[0], PYACC.ID):
@@ -1253,6 +1229,8 @@ class PYACC(element.Element):
            )
        else:
          assert False
+      assert self.user_token == -1
+      assert precedence == -1
        return tag
  
    class Text(element.Element):
@@ -1454,10 +1432,9 @@ class PYACC(element.Element):
            section,
            character_to_symbol,
            name_to_symbol,
-          pyacc.precedences, # precedence
-          self.type # associativity
+          len(pyacc.associativities) # precedence
          )
-        pyacc.precedences += 1
+        pyacc.associativities.append(self.type)
  
      class Start(Item):
        # GENERATE ELEMENT() BEGIN
@@ -1498,8 +1475,7 @@ class PYACC(element.Element):
            section,
            character_to_symbol,
            name_to_symbol,
-          -1, # precedence
-          -1 # associativity
+          -1 # precedence
          )
          pyacc.start_nonterminal = self[0].nonterminal
  
@@ -1536,8 +1512,7 @@ class PYACC(element.Element):
          section,
          character_to_symbol,
          name_to_symbol,
-        precedence,
-        associativity
+        precedence
        ):
          tag = None
          for i in self:
@@ -1547,7 +1522,6 @@ class PYACC(element.Element):
              character_to_symbol,
              name_to_symbol,
              precedence,
-            associativity,
              tag
            )
  
@@ -1590,8 +1564,7 @@ class PYACC(element.Element):
            section,
            character_to_symbol,
            name_to_symbol,
-          -1, # precedence
-          -1 # associativity
+          -1 # precedence
          )
  
      class Type(Item):
@@ -2328,7 +2301,6 @@ class PYACC(element.Element):
            character_to_symbol,
            name_to_symbol,
            -1, # precedence
-          -1, # associativity
            None # tag
          )
          if pyacc.first_nonterminal == -1:
@@ -2460,7 +2432,7 @@ class PYACC(element.Element):
        return 'ast.PYACC.ValueReference({0:s})'.format(', '.join(params))
      # GENERATE END
  
-  # GENERATE ELEMENT(list(ref) prologue_text, int precedences, list(ref) terminal_symbols, list(ref) nonterminal_symbols, list(ref) productions, int first_nonterminal, int start_nonterminal) BEGIN
+  # GENERATE ELEMENT(list(ref) prologue_text, list(ref) terminal_symbols, list(ref) nonterminal_symbols, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
    def __init__(
      self,
      tag = 'PYACC',
@@ -2468,12 +2440,12 @@ class PYACC(element.Element):
      text = '',
      children = [],
      prologue_text = [],
-    precedences = -1,
      terminal_symbols = [],
      nonterminal_symbols = [],
      productions = [],
      first_nonterminal = -1,
-    start_nonterminal = -1
+    start_nonterminal = -1,
+    associativities = []
    ):
      element.Element.__init__(
        self,
@@ -2483,11 +2455,6 @@ class PYACC(element.Element):
        children
      )
      self.prologue_text = prologue_text
-    self.precedences = (
-      element.deserialize_int(precedences)
-    if isinstance(precedences, str) else
-      precedences
-    )
      self.terminal_symbols = terminal_symbols
      self.nonterminal_symbols = nonterminal_symbols
      self.productions = productions
@@ -2501,13 +2468,17 @@ class PYACC(element.Element):
      if isinstance(start_nonterminal, str) else
        start_nonterminal
      )
+    self.associativities = (
+      [element.deserialize_int(i) for i in associativities.split()]
+    if isinstance(associativities, str) else
+      associativities
+    )
    def serialize(self, ref_list, indent = 0):
      element.Element.serialize(self, ref_list, indent)
      self.set(
        'prologue_text',
        ' '.join([element.serialize_ref(i, ref_list) for i in self.prologue_text])
      )
-    self.set('precedences', element.serialize_int(self.precedences))
      self.set(
        'terminal_symbols',
        ' '.join([element.serialize_ref(i, ref_list) for i in self.terminal_symbols])
@@ -2522,13 +2493,16 @@ class PYACC(element.Element):
      )
      self.set('first_nonterminal', element.serialize_int(self.first_nonterminal))
      self.set('start_nonterminal', element.serialize_int(self.start_nonterminal))
+    self.set(
+      'associativities',
+      ' '.join([element.serialize_int(i) for i in self.associativities])
+    )
    def deserialize(self, ref_list):
      element.Element.deserialize(self, ref_list)
      self.prologue_text = [
        element.deserialize_ref(i, ref_list)
        for i in self.get('prologue_text', '').split()
      ]
-    self.precedences = element.deserialize_int(self.get('precedences', '-1'))
      self.terminal_symbols = [
        element.deserialize_ref(i, ref_list)
        for i in self.get('terminal_symbols', '').split()
@@ -2543,18 +2517,22 @@ class PYACC(element.Element):
      ]
      self.first_nonterminal = element.deserialize_int(self.get('first_nonterminal', '-1'))
      self.start_nonterminal = element.deserialize_int(self.get('start_nonterminal', '-1'))
+    self.associativities = [
+      element.deserialize_int(i)
+      for i in self.get('associativities', '').split()
+    ]
    def copy(self, factory = None):
      result = element.Element.copy(
        self,
        PYACC if factory is None else factory
      )
      result.prologue_text = self.prologue_text
-    result.precedences = self.precedences
      result.terminal_symbols = self.terminal_symbols
      result.nonterminal_symbols = self.nonterminal_symbols
      result.productions = self.productions
      result.first_nonterminal = self.first_nonterminal
      result.start_nonterminal = self.start_nonterminal
+    result.associativities = self.associativities
      return result
    def repr_serialize(self, params):
      element.Element.repr_serialize(self, params)
@@ -2564,10 +2542,6 @@ class PYACC(element.Element):
            ', '.join([repr(i) for i in self.prologue_text])
          )
        )
-    if self.precedences != -1:
-      params.append(
-        'precedences = {0:s}'.format(repr(self.precedences))
-      )
      if len(self.terminal_symbols):
        params.append(
          'terminal_symbols = [{0:s}]'.format(
@@ -2594,6 +2568,12 @@ class PYACC(element.Element):
        params.append(
          'start_nonterminal = {0:s}'.format(repr(self.start_nonterminal))
        )
+    if len(self.associativities):
+      params.append(
+        'associativities = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.associativities])
+        )
+      )
    def __repr__(self):
      params = []
      self.repr_serialize(params)
@@ -2656,7 +2636,10 @@ class PYACC(element.Element):
          )
        ],
        # precedences
-      ([], [], [1], [(-1, -1)]),
+      # (terminal_breaks, terminal_prec, nonterminal_breaks, nonterminal_prec)
+      ([], [], [1], [-1]),
+      # associativities (indexed by *_prec value)
+      self.associativities,
        # n_terminals
        max([0] + [i.character_set[-1] for i in self.terminal_symbols]),
        # eof_terminal
@@ -2667,14 +2650,15 @@ class PYACC(element.Element):
      character0 = 0
      for character1, _, precedence in sorted(
        [
-        j
+        k
          for i in self.terminal_symbols
-        for j in [
-          (i.character_set[0], True, (-1, -1)),
-          (i.character_set[1], False, (i.precedence, i.associativity))
+        for j in range(0, len(i.character_set), 2)
+        for k in [
+          (i.character_set[j], True, -1),
+          (i.character_set[j + 1], False, i.precedence)
          ]
        ] +
-      [(_lr1.n_terminals, True, (-1, -1))]
+      [(_lr1.n_terminals, True, -1)]
      ):
        if character1 > character0:
          if len(_lr1.precedences[1]) and _lr1.precedences[1][-1] == precedence:
diff --git a/lr1.py b/lr1.py

index 2e5dc67..7fba214 100644 (file)
--- a/lr1.py
+++ b/lr1.py
@@ -9,10 +9,15 @@ import sys
  n_characters = 0x100
  
  class LR1:
+  ASSOCIATIVITY_RIGHT = 0
+  ASSOCIATIVITY_LEFT = 1
+  ASSOCIATIVITY_NON = 2
+
    def __init__(
      self,
      productions = [],
      precedences = ([], [], [], []),
+    associativities = [],
      n_terminals = n_characters + 1,
      eof_terminal = n_characters
    ):
@@ -41,14 +46,17 @@ class LR1:
      #   where start_index, end_index are indices into list of character_set,
      #   and tag, kwargs will be passed to apply_markup() hence factory(),
      #   noting that markup has to be applied in reverse order of the list
-    # precedences:
-    #   (terminal_breaks, terminal_precs, nonterminal_breaks, nonterminal_precs)
-    #   similar to action/goto tables but *_precs is list of (prec, assoc) pairs
+    # precedences: (terminal_breaks, terminal_precs, nonterminal_breaks,
+    #   nonterminal_precs) encoded similarly to the action and goto tables
+    # associativities: indexed by value from (non)terminal_precs, contains
+    #   -1 for not specified (equal precedence is a compile time error) or an
+    #   ASSOCIATIVITY_* value (NON means equal precedence is run time error)
      # n_terminals: offset to apply to productions[] index to get symbol
      #   (character set code), also symbol for productions[0] = start production
      # eof_terminal: usually == n_terminals - 1 (must be valid terminal value)
      self.productions = productions
      self.precedences = precedences
+    self.associativities = associativities
      self.n_terminals = n_terminals
      self.eof_terminal = eof_terminal
  
@@ -366,8 +374,8 @@ class LR1:
              assert j < len(self.precedences[0])
              if terminal1 > self.precedences[0][j]:
                terminal1 = self.precedences[0][j]
-            shift_precedence, shift_associativity = self.precedences[1][j]
-            reduce_precedence, reduce_associativity = self.precedences[3][
+            shift_precedence = self.precedences[1][j]
+            reduce_precedence = self.precedences[3][
                bisect.bisect_right(self.precedences[2], reduction)
              ]
              if shift_precedence == -1 or reduce_precedence == -1:
@@ -378,25 +386,25 @@ class LR1:
                    reduction
                  )
                )
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
              elif shift_precedence > reduce_precedence:
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
              elif shift_precedence < reduce_precedence:
-              action = reduction * 2 + 1 # odd means reduce
+              action = reduction * 2 + 1 # reduce
              else:
-              assert shift_associativity == reduce_associativity
-              if shift_associativity == -1: # precedence
-                assert False
-              elif shift_associativity == 0: # right
-                action = add_state(next_items, next_item_to_index) * 2
-              elif shift_associativity == 1: # left
-                action = reduction * 2 + 1 # odd means reduce
-              else: # nonassoc
+              associativity = self.associativities[shift_precedence]
+              if associativity == LR1.ASSOCIATIVITY_RIGHT:
+                action = add_state(next_items, next_item_to_index) * 2 # shift
+              elif associativity == LR1.ASSOCIATIVITY_LEFT:
+                action = reduction * 2 + 1 # reduce
+              elif associativity == LR1.ASSOCIATIVITY_NON: # run time error
                  action = -1
+              else:
+                assert False # not specified (compile time error)
            else:
-            action = reduction * 2 + 1 # odd means reduce
+            action = reduction * 2 + 1 # reduce
          elif len(next_items) != 0:
-          action = add_state(next_items, next_item_to_index) * 2
+          action = add_state(next_items, next_item_to_index) * 2 # shift
          else:
            action = -1
          state_desc[0].append(terminal1)
@@ -495,8 +503,8 @@ class LR1:
              assert j < len(self.precedences[0])
              if terminal1 > self.precedences[0][j]:
                terminal1 = self.precedences[0][j]
-            shift_precedence, shift_associativity = self.precedences[1][j]
-            reduce_precedence, reduce_associativity = self.precedences[3][
+            shift_precedence = self.precedences[1][j]
+            reduce_precedence = self.precedences[3][
                bisect.bisect_right(self.precedences[2], reduction)
              ]
              if shift_precedence == -1 or reduce_precedence == -1:
@@ -507,25 +515,25 @@ class LR1:
                    reduction
                  )
                )
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
              elif shift_precedence > reduce_precedence:
-              action = add_state(next_items, next_item_to_index) * 2
+              action = add_state(next_items, next_item_to_index) * 2 # shift
              elif shift_precedence < reduce_precedence:
-              action = reduction * 2 + 1 # odd means reduce
+              action = reduction * 2 + 1 # reduce
              else:
-              assert shift_associativity == reduce_associativity
-              if shift_associativity == -1: # precedence
-                assert False
-              elif shift_associativity == 0: # right
-                action = add_state(next_items, next_item_to_index) * 2
-              elif shift_associativity == 1: # left
-                action = reduction * 2 + 1 # odd means reduce
-              else: # nonassoc
+              associativity = self.associativities[shift_precedence]
+              if associativity == LR1.ASSOCIATIVITY_RIGHT:
+                action = add_state(next_items, next_item_to_index) * 2 # shift
+              elif associativity == LR1.ASSOCIATIVITY_LEFT:
+                action = reduction * 2 + 1 # reduce
+              elif associativity == LR1.ASSOCIATIVITY_NON: # run time error
                  action = -1
+              else:
+                assert False # not specified (compile time error)
            else:
-            action = reduction * 2 + 1 # odd means reduce
+            action = reduction * 2 + 1 # reduce
          elif len(next_items) != 0:
-          action = add_state(next_items, next_item_to_index) * 2
+          action = add_state(next_items, next_item_to_index) * 2 # shift
          else:
            action = -1
          state_desc[0].append(terminal1)
@@ -554,9 +562,10 @@ class LR1:
      return _lr1dfa
  
    def __repr__(self):
-    return 'lr1.LR1({0:s}, {1:s}, {2:d}, {3:d})'.format(
+    return 'lr1.LR1({0:s}, {1:s}, {2:s}, {3:d}, {4:d})'.format(
        repr(self.productions),
        repr(self.precedences),
+      repr(self.associativities),
        self.n_terminals,
        self.eof_terminal
      )
author	Nick Downing <downing.nick@gmail.com>
	Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Thu, 19 Jul 2018 00:05:43 +0000 (10:05 +1000)