Add interactive parser ability, tidy up the Python test and skeleton slightly

author Nick Downing <downing.nick@gmail.com>

Tue, 11 Sep 2018 10:36:17 +0000 (20:36 +1000)

committer Nick Downing <downing.nick@gmail.com>

Tue, 11 Sep 2018 10:36:17 +0000 (20:36 +1000)
author Nick Downing <downing.nick@gmail.com>
Tue, 11 Sep 2018 10:36:17 +0000 (20:36 +1000)
committer Nick Downing <downing.nick@gmail.com>
Tue, 11 Sep 2018 10:36:17 +0000 (20:36 +1000)
diff --git a/lr1.py b/lr1.py

index 7fba214..c17def1 100644 (file)
--- a/lr1.py
+++ b/lr1.py
@@ -338,7 +338,10 @@ class LR1:
  
      while len(_lr1dfa.states) < len(state_to_items):
        items = state_to_items[len(_lr1dfa.states)]
-      state_desc = ([], [], [], [])
+      terminal_breaks = []
+      actions = []
+      nonterminal_breaks = []
+      gotos = []
  
        def add_state(next_items, next_item_to_index):
          self.lookahead_item_set_closure(next_items, next_item_to_index)
@@ -407,8 +410,8 @@ class LR1:
            action = add_state(next_items, next_item_to_index) * 2 # shift
          else:
            action = -1
-        state_desc[0].append(terminal1)
-        state_desc[1].append(action)
+        terminal_breaks.append(terminal1)
+        actions.append(action)
          terminal = terminal1
  
        nonterminal = 0
@@ -421,10 +424,28 @@ class LR1:
            goto = add_state(next_items, next_item_to_index)
          else:
            goto = -1
-        state_desc[2].append(nonterminal1)
-        state_desc[3].append(goto)
+        nonterminal_breaks.append(nonterminal1)
+        gotos.append(goto)
          nonterminal = nonterminal1
  
+      # test for a default reduce action, useful for interactive parsers
+      valid_actions = [i for i in actions if i != -1]
+      default_reduce = -1
+      if len(valid_actions):
+        valid_action = valid_actions[0]
+        if (
+          (valid_action & 1) and
+          all([i == valid_action for i in valid_actions[1:]])
+        ):
+          default_reduce = valid_action >> 1
+
+      state_desc = (
+        terminal_breaks,
+        actions,
+        nonterminal_breaks,
+        gotos,
+        default_reduce
+      )
        _lr1dfa.states.append(state_desc)
      return _lr1dfa
  
@@ -456,7 +477,10 @@ class LR1:
        i = queue[qhead]
        in_queue[i] = False
        items = state_to_items[i]
-      state_desc = ([], [], [], [])
+      terminal_breaks = []
+      actions = []
+      nonterminal_breaks = []
+      gotos = []
  
        def add_state(next_items, next_item_to_index):
          self.lookahead_item_set_closure(next_items, next_item_to_index)
@@ -536,8 +560,8 @@ class LR1:
            action = add_state(next_items, next_item_to_index) * 2 # shift
          else:
            action = -1
-        state_desc[0].append(terminal1)
-        state_desc[1].append(action)
+        terminal_breaks.append(terminal1)
+        actions.append(action)
          terminal = terminal1
  
        nonterminal = 0
@@ -550,10 +574,28 @@ class LR1:
            goto = add_state(next_items, next_item_to_index)
          else:
            goto = -1
-        state_desc[2].append(nonterminal1)
-        state_desc[3].append(goto)
+        nonterminal_breaks.append(nonterminal1)
+        gotos.append(goto)
          nonterminal = nonterminal1
  
+      # test for a default reduce action, useful for interactive parsers
+      valid_actions = [i for i in actions if i != -1]
+      default_reduce = -1
+      if len(valid_actions):
+        valid_action = valid_actions[0]
+        if (
+          (valid_action & 1) and
+          all([i == valid_action for i in valid_actions[1:]])
+        ):
+          default_reduce = valid_action >> 1
+
+      state_desc = (
+        terminal_breaks,
+        actions,
+        nonterminal_breaks,
+        gotos,
+        default_reduce
+      )
        if i < len(_lr1dfa.states):
          _lr1dfa.states[i] = state_desc
        else:
diff --git a/lr1dfa.py b/lr1dfa.py

index a5d4086..03cb711 100644 (file)
--- a/lr1dfa.py
+++ b/lr1dfa.py
@@ -17,7 +17,7 @@ class LR1DFA:
      eof_terminal = n_characters
    ):
      # states: list of state_desc
-    # state_desc: (terminal breaks, actions, nonterminal breaks, gotos)
+    # state_desc: (terminal br, actions, nonterminal br, gotos, default reduce)
      # action: shift = new state * 2, reduce = production * 2 + 1, error = -1
      # goto: reduce = production, error = -1 (but error can't really happen)
      # productions: list of production
@@ -80,7 +80,7 @@ class LR1DFA:
        numpy.int16
      )
      for i in range(len(self.states)):
-      terminal_breaks, actions, nonterminal_breaks, gotos = self.states[i]
+      terminal_breaks, actions, nonterminal_breaks, gotos, _ = self.states[i]
  
        terminal0 = 0
        for j in range(len(terminal_breaks)):
diff --git a/skel/skel_py.py b/skel/skel_py.py

index 028f25f..c80d2e3 100644 (file)
--- a/skel/skel_py.py
+++ b/skel/skel_py.py
@@ -1,5 +1,4 @@
  import bisect
-import lex_yy
  
  # GENERATE SECTION1
  
@@ -24,39 +23,42 @@ def yyparse():
    state = 0
    yystack = []
    yylval = None
-  yytoken = lex_yy.yylex()
-  print('yytoken', yytoken, 'yylval', yylval)
+  yytoken = -1
    while True:
-    print('state', state, 'yystack', yystack)
-    action = yy_lr1dfa_states[state][1][
-      bisect.bisect_right(yy_lr1dfa_states[state][0], yytoken)
-    ]
-    if action == -1:
-      raise Exception('syntax error')
-    if (action & 1) == 0:
-      yystack.append((state, yylval))
-      state = action >> 1
-      print('shift', state)
-      yylval = None
-      yytoken = lex_yy.yylex()
-      print('yytoken', yytoken, 'yylval', yylval)
-    else:
-      yystack.append((state, None))
-      reduce = action >> 1
-      print('reduce', reduce)
-      len_symbols, ref_data = yy_lr1dfa_productions[reduce]
-      base = len(yystack) - len_symbols - 1
-      yyval = yystack[base][1]
-      ref_data()
-      del yystack[base + 1:]
-      if reduce == 0:
-        assert base == 0
-        return
-      state = yystack[-1][0]
-      yystack[-1] = (state, yyval)
-      state = yy_lr1dfa_states[state][3][
-        bisect.bisect_right(yy_lr1dfa_states[state][2], reduce)
+    #print('state', state, 'yystack', yystack)
+    reduce = yy_lr1dfa_states[state][4]
+    if reduce == -1:
+      if yytoken == -1:
+        yylval = None
+        yylloc = None
+        yytoken = yylex()
+        #print('yytoken', yytoken, 'yylval', yylval, 'yylloc', yylloc)
+      action = yy_lr1dfa_states[state][1][
+        bisect.bisect_right(yy_lr1dfa_states[state][0], yytoken)
        ]
-      assert state != -1
+      if action == -1:
+        raise Exception('syntax error')
+      if (action & 1) == 0:
+        yystack.append((state, yylval, yylloc))
+        state = action >> 1
+        #print('shift', state)
+        yytoken = -1
+        continue
+      reduce = action >> 1
+    #print('reduce', reduce)
+    len_symbols, ref_data = yy_lr1dfa_productions[reduce]
+    base = len(yystack) - len_symbols
+    yystack.append((state, None, None))
+    state, yyval, yyloc = yystack[base]
+    ref_data()
+    del yystack[base:]
+    if reduce == 0:
+      assert base == 0
+      break
+    yystack.append((state, yyval, yyloc))
+    state = yy_lr1dfa_states[state][3][
+      bisect.bisect_right(yy_lr1dfa_states[state][2], reduce)
+    ]
+    assert state != -1
  
  # GENERATE SECTION3
diff --git a/tests/cal.py b/tests/cal.py

index d2978ba..d43ab5c 100755 (executable)
--- a/tests/cal.py
+++ b/tests/cal.py
@@ -1,6 +1,8 @@
  #!/usr/bin/env python3
  
+import sys
  import y_tab
  
-print('Enter the expression: ', end = '')
+sys.stdout.write('Enter the expression: ')
+sys.stdout.flush()
  y_tab.yyparse()
diff --git a/tests/cal_py.l b/tests/cal_py.l

index fcee582..272fba6 100644 (file)
--- a/tests/cal_py.l
+++ b/tests/cal_py.l
@@ -4,8 +4,6 @@ import y_tab
  
  DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
  
-%option noecs nometa-ecs noyywrap reject yymore
-
  %%
  
  [ ]
diff --git a/tests/cal_py.y b/tests/cal_py.y

index f2b532a..4fea55a 100644 (file)
--- a/tests/cal_py.y
+++ b/tests/cal_py.y
@@ -1,4 +1,5 @@
  %{
+from lex_yy import yylex
  import sys
  %}
  %token NUM
@@ -10,8 +11,7 @@ import sys
  %%
  
  S : S E '\n' {
-  print('Answer:', $2)
-  print('Enter:')
+  sys.stdout.write('Answer: {0:g}\nEnter:\n'.format($2))
  }
    | S '\n'
    |
@@ -30,9 +30,7 @@ E : E '+' E { $$ = $1 + $3 }
    | NUM
    ;
  %%
-# this is section 3
  
  def yyerror(s):
-  print(s)
+  sys.stdout.write('{0:s}\n'.format(s))
    sys.exit(1)
-
author	Nick Downing <downing.nick@gmail.com>
	Tue, 11 Sep 2018 10:36:17 +0000 (20:36 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Tue, 11 Sep 2018 10:36:17 +0000 (20:36 +1000)
lr1.py		patch \| blob \| history
lr1dfa.py		patch \| blob \| history
skel/skel_py.py		patch \| blob \| history
tests/cal.py		patch \| blob \| history
tests/cal_py.l		patch \| blob \| history
tests/cal_py.y		patch \| blob \| history