Minor bug fixes to get it to scan something, enhance test program to print flex state...
author Nick Downing <downing.nick@gmail.com>
Thu, 28 Jun 2018 09:51:34 +0000 (19:51 +1000)
committer Nick Downing <downing.nick@gmail.com>
Thu, 28 Jun 2018 09:51:34 +0000 (19:51 +1000)
.gitignore
ast.py
plex.py
skel/lex.yy.c.patch
tests/Makefile
tests/cal.l
tests/cal.l.xml
tests/cal.y [new file with mode: 0644]
tests/lex.yy.c.patch [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
index 9066073..cc5970e 100644 (file)
@@ -1,3 +1,7 @@
 __pycache__
 lex.yy.c
+tests/cal
+tests/lex.yy.c.orig
+tests/lex.yy.c
+tests/y.tab.c
 skel/lex.yy.c.orig
diff --git a/ast.py b/ast.py
index 40aba57..3bebd67 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -199,6 +199,70 @@ class Options(Option):
     def process(self, options):
       options.meta_ecs = self.value
 
+  class Reject(BoolOption):
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'Options_Reject',
+      attrib = {},
+      text = '',
+      children = [],
+      value = False
+    ):
+      BoolOption.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        value
+      )
+    def copy(self, factory = None):
+      result = BoolOption.copy(
+        self,
+        Reject if factory is None else factory
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.Options.Reject({0:s})'.format(', '.join(params))
+    # GENERATE END
+    def process(self, options):
+      options.reject = self.value
+
+  class YYMore(BoolOption):
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'Options_YYMore',
+      attrib = {},
+      text = '',
+      children = [],
+      value = False
+    ):
+      BoolOption.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        value
+      )
+    def copy(self, factory = None):
+      result = BoolOption.copy(
+        self,
+        YYMore if factory is None else factory
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.Options.YYMore({0:s})'.format(', '.join(params))
+    # GENERATE END
+    def process(self, options):
+      options.yymore = self.value
+
   class YYWrap(BoolOption):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -438,6 +502,8 @@ tag_to_class = {
   'Options': Options,
   'Options_ECS': Options.ECS,
   'Options_MetaECS': Options.MetaECS,
+  'Options_Reject': Options.Reject,
+  'Options_YYMore': Options.YYMore,
   'Options_YYWrap': Options.YYWrap,
   'Section2': Section2,
   'StartCondNone': StartCondNone,
diff --git a/plex.py b/plex.py
index e4bbf12..6d552ef 100755 (executable)
--- a/plex.py
+++ b/plex.py
@@ -15,6 +15,8 @@ class Options:
   def __init__(self):
     self.ecs = False
     self.meta_ecs = False
+    self.reject = True
+    self.yymore = True
     self.yywrap = True
 
 class FlexDFA:
@@ -22,10 +24,6 @@ class FlexDFA:
   YY_TRAILING_HEAD_MASK = 0x4000
 
   def __init__(self, dfa):
-    # state 0 is the jam state, the EOB state will be added at the end
-    self.states = [([], 0, 0)] # accept, base, def
-    self.entries = [(0, 0)] * 0x101 # nxt, chk
-
     # this is basically just a renumbering
 
     # state numbers in the DFA become base/def numbers in the FlexDFA,
@@ -48,6 +46,10 @@ class FlexDFA:
     # a dummy rule that accepts the null string and executes EOB action
     eob_state = len(dfa.start_action)
 
+    # state 0 is the jam state, the EOB state will be added later on
+    self.states = [([], 0, 0)] # accept, base, def
+    self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
+
     # full_entries[i, j] is transition on character j in state i
     # in our way of thinking, 0 is don't care and -1 is failure
     # however, in the flex way these are both 0 (don't care),
@@ -61,12 +63,14 @@ class FlexDFA:
     while len(self.states) < len(flex_state_to_action):
       action = flex_state_to_action[len(self.states)]
       state, transition = dfa.actions[action]
+      #print('state', len(self.states), 'transition', transition)
 
       # we collect marks without regard to which thread they refer to,
       # they should already be in priority order without any duplicates
       # (because the deduplication removes subsequent identical threads)
       flex_accept = []
       for j in [i[2] for i in transition if i[0] == regex.DFA.TRANSITION_MARK]:
+        #print(j)
         if j & 1:
           if (
             len(flex_accept) > 0 and
@@ -80,6 +84,7 @@ class FlexDFA:
         else:
           # mark start of (hopefully safe) trailing context
           flex_accept.append((j >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
+      #print(flex_accept)
 
       if state in state_to_flex_base_def:
         flex_base, flex_def = state_to_flex_base[state]
@@ -155,6 +160,7 @@ class FlexDFA:
             )
 
       self.states.append((flex_accept, flex_base, flex_def))
+    #print(full_entries[:len(self.states), :])
 
 if len(sys.argv) < 2:
   sys.stdout.write(
@@ -243,7 +249,10 @@ expr = regex.RegexGroup(children = [regex.RegexEmpty()])
 expr.post_process(group_index = len(actions))
 expr.add_to_nfa(nfa)
 
-flex_dfa = FlexDFA(nfa.to_dfa())
+dfa = nfa.to_dfa()
+#print(dfa.start_action)
+#print(dfa.actions[2])
+flex_dfa = FlexDFA(dfa) #nfa.to_dfa())
 with open('skel/lex.yy.c', 'r') as fin:
   with open('lex.yy.c', 'w+') as fout:
     line = fin.readline()
diff --git a/skel/lex.yy.c.patch b/skel/lex.yy.c.patch
index bcee795..e968f27 100644 (file)
@@ -1,5 +1,5 @@
 --- lex.yy.c.orig      2018-06-25 10:36:41.898822220 +1000
-+++ lex.yy.c   2018-06-28 00:00:54.431048812 +1000
++++ lex.yy.c   2018-06-28 19:47:22.872171888 +1000
 @@ -1,6 +1,3 @@
 -
 -#line 2 "lex.yy.c"
  
  yy_find_action:
                yy_current_state = *--(yy_state_ptr);
+@@ -824,7 +648,7 @@
+ find_rule: /* we branch to this label when backing up */
+               for ( ; ; ) /* until we find what rule we matched */
+                       {
+-                      if ( (yy_lp) && (yy_lp) < yy_accept[yy_current_state + 1] )
++                      if ( /*(yy_lp) &&*/ (yy_lp) < yy_accept[yy_current_state + 1] )
+                               {
+                               yy_act = yy_acclist[(yy_lp)];
+                               if ( yy_act & YY_TRAILING_HEAD_MASK ||
 @@ -866,19 +690,7 @@
  
                switch ( yy_act )
diff --git a/tests/Makefile b/tests/Makefile
index 86f4d56..9fc8dd2 100644 (file)
@@ -1,3 +1,10 @@
-cal.l.xml: cal.l
-       ../../bootstrap_flex.git/src/flex $< 2>$@
-       rm -f lex.yy.c
+cal: y.tab.c lex.yy.c
+       gcc -o $@ $<
+
+lex.yy.c: cal.l
+       ../../bootstrap_flex.git/src/flex $< 2>$<.xml
+       cp $@ $@.orig
+       patch $@ <$@.patch
+
+y.tab.c: cal.y
+       ../../bootstrap_bison.git/src/bison -y $< 2>$<.xml
diff --git a/tests/cal.l b/tests/cal.l
index 86e30e3..bbeb981 100644 (file)
@@ -4,7 +4,7 @@
 
 DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
 
-%option noecs nometa-ecs noyywrap
+%option noecs nometa-ecs noyywrap reject yymore
 
 %%
 
diff --git a/tests/cal.l.xml b/tests/cal.l.xml
index 2440a6b..07144b8 100644 (file)
@@ -4,7 +4,7 @@
 
 DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
 
-<Options>%option <Options_ECS>noecs</Options_ECS> <Options_MetaECS>nometa-ecs</Options_MetaECS> <Options_YYWrap>noyywrap</Options_YYWrap></Options>
+<Options>%option <Options_ECS>noecs</Options_ECS> <Options_MetaECS>nometa-ecs</Options_MetaECS> <Options_YYWrap>noyywrap</Options_YYWrap> <Options_Reject value="true">reject</Options_Reject> <Options_YYMore value="true">yymore</Options_YYMore></Options>
 
 </Section1>%%<Section2>
 
diff --git a/tests/cal.y b/tests/cal.y
new file mode 100644 (file)
index 0000000..637cb82
--- /dev/null
@@ -0,0 +1,41 @@
+%{
+#include <ctype.h>
+#include <stdio.h>
+#define YYSTYPE double
+%}
+%token NUM
+
+%left '+' '-'
+%left '*' '/'
+%right UMINUS
+
+%%
+
+S : S E '\n' { printf("Answer: %g\nEnter:\n", $2); }
+  | S '\n'
+  |
+  | error '\n' { yyerror("Error: Enter once more...\n"); yyerrok; }
+  ;
+E : E '+' E { $$ = $1 + $3; }
+  | E '-' E { $$ = $1 - $3; }
+  | E '*' E { $$ = $1 * $3; }
+  | E '/' E { $$ = $1 / $3; }
+  | '(' E ')' { $$ = $2; }
+  | '-' E %prec UMINUS { $$ = -$2; }
+  | NUM
+  ;
+%%
+
+#include "lex.yy.c"
+
+int main()
+{
+  printf("Enter the expression: ");
+  yyparse();
+}
+
+yyerror(char * s)
+{
+  printf("%s\n", s);
+  exit(1);
+}
diff --git a/tests/lex.yy.c.patch b/tests/lex.yy.c.patch
new file mode 100644 (file)
index 0000000..acb01de
--- /dev/null
@@ -0,0 +1,38 @@
+--- lex.yy.c.orig      2018-06-28 19:10:44.236186452 +1000
++++ lex.yy.c   2018-06-28 19:21:49.400182046 +1000
+@@ -812,17 +812,27 @@
+               yy_current_state = (yy_start);
+               (yy_state_ptr) = (yy_state_buf);
++ printf("yy_state_buf[%d] = %d:", (int)(yy_state_ptr - yy_state_buf), yy_current_state);
++ for (yy_lp = yy_accept[yy_current_state]; yy_lp && yy_lp < yy_accept[yy_current_state + 1]; ++yy_lp)
++  printf(" %d", yy_acclist[yy_lp]);
++ printf("\n");
+               *(yy_state_ptr)++ = yy_current_state;
+ yy_match:
+               do
+                       {
+                       int yy_c = YY_SC_TO_UI(*yy_cp);
++ printf(yy_c < 0x20 ? "yy_c = '\\x%02x'\n" : "yy_c = '%c'\n", yy_c);
+                       while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+                               {
++ printf("yy_def[%d] = %d\n", yy_current_state, yy_def[yy_current_state]);
+                               yy_current_state = (int) yy_def[yy_current_state];
+                               }
+                       yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
++ printf("yy_state_buf[%d] = %d:", (int)(yy_state_ptr - yy_state_buf), yy_current_state);
++ for (yy_lp = yy_accept[yy_current_state]; yy_lp && yy_lp < yy_accept[yy_current_state + 1]; ++yy_lp)
++  printf(" %d", yy_acclist[yy_lp]);
++ printf("\n");
+                       *(yy_state_ptr)++ = yy_current_state;
+                       ++yy_cp;
+                       }
+@@ -852,6 +862,7 @@
+               YY_DO_BEFORE_ACTION;
+ do_action:    /* This label is used only to access EOF actions. */
++ printf("yy_act = %d\n", yy_act);
+               switch ( yy_act )
+       { /* beginning of action switch */