Fix a bug in start condition handling, tidy, fix skeleton, add progress reports
authorNick Downing <downing.nick@gmail.com>
Mon, 23 Jul 2018 03:26:24 +0000 (13:26 +1000)
committerNick Downing <downing.nick@gmail.com>
Mon, 23 Jul 2018 03:26:41 +0000 (13:26 +1000)
ast.py
flex_dfa.py
skel/Makefile
skel/lex.yy.c.patch2 [new file with mode: 0644]

diff --git a/ast.py b/ast.py
index 6a3a755..e7b2c35 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -1010,18 +1010,16 @@ class PLex(element.Element):
               )
           plex.eof_actions_text.append(self[2][0] if len(self) > 2 else PLex.Text()) # fix this later
         elif isinstance(self[1], PLex.Section2.Rule.FLexRule):
-          if len(start_conditions) == 0:
-            start_conditions = inclusive_start_conditions
-            for i in (
-              start_conditions
-            if len(start_conditions) else
-              inclusive_start_conditions
-            ):
-              if not self[1].bol:
-                plex.start_conditions[i].rules.append(self[1])
-              plex.start_conditions[i].bol_rules.append(self[1])
-          self[1][0].post_process() # expr
-          self[1][1].post_process() # trailing context
+          for i in (
+            start_conditions
+          if len(start_conditions) else
+            inclusive_start_conditions
+          ):
+            if not self[1].bol:
+              plex.start_conditions[i].rules.append(self[1])
+            plex.start_conditions[i].bol_rules.append(self[1])
+          self[1][0].post_process() # regex
+          self[1][1].post_process() # trailing context regex
           self[1].action = len(plex.actions_text)
           plex.actions_text.append(self[2][0] if len(self) > 2 else PLex.Text()) # fix this later
         else:
@@ -1272,11 +1270,11 @@ class PLex(element.Element):
     _nfa = nfa.NFA()
     for i in self.start_conditions:
       for j in range(2):
-        expr = regex.RegexNone()
+        _regex = regex.RegexNone()
         for k in [i.rules, i.bol_rules][j]:
-          expr = regex.RegexOr(
+          _regex = regex.RegexOr(
             children = [
-              expr,
+              _regex,
               regex.RegexSequence(
                 children = [
                   k[0],
@@ -1290,7 +1288,7 @@ class PLex(element.Element):
               )
             ]
           )
-        expr = regex.RegexAnd(
+        _regex = regex.RegexAnd(
           children = [
             regex.RegexRepeat(
               count0 = 0,
@@ -1302,7 +1300,7 @@ class PLex(element.Element):
             ),
             regex.RegexOr(
               children = [
-                expr,
+                _regex,
                 regex.RegexSequence(
                   children = [
                     regex.RegexCharacter(
@@ -1320,7 +1318,7 @@ class PLex(element.Element):
             )
           ]
         )
-        expr.add_to_nfa(_nfa)
+        _regex.add_to_nfa(_nfa)
     return _nfa
 
 # GENERATE FACTORY(regex.factory) BEGIN
index 5052d69..807cd2b 100644 (file)
@@ -104,7 +104,7 @@ class FlexDFA:
             self.acclist[n_acclist - 1] != acc | FlexDFA.YY_TRAILING_HEAD_MASK
           ):
             # look back to start of trailing context, then accept
-            acc |= FlexDFA.YY_HEAD_MASK
+            acc |= FlexDFA.YY_TRAILING_MASK
           # otherwise zero length trailing context, accept immediately
         else:
           # mark start of (hopefully safe) trailing context
@@ -180,6 +180,8 @@ class FlexDFA:
     )
     order = []
     for i in range(1, n_states):
+      print('mst', i, 'of', n_states)
+
       # Prim's algorithm: find most similar (done, todo) state pair
       temp = dist[:i, i:]
       done, todo = numpy.unravel_index(numpy.argmin(temp), temp.shape)
@@ -212,6 +214,7 @@ class FlexDFA:
     n_entries = 0x101
     dupes = []
     while len(order):
+      print('order', len(order))
       state_done, state_todo = order.pop()
       indices = numpy.nonzero(
         transitions[state_todo, :] != transitions[state_done, :]
@@ -258,6 +261,7 @@ class FlexDFA:
         self.states[state_todo, 0] = start_index
         self.states[state_todo, 1] = state_done
     while len(dupes):
+      print('dupes', len(dupes))
       state_done, state_todo = dupes.pop()
       self.states[state_todo, 0] = self.states[state_done, 0]
       self.states[state_todo, 1] = state_done
@@ -267,11 +271,14 @@ class FlexDFA:
     print('n_entries', n_entries)
 
 def generate(plex, skel_file, out_file):
-  nfa = plex.to_nfa()
+  _nfa = plex.to_nfa()
+  #print(len(_nfa.states), _nfa.start_state)
   eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
   eob_expr.post_process(len(plex.actions_text))
-  eob_expr.add_to_nfa(nfa)
-  flex_dfa = FlexDFA(nfa.to_dfa())
+  eob_expr.add_to_nfa(_nfa)
+  _dfa = _nfa.to_dfa()
+  #print(len(_dfa.states), len(_dfa.actions), _dfa.start_action)
+  flex_dfa = FlexDFA(_dfa)
 
   with open(skel_file, 'r') as fin:
     with open(out_file, 'w+') as fout:
@@ -425,7 +432,7 @@ static const flex_int16_t yy_chk[] = {{{6:s}
             [
               j
               for j in range(len(plex.start_conditions))
-              if plex.start_conditions[i].eof_action == j
+              if plex.start_conditions[j].eof_action == i
             ]
             for i in range(len(plex.eof_actions_text))
           ]
index 8103b05..3581035 100644 (file)
@@ -2,3 +2,5 @@ lex.yy.c: skel.l
        ../../bootstrap_flex.git/src/flex $<
        cp $@ $@.orig
        patch $@ <$@.patch
+       # temporary: putting in the state stack and push/pop state
+       patch $@ <$@.patch2
diff --git a/skel/lex.yy.c.patch2 b/skel/lex.yy.c.patch2
new file mode 100644 (file)
index 0000000..7653357
--- /dev/null
@@ -0,0 +1,56 @@
+--- lex.yy.c1  2018-07-23 13:08:27.703035560 +1000
++++ ../bootstrap_flex.git/src/stage1scan.c.orig        2018-07-23 13:21:31.407008010 +1000
+@@ -9883,6 +9883,14 @@
+ #endif
++        static int yy_start_stack_ptr = 0;
++        static int yy_start_stack_depth = 0;
++        static int *yy_start_stack = NULL;
++ 
++    static void yy_push_state (int new_state );
++    
++    static void yy_pop_state (void );
++ 
+ /* Amount of stuff to slurp up with each read. */
+ #ifndef YY_READ_BUF_SIZE
+ #ifdef __ia64__
+@@ -12464,6 +12472,38 @@
+       return b;
+ }
++    static void yy_push_state (int  new_state )
++{
++      if ( (yy_start_stack_ptr) >= (yy_start_stack_depth) )
++              {
++              yy_size_t new_size;
++
++              (yy_start_stack_depth) += YY_START_STACK_INCR;
++              new_size = (yy_start_stack_depth) * sizeof( int );
++
++              if ( ! (yy_start_stack) )
++                      (yy_start_stack) = (int *) yyalloc(new_size  );
++
++              else
++                      (yy_start_stack) = (int *) yyrealloc((void *) (yy_start_stack),new_size  );
++
++              if ( ! (yy_start_stack) )
++                      YY_FATAL_ERROR( "out of memory expanding start-condition stack" );
++              }
++
++      (yy_start_stack)[(yy_start_stack_ptr)++] = YY_START;
++
++      BEGIN(new_state);
++}
++
++    static void yy_pop_state  (void)
++{
++      if ( --(yy_start_stack_ptr) < 0 )
++              YY_FATAL_ERROR( "start-condition stack underflow" );
++
++      BEGIN((yy_start_stack)[(yy_start_stack_ptr)]);
++}
++
+ #ifndef YY_EXIT_FAILURE
+ #define YY_EXIT_FAILURE 2
+ #endif