Fix a bug in start condition handling, tidy, fix skeleton, add progress reports

author Nick Downing <downing.nick@gmail.com>

Mon, 23 Jul 2018 03:26:24 +0000 (13:26 +1000)

committer Nick Downing <downing.nick@gmail.com>

Mon, 23 Jul 2018 03:26:41 +0000 (13:26 +1000)
author Nick Downing <downing.nick@gmail.com>
Mon, 23 Jul 2018 03:26:24 +0000 (13:26 +1000)
committer Nick Downing <downing.nick@gmail.com>
Mon, 23 Jul 2018 03:26:41 +0000 (13:26 +1000)
diff --git a/ast.py b/ast.py

index 6a3a755..e7b2c35 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -1010,18 +1010,16 @@ class PLex(element.Element):
                )
            plex.eof_actions_text.append(self[2][0] if len(self) > 2 else PLex.Text()) # fix this later
          elif isinstance(self[1], PLex.Section2.Rule.FLexRule):
-          if len(start_conditions) == 0:
-            start_conditions = inclusive_start_conditions
-            for i in (
-              start_conditions
-            if len(start_conditions) else
-              inclusive_start_conditions
-            ):
-              if not self[1].bol:
-                plex.start_conditions[i].rules.append(self[1])
-              plex.start_conditions[i].bol_rules.append(self[1])
-          self[1][0].post_process() # expr
-          self[1][1].post_process() # trailing context
+          for i in (
+            start_conditions
+          if len(start_conditions) else
+            inclusive_start_conditions
+          ):
+            if not self[1].bol:
+              plex.start_conditions[i].rules.append(self[1])
+            plex.start_conditions[i].bol_rules.append(self[1])
+          self[1][0].post_process() # regex
+          self[1][1].post_process() # trailing context regex
            self[1].action = len(plex.actions_text)
            plex.actions_text.append(self[2][0] if len(self) > 2 else PLex.Text()) # fix this later
          else:
@@ -1272,11 +1270,11 @@ class PLex(element.Element):
      _nfa = nfa.NFA()
      for i in self.start_conditions:
        for j in range(2):
-        expr = regex.RegexNone()
+        _regex = regex.RegexNone()
          for k in [i.rules, i.bol_rules][j]:
-          expr = regex.RegexOr(
+          _regex = regex.RegexOr(
              children = [
-              expr,
+              _regex,
                regex.RegexSequence(
                  children = [
                    k[0],
@@ -1290,7 +1288,7 @@ class PLex(element.Element):
                )
              ]
            )
-        expr = regex.RegexAnd(
+        _regex = regex.RegexAnd(
            children = [
              regex.RegexRepeat(
                count0 = 0,
@@ -1302,7 +1300,7 @@ class PLex(element.Element):
              ),
              regex.RegexOr(
                children = [
-                expr,
+                _regex,
                  regex.RegexSequence(
                    children = [
                      regex.RegexCharacter(
@@ -1320,7 +1318,7 @@ class PLex(element.Element):
              )
            ]
          )
-        expr.add_to_nfa(_nfa)
+        _regex.add_to_nfa(_nfa)
      return _nfa
  
  # GENERATE FACTORY(regex.factory) BEGIN
diff --git a/flex_dfa.py b/flex_dfa.py

index 5052d69..807cd2b 100644 (file)
--- a/flex_dfa.py
+++ b/flex_dfa.py
@@ -104,7 +104,7 @@ class FlexDFA:
              self.acclist[n_acclist - 1] != acc | FlexDFA.YY_TRAILING_HEAD_MASK
            ):
              # look back to start of trailing context, then accept
-            acc |= FlexDFA.YY_HEAD_MASK
+            acc |= FlexDFA.YY_TRAILING_MASK
            # otherwise zero length trailing context, accept immediately
          else:
            # mark start of (hopefully safe) trailing context
@@ -180,6 +180,8 @@ class FlexDFA:
      )
      order = []
      for i in range(1, n_states):
+      print('mst', i, 'of', n_states)
+
        # Prim's algorithm: find most similar (done, todo) state pair
        temp = dist[:i, i:]
        done, todo = numpy.unravel_index(numpy.argmin(temp), temp.shape)
@@ -212,6 +214,7 @@ class FlexDFA:
      n_entries = 0x101
      dupes = []
      while len(order):
+      print('order', len(order))
        state_done, state_todo = order.pop()
        indices = numpy.nonzero(
          transitions[state_todo, :] != transitions[state_done, :]
@@ -258,6 +261,7 @@ class FlexDFA:
          self.states[state_todo, 0] = start_index
          self.states[state_todo, 1] = state_done
      while len(dupes):
+      print('dupes', len(dupes))
        state_done, state_todo = dupes.pop()
        self.states[state_todo, 0] = self.states[state_done, 0]
        self.states[state_todo, 1] = state_done
@@ -267,11 +271,14 @@ class FlexDFA:
      print('n_entries', n_entries)
  
  def generate(plex, skel_file, out_file):
-  nfa = plex.to_nfa()
+  _nfa = plex.to_nfa()
+  #print(len(_nfa.states), _nfa.start_state)
    eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
    eob_expr.post_process(len(plex.actions_text))
-  eob_expr.add_to_nfa(nfa)
-  flex_dfa = FlexDFA(nfa.to_dfa())
+  eob_expr.add_to_nfa(_nfa)
+  _dfa = _nfa.to_dfa()
+  #print(len(_dfa.states), len(_dfa.actions), _dfa.start_action)
+  flex_dfa = FlexDFA(_dfa)
  
    with open(skel_file, 'r') as fin:
      with open(out_file, 'w+') as fout:
@@ -425,7 +432,7 @@ static const flex_int16_t yy_chk[] = {{{6:s}
              [
                j
                for j in range(len(plex.start_conditions))
-              if plex.start_conditions[i].eof_action == j
+              if plex.start_conditions[j].eof_action == i
              ]
              for i in range(len(plex.eof_actions_text))
            ]
diff --git a/skel/Makefile b/skel/Makefile

index 8103b05..3581035 100644 (file)
--- a/skel/Makefile
+++ b/skel/Makefile
@@ -2,3 +2,5 @@ lex.yy.c: skel.l
         ../../bootstrap_flex.git/src/flex $<
         cp $@ $@.orig
         patch $@ <$@.patch
+       # temporary: putting in the state stack and push/pop state
+       patch $@ <$@.patch2
diff --git a/skel/lex.yy.c.patch2 b/skel/lex.yy.c.patch2

new file mode 100644 (file)

index 0000000..7653357
--- /dev/null
+++ b/skel/lex.yy.c.patch2
@@ -0,0 +1,56 @@
+--- lex.yy.c1  2018-07-23 13:08:27.703035560 +1000
++++ ../bootstrap_flex.git/src/stage1scan.c.orig        2018-07-23 13:21:31.407008010 +1000
+@@ -9883,6 +9883,14 @@
+ 
+ #endif
+ 
++        static int yy_start_stack_ptr = 0;
++        static int yy_start_stack_depth = 0;
++        static int *yy_start_stack = NULL;
++ 
++    static void yy_push_state (int new_state );
++    
++    static void yy_pop_state (void );
++ 
+ /* Amount of stuff to slurp up with each read. */
+ #ifndef YY_READ_BUF_SIZE
+ #ifdef __ia64__
+@@ -12464,6 +12472,38 @@
+       return b;
+ }
+ 
++    static void yy_push_state (int  new_state )
++{
++      if ( (yy_start_stack_ptr) >= (yy_start_stack_depth) )
++              {
++              yy_size_t new_size;
++
++              (yy_start_stack_depth) += YY_START_STACK_INCR;
++              new_size = (yy_start_stack_depth) * sizeof( int );
++
++              if ( ! (yy_start_stack) )
++                      (yy_start_stack) = (int *) yyalloc(new_size  );
++
++              else
++                      (yy_start_stack) = (int *) yyrealloc((void *) (yy_start_stack),new_size  );
++
++              if ( ! (yy_start_stack) )
++                      YY_FATAL_ERROR( "out of memory expanding start-condition stack" );
++              }
++
++      (yy_start_stack)[(yy_start_stack_ptr)++] = YY_START;
++
++      BEGIN(new_state);
++}
++
++    static void yy_pop_state  (void)
++{
++      if ( --(yy_start_stack_ptr) < 0 )
++              YY_FATAL_ERROR( "start-condition stack underflow" );
++
++      BEGIN((yy_start_stack)[(yy_start_stack_ptr)]);
++}
++
+ #ifndef YY_EXIT_FAILURE
+ #define YY_EXIT_FAILURE 2
+ #endif
author	Nick Downing <downing.nick@gmail.com>
	Mon, 23 Jul 2018 03:26:24 +0000 (13:26 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Mon, 23 Jul 2018 03:26:41 +0000 (13:26 +1000)
ast.py		patch \| blob \| history
flex_dfa.py		patch \| blob \| history
skel/Makefile		patch \| blob \| history
skel/lex.yy.c.patch2	[new file with mode: 0644]	patch \| blob