Add BOL and EOF rules, not well tested yet
author Nick Downing <downing.nick@gmail.com>
Fri, 29 Jun 2018 12:43:08 +0000 (22:43 +1000)
committer Nick Downing <downing.nick@gmail.com>
Fri, 29 Jun 2018 12:43:08 +0000 (22:43 +1000)
plex.py
skel/lex.yy.c.patch
skel/skel.l

diff --git a/plex.py b/plex.py
index 2d8eec1..a2712a2 100755 (executable)
--- a/plex.py
+++ b/plex.py
@@ -246,57 +246,73 @@ for i in root[0]:
     i.process(options)
 #print(options.yywrap)
 
-nfa = regex.NFA()
-
-expr = regex.RegexNone()
+class StartCondition:
+  def __init__(self, name, eof_action):
+    self.name = name
+    self.eof_action = eof_action
+start_conditions = [StartCondition('INITIAL', 0)]
+start_condition_exprs = [regex.RegexNone(), regex.RegexNone()]
 actions = []
+eof_actions = ['\t\t\t\tyyterminate();\n']
+
 assert isinstance(root[1], ast.Section2)
 for i in root[1]:
   if isinstance(i, ast.Rule):
     assert isinstance(i[0], ast.StartCondNone)
-    if isinstance(i[1], ast.BOLRule):
-      assert False
-    elif isinstance(i[1], ast.EOFRule):
-      assert False
+    rule_expr = i[1] # pattern part of the rule; i[2] is trailing context, i[3] the action
+    if isinstance(rule_expr, ast.EOFRule):
+      assert isinstance(i[2], regex.RegexEmpty) # an EOF rule must have empty trailing context
+      assert start_conditions[0].eof_action == 0 # still eof_actions[0] (default), so no earlier EOF rule
+      start_conditions[0].eof_action = len(eof_actions) # index the action is about to receive
+      eof_actions.append(i[3])
     else:
-      expr = regex.RegexOr(
+      if isinstance(rule_expr, ast.BOLRule):
+        bol_rule = True
+        rule_expr = rule_expr[0]
+      else:
+        bol_rule = False
+      rule_expr = regex.RegexSequence(
         children = [
-          expr,
-          regex.RegexSequence(
+          rule_expr,
+          regex.RegexGroup(
             children = [
-              i[1],
-              regex.RegexGroup(
-                children = [
-                  i[2]
-                ]
-              )
+              i[2] # trailing context
             ]
           )
         ]
       )
-      actions.append(i[3])
-expr = regex.RegexAnd(
-  children = [
-    regex.RegexRepeat(
-      count0 = 0,
-      children = [
-        regex.RegexCharacterNot(
+      rule_expr.post_process(len(actions))
+      for j in range(int(bol_rule), 2):
+        start_condition_exprs[j] = regex.RegexOr(
           children = [
-            regex.RegexCharacter()
+            start_condition_exprs[j],
+            rule_expr
           ]
         )
-      ]
-    ),
-    expr
-  ]
-)
-expr.post_process()
-expr.add_to_nfa(nfa)
+      actions.append(i[3])
 
-# add EOB rule
-expr = regex.RegexGroup(children = [regex.RegexEmpty()])
-expr.post_process(group_index = len(actions))
-expr.add_to_nfa(nfa)
+nfa = regex.NFA()
+for i in range(len(start_condition_exprs)):
+  # make expr match as much as possible
+  start_condition_exprs[i] = regex.RegexAnd(
+    children = [
+      regex.RegexRepeat(
+        count0 = 0,
+        children = [
+          regex.RegexCharacter(
+            char_set = [0, 0x100]
+          )
+        ]
+      ),
+      start_condition_exprs[i]
+    ]
+  )
+  print('i', i, 'expr', repr(start_condition_exprs[i]))
+  start_condition_exprs[i].add_to_nfa(nfa)
+eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
+eob_expr.post_process(len(actions))
+print('eob expr', repr(eob_expr))
+eob_expr.add_to_nfa(nfa)
 
 dfa = nfa.to_dfa()
 #print(dfa.start_action)
@@ -441,24 +457,46 @@ static const flex_int16_t yy_chk[] = {{{6:s}
           )
         )
       elif line == '/* GENERATE SECTION2 */\n':
+        eof_action_to_start_conditions = [ # entry i lists the start conditions whose EOF action is i
+          [
+            j
+            for j in range(len(start_conditions))
+            if start_conditions[j].eof_action == i # test condition j, not the action index i
+          ]
+          for i in range(len(eof_actions))
+        ]
         fout.write(
           '''/* GENERATE SECTION2 BEGIN */
-{0:s}  case YY_STATE_EOF(INITIAL):
-               yyterminate();
-/* GENERATE SECTION2 END */
+{0:s}{1:s}/* GENERATE SECTION2 END */
 '''.format(
             ''.join(
               [
-                '''    case {0:d}:
-               YY_RULE_SETUP
-               {1:s}           YY_BREAK
-
+                '''case {0:d}:
+YY_RULE_SETUP
+{1:s}  YY_BREAK
 '''.format(
                   i,
                   element.get_text(actions[i], 0)
                 )
                 for i in range(len(actions))
               ]
+            ),
+            ''.join(
+              [
+                '{0:s}{1:s}'.format(
+                  ''.join(
+                    [
+                      '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(
+                        start_conditions[j].name
+                      )
+                      for j in eof_action_to_start_conditions[i]
+                    ]
+                  ),
+                  eof_actions[i]
+                )
+                for i in range(len(eof_actions))
+                if len(eof_action_to_start_conditions[i]) > 0
+              ]
             )
           )
         )
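diff --git a/skel/lex.yy.c.patch b/skel/lex.yy.c.patch
index e968f27..f2b6ae7 100644 (file)
--- a/skel/lex.yy.c.patch
+++ b/skel/lex.yy.c.patch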
@@ -1,5 +1,5 @@
---- lex.yy.c.orig      2018-06-25 10:36:41.898822220 +1000
-+++ lex.yy.c   2018-06-28 19:47:22.872171888 +1000
+--- lex.yy.c.orig      2018-06-29 12:12:25.644004319 +1000
++++ lex.yy.c   2018-06-29 22:32:56.627837990 +1000
 @@ -1,6 +1,3 @@
 -
 -#line 2 "lex.yy.c"
@@ -11,8 +11,8 @@
        (yy_hold_char) = *yy_cp; \
        *yy_cp = '\0'; \
        (yy_c_buf_p) = yy_cp;
--#define YY_NUM_RULES 2
--#define YY_END_OF_BUFFER 3
+-#define YY_NUM_RULES 3
+-#define YY_END_OF_BUFFER 4
 -/* This struct is not used in this scanner,
 -   but its presence is necessary. */
 -struct yy_trans_info
 -      flex_int32_t yy_verify;
 -      flex_int32_t yy_nxt;
 -      };
--static const flex_int16_t yy_acclist[15] =
+-static const flex_int16_t yy_acclist[16] =
 -    {   0,
--     8193,16385, 8193,16385,    3,    2, 8193,    2,16385, 8193,
--        2, 8193,16385, 8193
+-     8194,16386,    1, 8194,16386,    4,    3, 8194,    3,16386,
+-     8194,    3, 8194,16386, 8194
 -    } ;
 -
 -static const flex_int16_t yy_accept[11] =
 -    {   0,
--        1,    3,    5,    6,    7,   10,   12,   14,   15,   15
+-        1,    3,    6,    7,    8,   11,   13,   15,   16,   16
 -    } ;
 -
 -static const flex_int16_t yy_base[11] =
  
  #define INITIAL 0
  
-@@ -777,9 +603,7 @@
+@@ -780,9 +606,7 @@
                }
  
        {
 -#line 2 "skel.l"
 -
--#line 782 "lex.yy.c"
+-#line 785 "lex.yy.c"
 +/* GENERATE SECTION2INITIAL */
  
        while ( /*CONSTCOND*/1 )                /* loops until end-of-file is reached */
                {
-@@ -816,7 +640,7 @@
+@@ -820,7 +644,7 @@
                        *(yy_state_ptr)++ = yy_current_state;
                        ++yy_cp;
                        }
  
  yy_find_action:
                yy_current_state = *--(yy_state_ptr);
-@@ -824,7 +648,7 @@
+@@ -828,7 +652,7 @@
  find_rule: /* we branch to this label when backing up */
                for ( ; ; ) /* until we find what rule we matched */
                        {
                                {
                                yy_act = yy_acclist[(yy_lp)];
                                if ( yy_act & YY_TRAILING_HEAD_MASK ||
-@@ -866,19 +690,7 @@
+@@ -870,24 +694,7 @@
  
                switch ( yy_act )
        { /* beginning of action switch */
 -case 2:
 -YY_RULE_SETUP
 -#line 4 "skel.l"
+-
+-      YY_BREAK
+-case 3:
+-YY_RULE_SETUP
+-#line 5 "skel.l"
 -ECHO;
 -      YY_BREAK
--#line 879 "lex.yy.c"
+-#line 888 "lex.yy.c"
 -                      case YY_STATE_EOF(INITIAL):
 -                              yyterminate();
 +/* GENERATE SECTION2 */
  
        case YY_END_OF_BUFFER:
                {
-@@ -1850,4 +1662,4 @@
+@@ -1862,4 +1669,4 @@
  
  #define YYTABLES_NAME "yytables"
  
--#line 4 "skel.l"
+-#line 5 "skel.l"
 +/* GENERATE SECTION3 */
diff --git a/skel/skel.l b/skel/skel.l
index 8f6b20b..da2a6be 100644 (file)
--- a/skel/skel.l
+++ b/skel/skel.l
@@ -1,3 +1,4 @@
 %option noecs nometa-ecs reject yymore
 %%
+^""
 a*/b*