Option to build python lexer without groups (simpler skeleton, more like flex)

author Nick Downing <nick@ndcode.org>

Sat, 14 May 2022 07:00:07 +0000 (17:00 +1000)

committer Nick Downing <nick@ndcode.org>

Sat, 14 May 2022 07:00:07 +0000 (17:00 +1000)
author Nick Downing <nick@ndcode.org>
Sat, 14 May 2022 07:00:07 +0000 (17:00 +1000)
committer Nick Downing <nick@ndcode.org>
Sat, 14 May 2022 07:00:07 +0000 (17:00 +1000)
diff --git a/Makefile b/Makefile

new file mode 100644 (file)

index 0000000..0b44c84
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+all: ndcode
+#doc
+
+doc: ndcode
+       ${MAKE} -C $@
+
+ndcode:
+       ${MAKE} -C $@
+
+clean:
+       ${MAKE} -C ndcode clean
+       #${MAKE} -C doc clean
+
+.PHONY: all doc ndcode clean
diff --git a/env.sh b/env.sh

new file mode 100644 (file)

index 0000000..4a2b5b6
--- /dev/null
+++ b/env.sh
@@ -0,0 +1 @@
+export PYTHONPATH=`pwd`
diff --git a/ndcode/Makefile b/ndcode/Makefile

new file mode 100644 (file)

index 0000000..9707668
--- /dev/null
+++ b/ndcode/Makefile
@@ -0,0 +1,7 @@
+pilex:
+       ${MAKE} -C $@
+
+clean:
+       ${MAKE} -C pilex clean
+
+.PHONY: pilex clean
diff --git a/ndcode/pilex/cli.py b/ndcode/pilex/cli.py

index ef2b7d9..e616d6f 100755 (executable)
--- a/ndcode/pilex/cli.py
+++ b/ndcode/pilex/cli.py
@@ -21,8 +21,8 @@ import os
  import sys
  from ndcode.pilex import t_def
  from ndcode.pilex import element
-from ndcode.pilex import generate_flex
-from ndcode.pilex import generate_py
+from ndcode.pilex.generate_flex import generate_flex
+from ndcode.pilex.generate_py import generate_py
  
  EXIT_SUCCESS = 0
  EXIT_FAILURE = 1
@@ -32,8 +32,8 @@ def main():
    try:
      opts, args = getopt.getopt(
        sys.argv[1:],
-      'eo:pS:',
-      ['element', 'outfile=', 'python', 'skel=']
+      'ego:pS:',
+      ['element', 'groups', 'outfile=', 'python', 'skel=']
      )
    except getopt.GetoptError as err:
      sys.stderr.write('{0:s}\n'.format(str(err)))
@@ -41,11 +41,14 @@ def main():
  
    out_file = None
    _element = False
+  groups = False
    python = False
    skel_file = None
    for opt, arg in opts:
      if opt == '-e' or opt == '--element':
        _element = True
+    elif opt == '-g' or opt == '--groups':
+      groups = True
      elif opt == '-o' or opt == '--outfile':
        out_file = arg
      elif opt == '-p' or opt == '--python':
@@ -75,12 +78,13 @@ def main():
        _ast = y_tab.yyparse(t_def.AST)
    #element.serialize(_ast, 'a.xml', 'utf-8')
    #_ast = element.deserialize('a.xml', t_def.factory, 'utf-8')
-  _ast.post_process()
+  _ast.post_process(python)
    #element.serialize(_ast, 'b.xml', 'utf-8')
    #_ast = element.deserialize('b.xml', t_def.factory, 'utf-8')
-  (generate_py.generate_py if python else generate_flex.generate_flex)(
+  (generate_py if python else generate_flex)(
      _ast,
      _element,
+    groups,
      home_dir,
      skel_file,
      out_file
diff --git a/ndcode/pilex/generate_flex.py b/ndcode/pilex/generate_flex.py

index d48dc9b..2e37688 100644 (file)
--- a/ndcode/pilex/generate_flex.py
+++ b/ndcode/pilex/generate_flex.py
@@ -17,7 +17,10 @@
  import os
  from ndcode.pilex import regex
  
-def generate_flex(_ast, _element, home_dir, skel_file, out_file):
+def generate_flex(_ast, _element, groups, home_dir, skel_file, out_file):
+  assert not _element
+  assert not groups
+
    # generate group_ref_data which emulates the old way where
    # start = even, end = odd, remaining bits = flex rule index,
    # ignoring user-defined groups by putting start = end = -1:
diff --git a/ndcode/pilex/generate_py.py b/ndcode/pilex/generate_py.py

index a21c43b..7f29bc8 100644 (file)
--- a/ndcode/pilex/generate_py.py
+++ b/ndcode/pilex/generate_py.py
@@ -53,7 +53,243 @@ def ast_text_to_python(ast_text, indent):
  def regex_text_to_python(regex_text, indent):
    return text_to_python(regex_text.get_text(), indent)
  
-def generate_py(_ast, _element, home_dir, skel_file, out_file):
+def generate_py(_ast, _element, groups, home_dir, skel_file, out_file):
+  if not groups:
+    # simplest way, which uses a direct Python port of the flex skeleton
+    # therefore it uses flex tables (most code taken from generate_flex.py)
+    assert not _element
+
+    # generate group_ref_data which emulates the old way where
+    # start = even, end = odd, remaining bits = flex rule index,
+    # ignoring user-defined groups by putting start = end = -1:
+    group_ref_data = []
+    for i in range(len(_ast.flex_rules)):
+      group_ref_data.extend(
+        [(-1, -1) for j in range(len(_ast.flex_rules[i].groups0))] +
+        [(i * 2, i * 2 + 1)] +
+        [(-1, -1) for j in range(len(_ast.flex_rules[i].groups1))]
+      )
+
+    _nfa = _ast.to_nfa(group_ref_data)
+
+    # end of buffer expression (do here because only necessary for flex)
+    eob_regex = regex.RegexGroup(children = [regex.RegexEmpty()])
+    eob_groups = []
+    eob_regex.post_process(eob_groups, caseless = _ast.children[0].caseless)
+    assert len(eob_groups) == 1
+    eob_regex.add_to_nfa(
+      _nfa,
+      [(len(_ast.actions_text) * 2, len(_ast.actions_text) * 2 + 1)]
+    )
+
+    _flex_dfa = _nfa.to_dfa().to_flex_dfa()
+
+    if skel_file is None:
+      skel_file = os.path.join(home_dir, 'skel/skel_py.py')
+    if out_file is None:
+      out_file = (
+        _ast.children[0].outfile
+      if len(_ast.children[0].outfile) else
+        'lex_{0:s}.py'.format(_ast.children[0].prefix)
+      )
+    with open(skel_file, 'r') as fin:
+      with open(out_file, 'w+') as fout:
+        line = fin.readline()
+        while len(line):
+          if line == '# GENERATE SECTION1\n':
+            fout.write(
+              '''# GENERATE SECTION1 BEGIN
+{0:s}# GENERATE END
+'''.format(
+                ''.join(
+                  [
+                    ast_text_to_python(i, '')
+                    for i in _ast.children[0].code_blocks_text
+                  ]
+                )
+              )
+            )
+          elif line == '# GENERATE STARTCONDDECL\n':
+            fout.write(
+              '''# GENERATE STARTCONDDECL BEGIN
+{0:s}# GENERATE END
+'''.format(
+                ''.join(
+                  [
+                    '{0:s} = {1:d}\n'.format(
+                      _ast.start_conditions[i].name,
+                      i
+                    )
+                    for i in range(len(_ast.start_conditions))
+                  ]
+                )
+              )
+            )
+          elif line == '# GENERATE SECTION2\n':
+            fout.write(
+              '''# GENERATE SECTION2 BEGIN
+YY_END_OF_BUFFER = {0:d}
+yy_acclist = [{1:s}
+]
+yy_accept = [{2:s}
+]
+yy_base = [{3:s}
+]
+yy_def = [{4:s}
+]
+yy_nxt = [{5:s}
+]
+yy_chk = [{6:s}
+]
+{7:s}yy_actions = [{8:s}
+]
+{9:s}yy_eof_actions = [{10:s}
+]
+# GENERATE END
+'''.format(
+                len(_ast.actions_text),
+                ','.join(
+                  [
+                    '\n  {0:s}'.format(
+                      ', '.join(
+                        [
+                          '{0:5d}'.format(j)
+                          for j in _flex_dfa.acclist[i:i + 10]
+                        ]
+                      )
+                    )
+                    for i in range(0, _flex_dfa.acclist.shape[0], 10)
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  {0:s}'.format(
+                      ', '.join(
+                        [
+                          '{0:5d}'.format(j)
+                          for j in _flex_dfa.accept[i:i + 10]
+                        ]
+                      )
+                    )
+                    for i in range(0, _flex_dfa.accept.shape[0], 10)
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  {0:s}'.format(
+                      ', '.join(
+                        [
+                          '{0:5d}'.format(j)
+                          for j in _flex_dfa.states[i:i + 10, 0]
+                        ]
+                      )
+                    )
+                    for i in range(0, _flex_dfa.states.shape[0], 10)
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  {0:s}'.format(
+                      ', '.join(
+                        [
+                          '{0:5d}'.format(j)
+                          for j in _flex_dfa.states[i:i + 10, 1]
+                        ]
+                      )
+                    )
+                    for i in range(0, _flex_dfa.states.shape[0], 10)
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  {0:s}'.format(
+                      ', '.join(
+                        [
+                          '{0:5d}'.format(j)
+                          for j in _flex_dfa.entries[i:i + 10, 0]
+                        ]
+                      )
+                    )
+                    for i in range(0, _flex_dfa.entries.shape[0], 10)
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  {0:s}'.format(
+                      ', '.join(
+                        [
+                          '{0:5d}'.format(j)
+                          for j in _flex_dfa.entries[i:i + 10, 1]
+                        ]
+                      )
+                    )
+                    for i in range(0, _flex_dfa.entries.shape[0], 10)
+                  ]
+                ),
+                ''.join(
+                  [
+                    '''def yy_action{0:d}():
+{1:s}  raise YYContinue()
+'''.format(
+                      i,
+                      ast_text_to_python(_ast.actions_text[i], '  ')
+                    )
+                    for i in range(len(_ast.actions_text))
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  yy_action{0:d}'.format(i)
+                    for i in range(len(_ast.actions_text))
+                  ]
+                ),
+                ''.join(
+                  [
+                    '''def yy_eof_action{0:d}():
+{1:s}  return 0
+'''.format(
+                      i,
+                      ast_text_to_python(_ast.eof_actions_text[i], '  ')
+                    )
+                    for i in range(len(_ast.eof_actions_text))
+                  ]
+                ),
+                ','.join(
+                  [
+                    '\n  yy_eof_action{0:d}'.format(i.eof_action)
+                    for i in _ast.start_conditions
+                  ]
+                )
+              )
+            )
+          elif line == '  # GENERATE SECTION2INITIAL\n':
+            fout.write(
+              '''  # GENERATE SECTION2INITIAL BEGIN
+{0:s}  # GENERATE END
+'''.format(
+                ''.join(
+                  [
+                    ast_text_to_python(i, '  ')
+                    for i in _ast.children[1].code_blocks_text
+                  ]
+                )
+              )
+            )
+          elif line == '# GENERATE SECTION3\n':
+            fout.write(
+              '''# GENERATE SECTION3 BEGIN
+{0:s}# GENERATE END
+'''.format(
+                '' if len(_ast.children) < 3 else ast_text_to_python(_ast.children[2], '')
+              )
+            )
+          else:
+            #if _ast.children[0].prefix != 'yy':
+            #  line = line.replace('yywrap', '{0:s}wrap'.format(_ast.children[0].prefix))
+            fout.write(line)
+          line = fin.readline()
+    return
+
    # generate group action function names (ref_data) and body text
    group_ref_data = []
    group_rules_text = []
@@ -182,7 +418,7 @@ yy_action = yy_action{0:d}
    if skel_file is None:
      skel_file = os.path.join(
        home_dir,
-      'skel/skel_py_element.py' if _element else 'skel/skel_py.py'
+      'skel/skel_py_element.py' if _element else 'skel/skel_py_groups.py'
      )
    if out_file is None:
      out_file = (
diff --git a/ndcode/pilex/skel/skel_flex.c b/ndcode/pilex/skel/skel_flex.c

index 36ccadf..b83d213 100644 (file)
--- a/ndcode/pilex/skel/skel_flex.c
+++ b/ndcode/pilex/skel/skel_flex.c
@@ -1,6 +1,6 @@
  #define  YY_INT_ALIGNED short int
  
-/* A lexical scanner generated by plex */
+/* A lexical scanner generated by pilex */
  
  /* GENERATE PREFIX */
  
@@ -374,9 +374,9 @@ static void yynoreturn yy_fatal_error ( const char* msg  );
  /* GENERATE TABLES */
  
  /* Table of booleans, true if rule could match eol. */
-static const flex_int32_t yy_rule_can_match_eol[4] =
+/*static const flex_int32_t yy_rule_can_match_eol[4] =
      {   0,
-0, 0, 0,     };
+0, 0, 0,     };*/
  
  extern int yy_flex_debug;
  int yy_flex_debug = 0;
@@ -727,7 +727,7 @@ find_rule: /* we branch to this label when backing up */
  
                 YY_DO_BEFORE_ACTION;
  
-               if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] )
+               if ( yy_act != YY_END_OF_BUFFER /*&& yy_rule_can_match_eol[yy_act]*/ )
                         {
                         int yyl;
                         for ( yyl = (yy_more_len); yyl < yyleng; ++yyl )
diff --git a/ndcode/pilex/skel/skel_flex.c.patch b/ndcode/pilex/skel/skel_flex.c.patch

index b926700..81f2777 100644 (file)
--- a/ndcode/pilex/skel/skel_flex.c.patch
+++ b/ndcode/pilex/skel/skel_flex.c.patch
@@ -1,12 +1,12 @@
---- skel_flex.c.orig   2018-08-10 22:20:19.412002384 +1000
-+++ skel_flex.c        2018-08-10 22:27:02.848008378 +1000
+--- skel_flex.c.orig   2020-09-27 23:07:53.072425893 +1000
++++ skel_flex.c        2020-09-27 23:08:37.504423504 +1000
  @@ -1,8 +1,8 @@
  -
  -
   #define  YY_INT_ALIGNED short int
   
  -/* A lexical scanner generated by flex */
-+/* A lexical scanner generated by plex */
++/* A lexical scanner generated by pilex */
  +
  +/* GENERATE PREFIX */
   
@@ -20,7 +20,7 @@
   typedef flex_uint8_t YY_CHAR;
   
   FILE *yyin = NULL, *yyout = NULL;
-@@ -369,179 +370,8 @@
+@@ -369,184 +370,13 @@
         (yy_hold_char) = *yy_cp; \
         *yy_cp = '\0'; \
         (yy_c_buf_p) = yy_cp;
@@ -200,7 +200,14 @@
  +/* GENERATE TABLES */
   
   /* Table of booleans, true if rule could match eol. */
- static const flex_int32_t yy_rule_can_match_eol[4] =
+-static const flex_int32_t yy_rule_can_match_eol[4] =
++/*static const flex_int32_t yy_rule_can_match_eol[4] =
+     {   0,
+-0, 0, 0,     };
++0, 0, 0,     };*/
+ 
+ extern int yy_flex_debug;
+ int yy_flex_debug = 0;
  @@ -577,7 +407,9 @@
   #define YY_RESTORE_YY_MORE_OFFSET
   char *yytext;
@@ -239,6 +246,15 @@
                                 {
                                 yy_act = yy_acclist[(yy_lp)];
                                 if ( yy_act & YY_TRAILING_HEAD_MASK ||
+@@ -895,7 +727,7 @@
+ 
+               YY_DO_BEFORE_ACTION;
+ 
+-              if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] )
++              if ( yy_act != YY_END_OF_BUFFER /*&& yy_rule_can_match_eol[yy_act]*/ )
+                       {
+                       int yyl;
+                       for ( yyl = (yy_more_len); yyl < yyleng; ++yyl )
  @@ -909,20 +741,7 @@
   
                 switch ( yy_act )
diff --git a/ndcode/pilex/skel/skel_py.py b/ndcode/pilex/skel/skel_py.py

index 6b228b6..9e9dfe8 100644 (file)
--- a/ndcode/pilex/skel/skel_py.py
+++ b/ndcode/pilex/skel/skel_py.py
@@ -27,6 +27,9 @@
  import bisect
  import sys
  
+YY_TRAILING_MASK = 0x2000
+YY_TRAILING_HEAD_MASK = 0x4000
+
  # this can be redefined in SECTION1
  def YY_AT_BOL():
    return yy_buffer_stack[-1].at_bol
@@ -58,6 +61,12 @@ class YYContinue(Exception):
  class YYTerminate(Exception):
    pass
  
+# each input file is represented as a linked list in which the
+# head is a YYBufferState and the following items (if any) are
+# YYBufferBlock -- the YYBufferList parent class is defined so
+# that we can guarantee the "previous" item to any particular
+# YYBufferBlock is at least a YYBufferList and hence deletion
+# (or other operation that needs the "previous" item) is easy
  class YYBufferList:
    def __init__(self, next = None):
      self.next = next
@@ -80,21 +89,19 @@ yy_buffer_stack = [YYBufferState()]
  
  yystart = INITIAL
  yystart_stack = []
-yy_threads0 = [None]
-yy_threads1 = [None]
-yy_prefix_slop = 1
-
-yy_group_text = None
-yy_group_stack = None
-yy_groups = None
-yy_groups_by_name = None
-yy_action = None
+
  yytext = ''
  yytext_len = 0
  
+yylineno = 0
+
+yy_state_buf = []
+
  YY_NULL = 0
  
  def REJECT():
+  # can't have already committed (can't call unput then REJECT)
+  assert yytext_len == len(yytext) # cannot have already committed
    raise YYReject()
  
  def yyterminate():
@@ -102,12 +109,20 @@ def yyterminate():
  
  def yyless(i):
    global yytext, yytext_len
+  # can't have already committed (can't call unput then yyless)
+  assert yytext_len == len(yytext)
    assert yytext_len >= i
    yytext = yytext[:i]
    yytext_len = i
  
-def unput(text):
-  global yyin, yytext_len
+def yycommit():
+  # now user code cannot call REJECT() / yyless() anymore,
+  # thus we know the match was of length yytext_len and we
+  # can skip that many chars, popping buffer stack for real
+  # we leave yytext_len == 0 afterwards, so double commit is OK
+  # (double commit occurs if user calls the unput function)
+  global yytext_len
+  print('commit', yytext_len)
    while yytext_len:
      block = yy_buffer_stack[-1].next
      while block is None or block.pos >= len(block.text):
@@ -121,21 +136,14 @@ def unput(text):
      i = min(yytext_len, len(block.text) - block.pos)
      block.pos += i
      yytext_len -= i
+
+def unput(text):
+  yycommit() # user cannot call REJECT() / yyless() anymore
    yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
  
  def ECHO():
    yyout.write(yytext)
  
-def yy_rule_start():
-  global yytext, yytext_len
-  yytext = yy_group_text[:yy_group_stack[-1]]
-  yytext_len = yy_group_stack[-1]
-  del yy_group_stack[-2:]
-  YY_RULE_START()
-
-def yy_group_end():
-  pass
-
  def BEGIN(start):
    global yystart
    yystart = start
@@ -155,91 +163,54 @@ def yy_pop_state():
  # GENERATE SECTION2
  
  def yylex():
-  global \
-    yyin, \
-    yy_threads0, \
-    yy_threads1, \
-    yy_prefix_slop, \
-    yy_group_text, \
-    yy_group_stack, \
-    yy_action, \
-    yytext, \
-    yytext_len
+  global yyin, yytext, yytext_len, yylineno
  
    # GENERATE SECTION2INITIAL
  
-  while True:
-    while yytext_len:
-      block = yy_buffer_stack[-1].next
-      while block is None or block.pos >= len(block.text):
-        if block is None:
-          yy_buffer_stack.pop()
-          block = yy_buffer_stack[-1].next
-          yyin = yy_buffer_stack[-1].file_in
-        else:
-          block = block.next
-          yy_buffer_stack[-1].next = block
-      i = min(yytext_len, len(block.text) - block.pos)
-      block.pos += i
-      yytext_len -= i
+  # user may modify yyin, but only while outside yylex()
+  yy_buffer_stack[-1].file_in = yyin
  
+  # start:
+  while True:
+    # this will collect the remainder of each block as the block is
+    # exhausted, any partial block will be added after DFA is stuck
      match = ''
-    match_len = 0
  
-    del yy_threads0[yy_prefix_slop:]
-    yy_threads0.append(None)
+    # this is used to keep track of newlines, firstly during the scan
+    # and secondly when user calls REJECT() or yyless(), basically we
+    # will count newlines added/removed when it is extended/truncated
+    yytext = ''
  
+    # as we progress through the text and exhaust each input file
+    # on the buffer stack, we will "virtually" pop the stack by
+    # decrementing buffer_ptr, but actually leave the stack alone
+    # since it will be popped for real after executing the action
      buffer_ptr = len(yy_buffer_stack) - 1
+
+    # block variable keeps track of where we are in current file's
+    # linked list, if None it means we have run out of blocks from
+    # the current file and in that case we have to append another
+    # block -- we maintain the invariant block == block_prev.next
+    # so we can append the block by pointing block_prev.next at it
      block_prev = yy_buffer_stack[buffer_ptr]
      block = block_prev.next
      if block is not None:
        block_pos = block.pos
  
-    action = yy_dfa_start_action[
-      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
-    ]
-    while action != -1:
-      state, transition = yy_dfa_actions[action]
-      #print('i', i, 'action', action, 'state', state, 'transition', transition)
-
-      i = yy_prefix_slop
-      assert len(yy_threads1) == yy_prefix_slop
-      for trans in transition:
-        if trans[0] == 0: #DFA.TRANSITION_POP:
-          i += trans[1]
-        elif trans[0] == 1: #DFA.TRANSITION_DUP:
-          while i < trans[1]:
-            yy_threads0[:0] = [None] * yy_prefix_slop
-            yy_threads1[:0] = [None] * yy_prefix_slop
-            i += yy_prefix_slop
-            yy_prefix_slop *= 2
-          yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
-          i -= trans[1]
-        elif trans[0] == 2: #DFA.TRANSITION_MARK:
-          yy_threads0[i:i + trans[1]] = [
-            (match_len, trans[2], thread)
-            for thread in yy_threads0[i:i + trans[1]]
-          ]
-        elif trans[0] == 3: #DFA.TRANSITION_MOVE:
-          yy_threads1.extend(yy_threads0[i:i + trans[1]])
-          i += trans[1]
-        #elif trans[0] == DFA.TRANSITION_DEL:
-        #  del yy_threads1[-trans[1]:]
-        else:
-          assert False
-      assert i == len(yy_threads0)
-      yy_threads0, yy_threads1 = yy_threads1, yy_threads0
-      del yy_threads1[yy_prefix_slop:]
-
-      if state == 0:
-        # there is only one match, which is complete
-        assert len(yy_threads0) == yy_prefix_slop + 1
-        assert yy_dfa_states[state][2] == [0]
-        break
-
-      yy_buffer_stack[-1].file_in = yyin
+    # yy_current_state variable keeps track of where we are in the
+    # DFA, and yy_state_buf keeps track of the historical values of
+    # yy_current_state before each character -- when DFA is stuck
+    # we will look backwards in yy_state_buf for an accepting state
+    yy_current_state = yystart * 2 + 1 + YY_AT_BOL()
+    del yy_state_buf[:]
+
+    # yy_match:
+    print('yy_match')
+    while True:
+      # try to get a character
        while block is None or block_pos >= len(block.text):
          if block is None:
+          # out of blocks, read and append one from current file
            file_in = yy_buffer_stack[buffer_ptr].file_in
            text = '' if file_in is None else file_in.readline()
            if len(text):
@@ -247,73 +218,151 @@ def yylex():
              block_pos = 0
              block_prev.next = block
            else:
-            # do not re-attempt read once EOF is reached
+            # EOF for current file
+
+            # mark file as being exhausted, so we do not re-attempt read
              yy_buffer_stack[buffer_ptr].file_in = None
-            yyin = yy_buffer_stack[-1].file_in
+
+            # "virtually" pop the buffer stack, maintaining invariant
              buffer_ptr -= 1
              if buffer_ptr < 0:
-              break # EOF
+              # EOF and no more input stacked (should call yywrap here?)
+              if len(yy_state_buf):
+                # had some characters already, so go and process them first
+                break # goto yy_find_action
+              # EOF on attempt to get first character, means we are done
+              try:
+                token = yy_eof_actions[yystart]()
+                yyin = yy_buffer_stack[-1].file_in = yyin # user may modify
+                return token
+              except YYTerminate:
+                yyin = yy_buffer_stack[-1].file_in = yyin # user may modify
+                return 0
              block_prev = yy_buffer_stack[buffer_ptr]
              block = block_prev.next
              if block is not None:
                block_pos = block.pos
          else:
-          i = match_len - len(match)
+          # out of data for current block
+
+          # take most recent data from block for yytext
+          # at this point len(yy_state_buf) == number of chars scanned
+          i = len(yy_state_buf) - len(match)
            if i:
              match += block.text[block_pos - i:]
+
+          # advance past exhausted block, maintaining invariant
            block_prev = block
            block = block_prev.next
            if block is not None:
              block_pos = block.pos
-      else: 
-        #print('block_pos', block_pos, 'block.text', block.text)
-        action = yy_dfa_states[state][1][
-          bisect.bisect_right(
-            yy_dfa_states[state][0],
-            ord(block.text[block_pos])
-          )
-        ]
-        block_pos += 1
-        match_len += 1
-        continue
-      # EOF
-      if i == 0:
-        try:
-          return yy_eof_actions[yystart]()
-        except YYTerminate:
-          return 0
-      break
-
-    i = match_len - len(match)
+      else:
+        # character available
+        yy_state_buf.append(yy_current_state)
+        yy_c = ord(block.text[block_pos])
+        print('yy_c', yy_c)
+        while yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state:
+          yy_current_state = yy_def[yy_current_state];
+        yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+        if yy_base[yy_current_state] != 0:
+          block_pos += 1
+          continue # goto yy_match
+
+      # we get here when either there is EOF and had some characters, or
+      # we had a character which makes DFA stuck -- do best action we can
+      break # goto yy_find_action
+
+    # if the following assertion fails there is a hole in state machine,
+    # this is not supposed to happen due to the automatic catch-all rule
+    yy_state_ptr = len(yy_state_buf) - 1
+    assert yy_state_ptr >= 0
+    yy_current_state = yy_state_buf[yy_state_ptr]
+    yy_lp = yy_accept[yy_current_state]
+
+    # take most recent data from block for yytext
+    # at this point yy_state_ptr == number of chars scanned
+    i = yy_state_ptr - len(match)
      if i:
-      assert block is not None
        match += block.text[block_pos - i:]
  
-    for i in yy_dfa_states[state][2]:
-      yy_group_text = match
-      yy_group_stack = []
-      yy_groups = None
-      yy_groups_by_name = None
-      yy_action = None
-      yytext = None
-      yytext_len = None
-
-      thread = yy_threads0[yy_prefix_slop + i]
-      #print('thread', thread)
-      while thread is not None:
-        pos, ref_data, thread = thread
-        yy_group_stack.append(pos)
-        ref_data()
-
-      try:
-        return yy_action()
-      except YYReject:
-        pass
-      except YYContinue:
-        break
-      except YYTerminate:
-        return 0
+    # yy_find_action:
+    # the following variables are used to handle trailing context
+    # suppose there is a rule like: foo/bar
+    # then state after 3 characters will be given YY_TRAILING_MASK
+    # and state after 6 characters will be given YY_TRAILING_HEAD_MASK
+    # so we look backwards for the YY_TRAILING_MASK, and when we find
+    # it, store the state after 6 characters into yy_full_XXX, then we
+    # keep looking for YY_TRAILING_HEAD_MASK, and when we find it, we
+    # execute the action -- and if it rejects then restart the search,
+    # but from the 6 characters point (yy_full_XXX) not 3 characters
+    yy_looking_for_trail_begin = 0
+    yy_full_state_ptr = None
+    yy_full_lp = None
+
+    # find_rule:
+    # we branch to this label when backing up (i.e. reject)
+    while True: # until we find what rule matched
+      if yy_lp < yy_accept[yy_current_state + 1]:
+        yy_act = yy_acclist[yy_lp]
+        if (yy_act & YY_TRAILING_HEAD_MASK) or yy_looking_for_trail_begin:
+          if yy_act == yy_looking_for_trail_begin:
+            yy_looking_for_trail_begin = 0
+            yy_act &= ~YY_TRAILING_HEAD_MASK
+            break
+        elif yy_act & YY_TRAILING_MASK:
+          yy_looking_for_trail_begin = (
+            (yy_act & ~YY_TRAILING_MASK) | YY_TRAILING_HEAD_MASK
+          )
+          yy_full_state_ptr = yy_state_ptr
+          yy_full_lp = yy_lp
+        else:
+          yy_full_state_ptr = yy_state_ptr
+          yy_full_lp = yy_lp
+          break
+        yy_lp += 1
+      else:
+        # if the following assertion fails there is a hole in state machine,
+        # this is not supposed to happen due to the automatic catch-all rule
+        yy_state_ptr -= 1
+        assert yy_state_ptr >= 0
+        yy_current_state = yy_state_buf[yy_state_ptr]
+        yy_lp = yy_accept[yy_current_state]
+
+    # at this point yy_state_ptr == number of chars needed
+    # truncate or extend yytext from match, counting added/removed newlines
+    if len(yytext) < yy_state_ptr:
+      i = match.find('\n', len(yytext), yy_state_ptr)
+      while i >= 0:
+        yylineno += 1
+        i = match.find('\n', i + 1, yy_state_ptr)
      else:
-      raise Exception('scanner jammed')
+      i = yytext.find('\n', yy_state_ptr)
+      while i >= 0:
+        yylineno -= 1
+        i = yytext.find('\n', i + 1)
+    yytext = match[:yy_state_ptr]
+
+    try:
+      yytext_len = len(yytext) # this is used by yycommit()
+      token = yy_actions[yy_act]()
+      yycommit() # user cannot call REJECT() / yyless() anymore
+      yyin = yy_buffer_stack[-1].file_in = yyin # user may modify
+      return token
+    except YYReject:
+      yy_state_ptr = yy_full_state_ptr
+      yy_current_state = yy_state_buf[yy_state_ptr]
+      yy_lp = yy_full_lp + 1
+      # goto find_rule
+    except YYContinue:
+      yycommit() # user cannot call REJECT() / yyless() anymore
+      break # goto start
+    except YYTerminate:
+      yycommit() # user cannot call REJECT() / yyless() anymore
+      yyin = yy_buffer_stack[-1].file_in = yyin # user may modify
+      return 0
+
+    # goto find_rule
+
+  # goto start
  
  # GENERATE SECTION3
diff --git a/ndcode/pilex/skel/skel_py_groups.py b/ndcode/pilex/skel/skel_py_groups.py

new file mode 100644 (file)

index 0000000..6b228b6
--- /dev/null
+++ b/ndcode/pilex/skel/skel_py_groups.py
@@ -0,0 +1,319 @@
+# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-with-bison-exception
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+# As a special exception, you may create a larger work that contains part or
+# all of the pilex lexical scanner skeleton and distribute that work under
+# terms of your choice, so long as that work isn't itself a lexical scanner
+# generator using the skeleton or a modified version thereof as a lexical
+# scanner skeleton. Alternatively, if you modify or redistribute the lexical
+# scanner skeleton itself, you may (at your option) remove this special
+# exception, which will cause the skeleton and the resulting pilex output
+# files to be licensed under the GNU General Public License without this
+# special exception.
+
+import bisect
+import sys
+
+# this can be redefined in SECTION1
+def YY_AT_BOL():
+  """Return the at_bol flag of the topmost entry of yy_buffer_stack."""
+  current_buffer = yy_buffer_stack[-1]
+  return current_buffer.at_bol
+
+def yy_set_bol(at_bol):
+  """Store at_bol as the at_bol flag of the topmost yy_buffer_stack entry."""
+  current_buffer = yy_buffer_stack[-1]
+  current_buffer.at_bol = at_bol
+
+def YY_USER_ACTION():
+  """Default hook that does nothing; it is called by YY_RULE_START()."""
+  return None
+
+def YY_RULE_START():
+  """Update the beginning-of-line flag from yytext, then run YY_USER_ACTION().
+
+  The flag is only touched when yytext is non-empty; it becomes True when
+  the last character of yytext is a newline and False otherwise.
+  """
+  # note that this should also be done after yyless() and REJECT(),
+  # and state should be saved in case they result in a null string,
+  # however, it doesn't seem to be in flex, maintain compatibility:
+  if len(yytext) > 0:
+    ends_with_newline = yytext[-1] == '\n'
+    yy_set_bol(ends_with_newline)
+  YY_USER_ACTION()
+
+# GENERATE SECTION1
+
+# GENERATE STARTCONDDECL
+
+class YYReject(Exception):
+  """Raised by REJECT(); yylex() catches it and tries the next candidate."""
+
+class YYContinue(Exception):
+  """Caught by yylex(), which then restarts scanning without returning."""
+
+class YYTerminate(Exception):
+  """Raised by yyterminate(); yylex() catches it and returns 0."""
+
+class YYBufferList:
+  """Base list node; `next` refers to the following node or is None."""
+
+  def __init__(self, next = None):
+    # the parameter name is part of the keyword interface, keep it as is
+    following = next
+    self.next = following
+
+class YYBufferBlock(YYBufferList):
+  """List node holding a chunk of input `text` and a read offset `pos`."""
+
+  def __init__(self, next = None, pos = 0, text = ''):
+    self.text = text
+    self.pos = pos
+    YYBufferList.__init__(self, next = next)
+
+class YYBufferState(YYBufferList):
+  """Head of a block list, with the file it is refilled from and a BOL flag.
+
+  `file_in` is the file object yylex() reads lines from (None when there is
+  nothing more to read) and `at_bol` is the flag read by YY_AT_BOL().
+  """
+
+  def __init__(self, next = None, file_in = None, at_bol = True):
+    self.at_bol = at_bol
+    self.file_in = file_in
+    YYBufferList.__init__(self, next = next)
+
+# default input and output streams: yylex() stores yyin into the topmost
+# buffer and reads lines from it, ECHO() writes to yyout
+yyin = sys.stdin
+yyout = sys.stdout
+# stack of YYBufferState, the last entry is the buffer currently being read
+yy_buffer_stack = [YYBufferState()]
+
+# current start condition, and the stack used by yy_push_state() and
+# yy_pop_state() to save and restore it
+yystart = INITIAL
+yystart_stack = []
+# thread lists used by yylex(), the first yy_prefix_slop entries of each
+# list are padding that yylex() grows on demand
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
+
+# per-match state, filled in by yylex() and by the callbacks it replays
+yy_group_text = None
+yy_group_stack = None
+yy_groups = None
+yy_groups_by_name = None
+yy_action = None
+# text of the current match, and how many of its characters yylex() or
+# unput() still have to consume from the buffered blocks
+yytext = ''
+yytext_len = 0
+
+# same value (0) that yylex() returns when an action raises YYTerminate
+YY_NULL = 0
+
+def REJECT():
+  """Abandon the current rule by raising YYReject (handled in yylex())."""
+  raise YYReject
+
+def yyterminate():
+  """Stop scanning by raising YYTerminate (yylex() then returns 0)."""
+  raise YYTerminate
+
+def yyless(i):
+  """Keep only the first i characters of the current match.
+
+  yytext is truncated to yytext[:i] and yytext_len is set to i, so that the
+  characters after position i are not consumed from the buffers and will be
+  scanned again.
+
+  i must satisfy 0 <= i <= yytext_len.  A negative i used to pass the check,
+  truncate yytext from the wrong end and leave yytext_len negative, which
+  made the consume loop move the block position backwards.
+  """
+  global yytext, yytext_len
+  assert 0 <= i <= yytext_len
+  yytext = yytext[:i]
+  yytext_len = i
+
+def unput(text):
+  """Push text back so that it is scanned before the remaining input.
+
+  The yytext_len characters of the current match that are still pending are
+  consumed from the buffered blocks first (unlinking exhausted blocks and
+  popping empty buffers on the way), then text is inserted as a new block
+  at the front of the topmost buffer.
+  """
+  global yyin, yytext_len
+  while yytext_len:
+    block = yy_buffer_stack[-1].next
+    while block is None or block.pos >= len(block.text):
+      if block is not None:
+        # exhausted block: unlink it from the front of the current buffer
+        block = block.next
+        yy_buffer_stack[-1].next = block
+      else:
+        # current buffer has no blocks left: drop it, resume with the next
+        yy_buffer_stack.pop()
+        yyin = yy_buffer_stack[-1].file_in
+        block = yy_buffer_stack[-1].next
+    step = min(yytext_len, len(block.text) - block.pos)
+    block.pos += step
+    yytext_len -= step
+  current_buffer = yy_buffer_stack[-1]
+  current_buffer.next = YYBufferBlock(current_buffer.next, 0, text)
+
+def ECHO():
+  """Write the current yytext to yyout."""
+  output = yyout
+  output.write(yytext)
+
+def yy_rule_start():
+  """Set yytext and yytext_len from the group stack, then YY_RULE_START().
+
+  The top of yy_group_stack is taken as the length of the match within
+  yy_group_text; the two topmost stack entries are then discarded.
+  """
+  global yytext, yytext_len
+  end = yy_group_stack[-1]
+  yytext_len = end
+  yytext = yy_group_text[:end]
+  del yy_group_stack[-2:]
+  YY_RULE_START()
+
+def yy_group_end():
+  """Do nothing; the caller has already pushed the position being marked."""
+  return None
+
+def BEGIN(start):
+  # make start the current start condition, it is read by yylex() to select
+  # the initial DFA action and the EOF action
+  global yystart
+  yystart = start
+
+def YY_START():
+  # return the current start condition, as last set by BEGIN(),
+  # yy_push_state() or yy_pop_state()
+  return yystart
+
+def yy_push_state(start):
+  """Save the current start condition on yystart_stack and switch to start."""
+  global yystart
+  previous = yystart
+  yystart = start
+  yystart_stack.append(previous)
+
+def yy_pop_state():
+  """Restore the start condition most recently saved by yy_push_state()."""
+  global yystart
+  restored = yystart_stack.pop()
+  yystart = restored
+
+# GENERATE SECTION2
+
+def yylex():
+  """Scan the next token from the buffered input and return it.
+
+  Characters of the previous match that are still pending (yytext_len) are
+  consumed first.  The DFA tables (yy_dfa_start_action, yy_dfa_actions and
+  yy_dfa_states) are then run over the input while marks are recorded in
+  the thread lists.  Finally the accepting threads of the last state are
+  replayed one by one and the action selected by the replay is called.
+
+  Returns the value returned by the action, or the value of the EOF action
+  of the current start condition when end of input is reached with nothing
+  matched, or 0 when an action raises YYTerminate.  Raises
+  Exception('scanner jammed') when no candidate remains.
+  """
+  # yy_groups and yy_groups_by_name are listed here so that the reset done
+  # before each candidate applies to the module-level variables read by the
+  # actions, rather than creating unused locals
+  global \
+    yyin, \
+    yy_threads0, \
+    yy_threads1, \
+    yy_prefix_slop, \
+    yy_group_text, \
+    yy_group_stack, \
+    yy_groups, \
+    yy_groups_by_name, \
+    yy_action, \
+    yytext, \
+    yytext_len
+
+  # GENERATE SECTION2INITIAL
+
+  while True:
+    # consume what is left of the previous match, unlinking exhausted
+    # blocks and popping buffers that have no blocks left
+    while yytext_len:
+      block = yy_buffer_stack[-1].next
+      while block is None or block.pos >= len(block.text):
+        if block is None:
+          yy_buffer_stack.pop()
+          block = yy_buffer_stack[-1].next
+          yyin = yy_buffer_stack[-1].file_in
+        else:
+          block = block.next
+          yy_buffer_stack[-1].next = block
+      i = min(yytext_len, len(block.text) - block.pos)
+      block.pos += i
+      yytext_len -= i
+
+    # match holds the text of the blocks already left behind, match_len
+    # counts every character scanned, including those of the current block
+    match = ''
+    match_len = 0
+
+    # start with a single empty thread after the slop prefix
+    del yy_threads0[yy_prefix_slop:]
+    yy_threads0.append(None)
+
+    buffer_ptr = len(yy_buffer_stack) - 1
+    block_prev = yy_buffer_stack[buffer_ptr]
+    block = block_prev.next
+    if block is not None:
+      block_pos = block.pos
+
+    action = yy_dfa_start_action[
+      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+    ]
+    while action != -1:
+      state, transition = yy_dfa_actions[action]
+      #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+      # apply the transition, building the new thread list in yy_threads1
+      i = yy_prefix_slop
+      assert len(yy_threads1) == yy_prefix_slop
+      for trans in transition:
+        if trans[0] == 0: #DFA.TRANSITION_POP:
+          i += trans[1]
+        elif trans[0] == 1: #DFA.TRANSITION_DUP:
+          # double the slop prefix until there is room in front of i
+          while i < trans[1]:
+            yy_threads0[:0] = [None] * yy_prefix_slop
+            yy_threads1[:0] = [None] * yy_prefix_slop
+            i += yy_prefix_slop
+            yy_prefix_slop *= 2
+          yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
+          i -= trans[1]
+        elif trans[0] == 2: #DFA.TRANSITION_MARK:
+          # prepend (position, callback) to each of the affected threads
+          yy_threads0[i:i + trans[1]] = [
+            (match_len, trans[2], thread)
+            for thread in yy_threads0[i:i + trans[1]]
+          ]
+        elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+          yy_threads1.extend(yy_threads0[i:i + trans[1]])
+          i += trans[1]
+        #elif trans[0] == DFA.TRANSITION_DEL:
+        #  del yy_threads1[-trans[1]:]
+        else:
+          assert False
+      assert i == len(yy_threads0)
+      yy_threads0, yy_threads1 = yy_threads1, yy_threads0
+      del yy_threads1[yy_prefix_slop:]
+
+      if state == 0:
+        # there is only one match, which is complete
+        assert len(yy_threads0) == yy_prefix_slop + 1
+        assert yy_dfa_states[state][2] == [0]
+        break
+
+      # store yyin into the topmost buffer before reading from it
+      yy_buffer_stack[-1].file_in = yyin
+      while block is None or block_pos >= len(block.text):
+        if block is None:
+          file_in = yy_buffer_stack[buffer_ptr].file_in
+          text = '' if file_in is None else file_in.readline()
+          if len(text):
+            block = YYBufferBlock(None, 0, text)
+            block_pos = 0
+            block_prev.next = block
+          else:
+            # do not re-attempt read once EOF is reached
+            yy_buffer_stack[buffer_ptr].file_in = None
+            yyin = yy_buffer_stack[-1].file_in
+            buffer_ptr -= 1
+            if buffer_ptr < 0:
+              break # EOF
+            block_prev = yy_buffer_stack[buffer_ptr]
+            block = block_prev.next
+            if block is not None:
+              block_pos = block.pos
+        else:
+          # leaving an exhausted block: save the part of it that belongs
+          # to the current match before moving on to the next block
+          i = match_len - len(match)
+          if i:
+            match += block.text[block_pos - i:]
+          block_prev = block
+          block = block_prev.next
+          if block is not None:
+            block_pos = block.pos
+      else:
+        #print('block_pos', block_pos, 'block.text', block.text)
+        action = yy_dfa_states[state][1][
+          bisect.bisect_right(
+            yy_dfa_states[state][0],
+            ord(block.text[block_pos])
+          )
+        ]
+        block_pos += 1
+        match_len += 1
+        continue
+      # EOF
+      # the EOF action only applies when nothing at all has been scanned;
+      # this has to be tested with match_len, because i is a scratch value
+      # that may be nonzero here for empty input, or zero with a match
+      # pending after an exhausted block has been skipped
+      if match_len == 0:
+        try:
+          return yy_eof_actions[yystart]()
+        except YYTerminate:
+          return 0
+      break
+
+    # append the part of the match that lies in the current block
+    i = match_len - len(match)
+    if i:
+      assert block is not None
+      match += block.text[block_pos - i:]
+
+    # try the accepting threads of the final state in the order listed
+    for i in yy_dfa_states[state][2]:
+      yy_group_text = match
+      yy_group_stack = []
+      yy_groups = None
+      yy_groups_by_name = None
+      yy_action = None
+      yytext = None
+      yytext_len = None
+
+      # replay the marks of this thread, most recently added first; each
+      # callback finds the position of its mark on top of yy_group_stack
+      thread = yy_threads0[yy_prefix_slop + i]
+      #print('thread', thread)
+      while thread is not None:
+        pos, ref_data, thread = thread
+        yy_group_stack.append(pos)
+        ref_data()
+
+      try:
+        return yy_action()
+      except YYReject:
+        pass
+      except YYContinue:
+        break
+      except YYTerminate:
+        return 0
+    else:
+      raise Exception('scanner jammed')
+
+# GENERATE SECTION3
diff --git a/tests/Makefile b/tests/Makefile

index b60828d..a0e3f86 100644 (file)
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,44 +1,51 @@
  CFLAGS += -g
  
-all: lex_yy.py cal flex0 flex1
+all: lex_yy.py lex_yy_groups.py cal flex0 flex1
  
  # Python scanner test
  lex_yy.py: cal_py.l
-       ../pilex.py --python $<
+       ../ndcode/pilex/cli.py --python $<
+lex_yy_groups.py: cal_py_groups.l
+       ../ndcode/pilex/cli.py --python --groups -o $@ $<
  
  # cal program
-cal: y.tab.o
-       ${CC} ${CFLAGS} -o $@ $<
-
-y.tab.o: y.tab.c lex.yy.c
-
-y.tab.c: cal.y
-       bison -y $<
-
-lex.yy.c: cal.l
-       ../pilex.py $<
+#cal: y.tab.o
+#      ${CC} ${CFLAGS} -o $@ $<
+#
+#y.tab.o: y.tab.c lex.yy.c
+#
+#y.tab.c: cal.y
+#      bison -y $<
+
+cal: cal.o lex.yy.o
+       ${CC} ${CFLAGS} -o $@ $^
+
+lex.yy.c: cal_c.l
+       ../ndcode/pilex/cli.py $<
         # add the patch for state machine diagnostic
         #cp $@ $@.orig
         #patch $@ <$@.patch
  
  # flex0 program
+# requires apt install libfl-dev
  flex0: flex0.o
-       ${CC} ${CFLAGS} -o $@ $< -ll
+       ${CC} ${CFLAGS} -o $@ $< -lfl
  
  flex0.o: flex0.c
  
  flex0.c: flex0.l
-       ../pilex.py -o $@ $<
+       ../ndcode/pilex/cli.py -o $@ $<
  
  # flex1 program
+# requires apt install libfl-dev
  flex1: flex1.o
-       ${CC} ${CFLAGS} -o $@ $< -ll
+       ${CC} ${CFLAGS} -o $@ $< -lfl
  
  flex1.o: flex1.c
  
  flex1.c: flex1.l
-       ../pilex.py -o $@ $<
+       ../ndcode/pilex/cli.py -o $@ $<
  
  # other
  clean:
-       rm -f *.c *.o lex_yy.py cal flex0 flex1
+       rm -f *.c *.o lex_yy.py lex_yy_groups.py cal flex0 flex1
diff --git a/tests/cal.l b/tests/cal_c.l

similarity index 87%

rename from tests/cal.l

rename to tests/cal_c.l

index bbeb981..8e080a7 100644 (file)
--- a/tests/cal.l
+++ b/tests/cal_c.l
@@ -1,5 +1,6 @@
  %{
-/* this is section 1 */
+#define NUM 0x100
+double yylval;
  %}
  
  DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
diff --git a/tests/cal_groups.py b/tests/cal_groups.py

new file mode 100755 (executable)

index 0000000..dd77188
--- /dev/null
+++ b/tests/cal_groups.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+import lex_yy_groups
+
+# print every token returned by the scanner until it returns YY_NULL
+for token in iter(lex_yy_groups.yylex, lex_yy_groups.YY_NULL):
+  if token != lex_yy_groups.NUM:
+    print('{0:02x}'.format(token))
+  else:
+    print('NUM', lex_yy_groups.yylval)
diff --git a/tests/cal_py.l b/tests/cal_py.l

index d0879aa..c2e8d00 100644 (file)
--- a/tests/cal_py.l
+++ b/tests/cal_py.l
@@ -21,37 +21,24 @@ NUM = 0x100
  yylval = None
  %}
  
-DIGIT (?'mantissa'[0-9]+)\.?|(?'mantissa'[0-9]*)\.(?'fraction'[0-9]+)
+DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
  
  %option noecs nometa-ecs noyywrap reject yymore
  
  %%
  
+       # this is section 2 initial
+
  [ ]
  {DIGIT}        {
    global yylval
-  print('yy_groups', yy_groups)
-  print('yy_groups_by_name', yy_groups_by_name)
-
-  # by Python parser:
-  #yylval = float(yytext)
-
-  # by group numbers:
-  #if yy_groups[1] is not None:
-  #  mantissa = yy_groups[1]
-  #  fraction = ''
-  #else:
-  #  mantissa = yy_groups[2]
-  #  fraction = yy_groups[3]
-  #yylval = int(mantissa + fraction) * 10. ** -len(fraction)
-
-  # by group names:
-  mantissa = yy_groups_by_name['mantissa']
-  fraction = yy_groups_by_name.get('fraction', '')
-  yylval = int(mantissa + fraction) * 10. ** -len(fraction)
-
+  yylval = float(yytext)
    return NUM
  }
  \n|.   {
    return ord(yytext[0])
  }
+
+%%
+
+# this is section 3
diff --git a/tests/cal_py_groups.l b/tests/cal_py_groups.l

new file mode 100644 (file)

index 0000000..d0879aa
--- /dev/null
+++ b/tests/cal_py_groups.l
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2019 Nick Downing <nick@ndcode.org>
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+%{
+NUM = 0x100 
+yylval = None
+%}
+
+DIGIT (?'mantissa'[0-9]+)\.?|(?'mantissa'[0-9]*)\.(?'fraction'[0-9]+)
+
+%option noecs nometa-ecs noyywrap reject yymore
+
+%%
+
+[ ]
+{DIGIT}        {
+  global yylval
+  print('yy_groups', yy_groups)
+  print('yy_groups_by_name', yy_groups_by_name)
+
+  # by Python parser:
+  #yylval = float(yytext)
+
+  # by group numbers:
+  #if yy_groups[1] is not None:
+  #  mantissa = yy_groups[1]
+  #  fraction = ''
+  #else:
+  #  mantissa = yy_groups[2]
+  #  fraction = yy_groups[3]
+  #yylval = int(mantissa + fraction) * 10. ** -len(fraction)
+
+  # by group names:
+  mantissa = yy_groups_by_name['mantissa']
+  fraction = yy_groups_by_name.get('fraction', '')
+  yylval = int(mantissa + fraction) * 10. ** -len(fraction)
+
+  return NUM
+}
+\n|.   {
+  return ord(yytext[0])
+}
diff --git a/tests/lex_yy_groups.py b/tests/lex_yy_groups.py

new file mode 100644 (file)

index 0000000..7ae9004
--- /dev/null
+++ b/tests/lex_yy_groups.py
@@ -0,0 +1,466 @@
+# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-with-bison-exception
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+# As a special exception, you may create a larger work that contains part or
+# all of the pilex lexical scanner skeleton and distribute that work under
+# terms of your choice, so long as that work isn't itself a lexical scanner
+# generator using the skeleton or a modified version thereof as a lexical
+# scanner skeleton. Alternatively, if you modify or redistribute the lexical
+# scanner skeleton itself, you may (at your option) remove this special
+# exception, which will cause the skeleton and the resulting pilex output
+# files to be licensed under the GNU General Public License without this
+# special exception.
+
+import bisect
+import sys
+
+# this can be redefined in SECTION1
+def YY_AT_BOL():
+  return yy_buffer_stack[-1].at_bol
+
+def yy_set_bol(at_bol):
+  yy_buffer_stack[-1].at_bol = at_bol
+
+def YY_USER_ACTION():
+  pass
+
+def YY_RULE_START():
+  # note that this should also be done after yyless() and REJECT(),
+  # and state should be saved in case they result in a null string,
+  # however, it doesn't seem to be in flex, maintain compatibility:
+  if len(yytext):
+    yy_set_bol(yytext[-1] == '\n')
+  YY_USER_ACTION()
+
+# GENERATE SECTION1 BEGIN
+NUM = 0x100 
+yylval = None
+# GENERATE END
+
+# GENERATE STARTCONDDECL BEGIN
+INITIAL = 0
+# GENERATE END
+
+class YYReject(Exception):
+  pass
+
+class YYContinue(Exception):
+  pass
+
+class YYTerminate(Exception):
+  pass
+
+class YYBufferList:
+  def __init__(self, next = None):
+    self.next = next
+
+class YYBufferBlock(YYBufferList):
+  def __init__(self, next = None, pos = 0, text = ''):
+    YYBufferList.__init__(self, next)
+    self.pos = pos
+    self.text = text
+
+class YYBufferState(YYBufferList):
+  def __init__(self, next = None, file_in = None, at_bol = True):
+    YYBufferList.__init__(self, next)
+    self.file_in = file_in
+    self.at_bol = at_bol
+
+yyin = sys.stdin
+yyout = sys.stdout
+yy_buffer_stack = [YYBufferState()]
+
+yystart = INITIAL
+yystart_stack = []
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
+
+yy_group_text = None
+yy_group_stack = None
+yy_groups = None
+yy_groups_by_name = None
+yy_action = None
+yytext = ''
+yytext_len = 0
+
+YY_NULL = 0
+
+def REJECT():
+  raise YYReject()
+
+def yyterminate():
+  raise YYTerminate()
+
+def yyless(i):
+  global yytext, yytext_len
+  assert yytext_len >= i
+  yytext = yytext[:i]
+  yytext_len = i
+
+def unput(text):
+  global yyin, yytext_len
+  while yytext_len:
+    block = yy_buffer_stack[-1].next
+    while block is None or block.pos >= len(block.text):
+      if block is None:
+        yy_buffer_stack.pop()
+        block = yy_buffer_stack[-1].next
+        yyin = yy_buffer_stack[-1].file_in
+      else:
+        block = block.next
+        yy_buffer_stack[-1].next = block
+    i = min(yytext_len, len(block.text) - block.pos)
+    block.pos += i
+    yytext_len -= i
+  yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+
+def ECHO():
+  yyout.write(yytext)
+
+def yy_rule_start():
+  global yytext, yytext_len
+  yytext = yy_group_text[:yy_group_stack[-1]]
+  yytext_len = yy_group_stack[-1]
+  del yy_group_stack[-2:]
+  YY_RULE_START()
+
+def yy_group_end():
+  pass
+
+def BEGIN(start):
+  global yystart
+  yystart = start
+
+def YY_START():
+  return yystart
+
+def yy_push_state(start):
+  global yystart
+  yystart_stack.append(yystart)
+  yystart = start
+
+def yy_pop_state():
+  global yystart
+  yystart = yystart_stack.pop()
+
+# GENERATE SECTION2 BEGIN
+def yy_action0():
+  raise YYContinue()
+def yy_action1():
+  global yylval
+  print('yy_groups', yy_groups)
+  print('yy_groups_by_name', yy_groups_by_name)
+
+  # by Python parser:
+  #yylval = float(yytext)
+
+  # by group numbers:
+  #if yy_groups[1] is not None:
+  #  mantissa = yy_groups[1]
+  #  fraction = ''
+  #else:
+  #  mantissa = yy_groups[2]
+  #  fraction = yy_groups[3]
+  #yylval = int(mantissa + fraction) * 10. ** -len(fraction)
+
+  # by group names:
+  mantissa = yy_groups_by_name['mantissa']
+  fraction = yy_groups_by_name.get('fraction', '')
+  yylval = int(mantissa + fraction) * 10. ** -len(fraction)
+
+  return NUM
+  raise YYContinue()
+def yy_action2():
+  return ord(yytext[0])
+  raise YYContinue()
+def yy_action3():
+  ECHO()
+  raise YYContinue()
+def yy_rule0():
+  global yy_groups, yy_groups_by_name, yy_action
+  yy_groups = [yy_group_text[:yy_group_stack[-1]]]
+  yy_groups_by_name = {}
+  yy_action = yy_action0
+def yy_rule1():
+  global yy_groups, yy_groups_by_name, yy_action
+  yy_groups = [yy_group_text[:yy_group_stack[-1]], None, None, None]
+  yy_groups_by_name = {}
+  yy_action = yy_action1
+def yy_rule2():
+  global yy_groups, yy_groups_by_name, yy_action
+  yy_groups = [yy_group_text[:yy_group_stack[-1]]]
+  yy_groups_by_name = {}
+  yy_action = yy_action2
+def yy_rule3():
+  global yy_groups, yy_groups_by_name, yy_action
+  yy_groups = [yy_group_text[:yy_group_stack[-1]]]
+  yy_groups_by_name = {}
+  yy_action = yy_action3
+def yy_group0():
+  yy_temp = yy_group_text[yy_group_stack[-1]:yy_group_stack[-2]]
+  yy_groups[1] = yy_temp
+  yy_groups_by_name['mantissa'] = yy_temp
+  del yy_group_stack[-2:]
+def yy_group1():
+  yy_temp = yy_group_text[yy_group_stack[-1]:yy_group_stack[-2]]
+  yy_groups[2] = yy_temp
+  yy_groups_by_name['mantissa'] = yy_temp
+  del yy_group_stack[-2:]
+def yy_group2():
+  yy_temp = yy_group_text[yy_group_stack[-1]:yy_group_stack[-2]]
+  yy_groups[3] = yy_temp
+  yy_groups_by_name['fraction'] = yy_temp
+  del yy_group_stack[-2:]
+yy_dfa_states = [([256], [0], [0]), ([10, 11, 32, 33, 46, 47, 48, 58, 256], [3,
+4, 3, 5, 3, 6, 3, 7, 3], []), ([10, 11, 32, 33, 46, 47, 48, 58, 256], [3, 4, 3,
+5, 3, 8, 3, 9, 3], []), ([48, 58, 256], [10, 11, 10], [1]), ([46, 47, 48, 58,
+256], [12, 13, 12, 14, 12], [4]), ([48, 58, 256], [10, 15, 10], [1]), ([46, 47,
+48, 58, 256], [12, 16, 12, 17, 12], [4]), ([48, 58, 256], [10, 11, 10], [1]), (
+[48, 58, 256], [10, 15, 10], [1])]
+yy_dfa_actions = [(0, []), (1, [(1, 1), (1, 1), (1, 1), (1, 1), (3, 1), (1, 1),
+(2, 1, yy_group0), (3, 1), (2, 1, yy_group1), (1, 1), (3, 1), (2, 1,
+yy_group_end), (3, 1), (1, 1), (3, 1), (3, 1), (3, 1), (1, 1), (1, 1), (1, 1),
+(0, 1), (1, 1), (2, 1, yy_group0), (0, 1), (2, 1, yy_group1), (1, 1), (0, 1), (
+2, 1, yy_group_end), (0, 1), (1, 1), (0, 1), (0, 1), (0, 1)]), (2, [(1, 1), (1,
+1), (1, 1), (1, 1), (3, 1), (1, 1), (2, 1, yy_group0), (3, 1), (2, 1, yy_group1
+), (1, 1), (3, 1), (2, 1, yy_group_end), (3, 1), (1, 1), (3, 1), (3, 1), (3, 1
+), (1, 1), (1, 1), (1, 1), (0, 1), (1, 1), (2, 1, yy_group0), (0, 1), (2, 1,
+yy_group1), (1, 1), (0, 1), (2, 1, yy_group_end), (0, 1), (1, 1), (0, 1), (0, 1
+), (0, 1)]), (0, [(1, 7), (0, 5), (2, 1, yy_rule_start), (2, 1, yy_rule2), (0,
+1), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 6), (2, 1, yy_rule_start), (2,
+1, yy_rule2), (3, 1), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 1)]), (0, [(
+1, 7), (0, 4), (2, 1, yy_rule_start), (2, 1, yy_rule2), (0, 2), (2, 1,
+yy_rule_start), (2, 1, yy_rule3), (0, 5), (2, 1, yy_rule_start), (2, 1,
+yy_rule2), (3, 1), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 1)]), (
+0, [(1, 7), (2, 1, yy_rule_start), (2, 1, yy_rule0), (0, 5), (2, 1,
+yy_rule_start), (2, 1, yy_rule2), (0, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule3), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule0), (3, 1), (0, 4), (2,
+1, yy_rule_start), (2, 1, yy_rule2), (0, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule3), (0, 1)]), (3, [(1, 7), (0, 3), (2, 1, yy_group2), (3, 1), (0, 1), (
+2, 1, yy_rule_start), (2, 1, yy_rule2), (0, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule3), (0, 4), (2, 1, yy_group2), (0, 2), (2, 1, yy_rule_start), (2, 1,
+yy_rule2), (3, 1), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 1)]), (4, [(1,
+7), (0, 1), (1, 1), (3, 1), (2, 1, yy_group_end), (1, 1), (3, 1), (2, 1,
+yy_rule_start), (2, 1, yy_rule1), (0, 1), (1, 1), (3, 1), (2, 1, yy_group_end),
+(3, 1), (0, 2), (2, 1, yy_rule_start), (2, 1, yy_rule2), (0, 1), (2, 1,
+yy_rule_start), (2, 1, yy_rule3), (0, 2), (1, 1), (0, 1), (2, 1, yy_group_end),
+(1, 1), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (3, 1), (1, 1), (0, 1
+), (2, 1, yy_group_end), (0, 3), (2, 1, yy_rule_start), (2, 1, yy_rule2), (0, 1
+), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 1)]), (5, [(1, 7), (0, 3), (2,
+1, yy_group2), (3, 1), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule2), (0, 1),
+(2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 4), (2, 1, yy_group2), (0, 2), (2,
+1, yy_rule_start), (2, 1, yy_rule2), (3, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule3), (0, 1)]), (6, [(1, 7), (0, 1), (1, 1), (3, 1), (2, 1, yy_group_end),
+(1, 1), (3, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (0, 1), (1, 1), (3, 1
+), (2, 1, yy_group_end), (3, 1), (0, 2), (2, 1, yy_rule_start), (2, 1, yy_rule2
+), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 2), (1, 1), (0, 1), (2,
+1, yy_group_end), (1, 1), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (3,
+1), (1, 1), (0, 1), (2, 1, yy_group_end), (0, 3), (2, 1, yy_rule_start), (2, 1,
+yy_rule2), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule3), (0, 1)]), (0, [(1,
+1), (0, 2), (3, 1)]), (7, [(1, 1), (1, 1), (3, 1), (2, 1, yy_group_end), (2, 1,
+yy_rule_start), (2, 1, yy_rule1), (0, 1), (1, 1), (0, 1), (2, 1, yy_group_end),
+(2, 1, yy_rule_start), (2, 1, yy_rule1), (3, 1), (0, 1)]), (0, [(1, 4), (0, 8),
+(3, 1)]), (3, [(1, 4), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (0, 2),
+(2, 1, yy_group2), (3, 1), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (3,
+1), (0, 1), (2, 1, yy_group2), (0, 2)]), (4, [(1, 4), (1, 1), (3, 1), (2, 1,
+yy_group_end), (1, 1), (3, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (0, 2),
+(1, 1), (3, 1), (2, 1, yy_group_end), (3, 1), (0, 1), (1, 1), (0, 1), (2, 1,
+yy_group_end), (1, 1), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1), (3, 1),
+(0, 1), (1, 1), (0, 1), (2, 1, yy_group_end), (0, 3)]), (8, [(1, 1), (1, 1), (
+3, 1), (2, 1, yy_group_end), (2, 1, yy_rule_start), (2, 1, yy_rule1), (0, 1), (
+1, 1), (0, 1), (2, 1, yy_group_end), (2, 1, yy_rule_start), (2, 1, yy_rule1), (
+3, 1), (0, 1)]), (5, [(1, 4), (0, 1), (2, 1, yy_rule_start), (2, 1, yy_rule1),
+(0, 2), (2, 1, yy_group2), (3, 1), (0, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule1), (3, 1), (0, 1), (2, 1, yy_group2), (0, 2)]), (6, [(1, 4), (1, 1), (
+3, 1), (2, 1, yy_group_end), (1, 1), (3, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule1), (0, 2), (1, 1), (3, 1), (2, 1, yy_group_end), (3, 1), (0, 1), (1, 1
+), (0, 1), (2, 1, yy_group_end), (1, 1), (0, 1), (2, 1, yy_rule_start), (2, 1,
+yy_rule1), (3, 1), (0, 1), (1, 1), (0, 1), (2, 1, yy_group_end), (0, 3)])]
+yy_dfa_start_action = [1, 2]
+def yy_eof_action0():
+  yyterminate()
+  return 0
+yy_eof_actions = [
+  yy_eof_action0
+]
+# GENERATE END
+
+def yylex():
+  global \
+    yyin, \
+    yy_threads0, \
+    yy_threads1, \
+    yy_prefix_slop, \
+    yy_group_text, \
+    yy_group_stack, \
+    yy_action, \
+    yytext, \
+    yytext_len
+
+  # GENERATE SECTION2INITIAL BEGIN
+  # GENERATE END
+
+  while True:
+    while yytext_len:
+      block = yy_buffer_stack[-1].next
+      while block is None or block.pos >= len(block.text):
+        if block is None:
+          yy_buffer_stack.pop()
+          block = yy_buffer_stack[-1].next
+          yyin = yy_buffer_stack[-1].file_in
+        else:
+          block = block.next
+          yy_buffer_stack[-1].next = block
+      i = min(yytext_len, len(block.text) - block.pos)
+      block.pos += i
+      yytext_len -= i
+
+    match = ''
+    match_len = 0
+
+    del yy_threads0[yy_prefix_slop:]
+    yy_threads0.append(None)
+
+    buffer_ptr = len(yy_buffer_stack) - 1
+    block_prev = yy_buffer_stack[buffer_ptr]
+    block = block_prev.next
+    if block is not None:
+      block_pos = block.pos
+
+    action = yy_dfa_start_action[
+      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+    ]
+    while action != -1:
+      state, transition = yy_dfa_actions[action]
+      #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+      i = yy_prefix_slop
+      assert len(yy_threads1) == yy_prefix_slop
+      for trans in transition:
+        if trans[0] == 0: #DFA.TRANSITION_POP:
+          i += trans[1]
+        elif trans[0] == 1: #DFA.TRANSITION_DUP:
+          while i < trans[1]:
+            yy_threads0[:0] = [None] * yy_prefix_slop
+            yy_threads1[:0] = [None] * yy_prefix_slop
+            i += yy_prefix_slop
+            yy_prefix_slop *= 2
+          yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
+          i -= trans[1]
+        elif trans[0] == 2: #DFA.TRANSITION_MARK:
+          yy_threads0[i:i + trans[1]] = [
+            (match_len, trans[2], thread)
+            for thread in yy_threads0[i:i + trans[1]]
+          ]
+        elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+          yy_threads1.extend(yy_threads0[i:i + trans[1]])
+          i += trans[1]
+        #elif trans[0] == DFA.TRANSITION_DEL:
+        #  del yy_threads1[-trans[1]:]
+        else:
+          assert False
+      assert i == len(yy_threads0)
+      yy_threads0, yy_threads1 = yy_threads1, yy_threads0
+      del yy_threads1[yy_prefix_slop:]
+
+      if state == 0:
+        # there is only one match, which is complete
+        assert len(yy_threads0) == yy_prefix_slop + 1
+        assert yy_dfa_states[state][2] == [0]
+        break
+
+      yy_buffer_stack[-1].file_in = yyin
+      while block is None or block_pos >= len(block.text):
+        if block is None:
+          file_in = yy_buffer_stack[buffer_ptr].file_in
+          text = '' if file_in is None else file_in.readline()
+          if len(text):
+            block = YYBufferBlock(None, 0, text)
+            block_pos = 0
+            block_prev.next = block
+          else:
+            # do not re-attempt read once EOF is reached
+            yy_buffer_stack[buffer_ptr].file_in = None
+            yyin = yy_buffer_stack[-1].file_in
+            buffer_ptr -= 1
+            if buffer_ptr < 0:
+              break # EOF
+            block_prev = yy_buffer_stack[buffer_ptr]
+            block = block_prev.next
+            if block is not None:
+              block_pos = block.pos
+        else:
+          i = match_len - len(match)
+          if i:
+            match += block.text[block_pos - i:]
+          block_prev = block
+          block = block_prev.next
+          if block is not None:
+            block_pos = block.pos
+      else: 
+        #print('block_pos', block_pos, 'block.text', block.text)
+        action = yy_dfa_states[state][1][
+          bisect.bisect_right(
+            yy_dfa_states[state][0],
+            ord(block.text[block_pos])
+          )
+        ]
+        block_pos += 1
+        match_len += 1
+        continue
+      # EOF
+      if i == 0:
+        try:
+          return yy_eof_actions[yystart]()
+        except YYTerminate:
+          return 0
+      break
+
+    i = match_len - len(match)
+    if i:
+      assert block is not None
+      match += block.text[block_pos - i:]
+
+    for i in yy_dfa_states[state][2]:
+      yy_group_text = match
+      yy_group_stack = []
+      yy_groups = None
+      yy_groups_by_name = None
+      yy_action = None
+      yytext = None
+      yytext_len = None
+
+      thread = yy_threads0[yy_prefix_slop + i]
+      #print('thread', thread)
+      while thread is not None:
+        pos, ref_data, thread = thread
+        yy_group_stack.append(pos)
+        ref_data()
+
+      try:
+        return yy_action()
+      except YYReject:
+        pass
+      except YYContinue:
+        break
+      except YYTerminate:
+        return 0
+    else:
+      raise Exception('scanner jammed')
+
+# GENERATE SECTION3 BEGIN
+# GENERATE END
author	Nick Downing <nick@ndcode.org>
	Sat, 14 May 2022 07:00:07 +0000 (17:00 +1000)
committer	Nick Downing <nick@ndcode.org>
	Sat, 14 May 2022 07:00:07 +0000 (17:00 +1000)
Makefile	[new file with mode: 0644]	patch \| blob
env.sh	[new file with mode: 0644]	patch \| blob
ndcode/Makefile	[new file with mode: 0644]	patch \| blob
ndcode/pilex/cli.py		patch \| blob \| history
ndcode/pilex/generate_flex.py		patch \| blob \| history
ndcode/pilex/generate_py.py		patch \| blob \| history
ndcode/pilex/skel/skel_flex.c		patch \| blob \| history
ndcode/pilex/skel/skel_flex.c.patch		patch \| blob \| history
ndcode/pilex/skel/skel_py.py		patch \| blob \| history
ndcode/pilex/skel/skel_py_groups.py	[new file with mode: 0644]	patch \| blob
tests/Makefile		patch \| blob \| history
tests/cal_c.l	[moved from tests/cal.l with 87% similarity]	patch \| blob \| history
tests/cal_groups.py	[new file with mode: 0755]	patch \| blob
tests/cal_py.l		patch \| blob \| history
tests/cal_py_groups.l	[new file with mode: 0644]	patch \| blob
tests/lex_yy_groups.py	[new file with mode: 0644]	patch \| blob