Implement numbered and named groups with support from pilex (not via user code)

author Nick Downing <downing.nick@gmail.com>

Wed, 26 Sep 2018 05:33:33 +0000 (15:33 +1000)

committer Nick Downing <downing.nick@gmail.com>

Wed, 26 Sep 2018 05:33:33 +0000 (15:33 +1000)
author Nick Downing <downing.nick@gmail.com>
Wed, 26 Sep 2018 05:33:33 +0000 (15:33 +1000)
committer Nick Downing <downing.nick@gmail.com>
Wed, 26 Sep 2018 05:33:33 +0000 (15:33 +1000)
diff --git a/generate_py.py b/generate_py.py

index 1b83417..0085154 100644 (file)
--- a/generate_py.py
+++ b/generate_py.py
@@ -40,10 +40,9 @@ def regex_text_to_python(regex_text, indent):
  def generate_py(_ast, _element, home_dir, skel_file, out_file):
    # generate group action function names (ref_data) and body text
    group_ref_data = []
-  group_actions_text = []
    group_rules_text = []
+  group_actions_text = []
    for i in _ast.flex_rules:
-    # add actions for capturing groups in ordinary regex
      group_ref_data.extend(
        [
          (
@@ -51,38 +50,69 @@ def generate_py(_ast, _element, home_dir, skel_file, out_file):
            'yy_group_end'
          )
          for j in range(len(i.groups0))
+      ] +
+      [
+        (
+          'yy_rule_start',
+          'yy_rule{0:d}'.format(len(group_rules_text))
+        )
+      ] +
+      [
+        (
+          'yy_group{0:d}'.format(len(group_actions_text) + j),
+          'yy_group_end'
+        )
+        for j in range(len(i.groups1))
        ]
      )
-    group_actions_text.extend([j[0] for j in i.groups0])
-
-    # add group for the rule, recognizing this matches the rule
-    group_ref_data.append(
-      (
-        'yy_rule_start',
-        'yy_rule{0:d}'.format(len(group_rules_text))
-      )
-    )
+    groups = i.groups0 + i.groups1
      group_rules_text.append(
        regex.Text(
-        text = '''global yy_action
-yy_action = yy_action{0:d}
-'''.format(i.action)
+        text = '''global yy_groups, yy_groups_by_name, yy_action
+yy_groups = [yy_group_text[:yy_group_stack[-1]]{0:s}]
+yy_groups_by_name = {{}}
+yy_action = yy_action{1:d}
+'''.format(
+          ', None' * len(groups),
+          i.action
+        )
        )
      )
-
-    # add actions for capturing groups in trailing context regex
-    group_ref_data.extend(
+    group_actions_text.extend(
        [
          (
-          'yy_group{0:d}'.format(len(group_actions_text) + j),
-          'yy_group_end'
+          groups[j][0]
+        if isinstance(groups[j], regex.RegexGroupAction) else
+          regex.Text(
+            text = '''if yy_groups[{0:d}] is None:
+  yy_groups[{1:d}] = yy_group_text[yy_group_stack[-1]:yy_group_stack[-2]]
+  if '{2:s}' not in yy_groups_by_name:
+    yy_groups_by_name['{3:s}'] = yy_groups[{4:d}]
+del yy_group_stack[-2:]
+'''.format(
+              j + 1,
+              j + 1,
+              groups[j][0].get_text(),
+              groups[j][0].get_text(),
+              j + 1
+            )
+          )
+        if isinstance(groups[j], regex.RegexGroupName) else
+          regex.Text(
+            text = '''if yy_groups[{0:d}] is None:
+  yy_groups[{1:d}] = yy_group_text[yy_group_stack[-1]:yy_group_stack[-2]]
+del yy_group_stack[-2:]
+'''.format(
+              j + 1,
+              j + 1
+            )
+          )
          )
-        for j in range(len(i.groups1))
+        for j in range(len(groups))
        ]
      )
-    group_actions_text.extend([j[0] for j in i.groups1])
  
-  # add group for default rule, recognizing this matches the rule
+  # add group for default rule
    group_ref_data.append(
      (
        'yy_rule_start',
@@ -91,9 +121,13 @@ yy_action = yy_action{0:d}
    )
    group_rules_text.append(
      regex.Text(
-      text = '''global yy_action
+      text = '''global yy_groups, yy_groups_by_name, yy_action
+yy_groups = [yy_group_text[:yy_group_stack[-1]]]
+yy_groups_by_name = {{}}
  yy_action = yy_action{0:d}
-'''.format(_ast.default_action)
+'''.format(
+        _ast.default_action
+      )
      )
    )
  
diff --git a/skel/skel_py.py b/skel/skel_py.py

index 9e3a911..816aaf1 100644 (file)
--- a/skel/skel_py.py
+++ b/skel/skel_py.py
@@ -40,6 +40,8 @@ yy_prefix_slop = 1
  
  yy_group_text = None
  yy_group_stack = None
+yy_groups = None
+yy_groups_by_name = None
  yy_action = None
  yytext = ''
  yytext_len = 0
@@ -75,8 +77,9 @@ def unput(text):
  
  def yy_rule_start():
    global yytext, yytext_len
-  yytext_len = yy_group_stack.pop()
-  yytext = yy_group_text[:yytext_len]
+  yytext = yy_group_text[:yy_group_stack[-1]]
+  yytext_len = yy_group_stack[-1]
+  del yy_group_stack[-2:]
  
  def yy_group_end():
    pass
@@ -216,6 +219,8 @@ def yylex():
      for i in yy_dfa_states[state][2]:
        yy_group_text = match
        yy_group_stack = []
+      yy_groups = None
+      yy_groups_by_name = None
        yy_action = None
        yytext = None
        yytext_len = None
@@ -227,10 +232,6 @@ def yylex():
          yy_group_stack.append(pos)
          ref_data()
  
-      #print('yytext', yytext)
-      #print('yytext_len', yytext_len)
-      #print('yy_action', yy_action)
-
        try:
          return yy_action()
        except YYReject:
diff --git a/tests/cal_py.l b/tests/cal_py.l

index 1429385..a4fadeb 100644 (file)
--- a/tests/cal_py.l
+++ b/tests/cal_py.l
@@ -1,48 +1,35 @@
  %{
  NUM = 0x100 
  yylval = None
-groups = None
-groups_by_name = {}
-def gc(n):
-  global groups, groups_by_name
-  groups = [None for i in range(n)]
-  groups_by_name = {}
-  del yy_group_stack[-2:]
-def g(n, name = None):
-  if groups[n] is None:
-    groups[n] = yy_group_text[yy_group_stack[-1]:yy_group_stack[-2]]
-    if name is not None and name not in groups_by_name:
-      groups_by_name[name] = groups[n]
-  del yy_group_stack[-2:]
  %}
  
-DIGIT (?{g(1, 'mantissa')}[0-9]+)\.?|(?{g(2, 'mantissa')}[0-9]*)\.(?{g(3, 'fraction')}[0-9]+)
+DIGIT (?'mantissa'[0-9]+)\.?|(?'mantissa'[0-9]*)\.(?'fraction'[0-9]+)
  
  %option noecs nometa-ecs noyywrap reject yymore
  
  %%
  
  [ ]
-(?{g(0)}{DIGIT})(?{gc(4)}"")   {
+{DIGIT}        {
    global yylval
-  print('groups', groups)
-  print('groups_by_name', groups_by_name)
+  print('yy_groups', yy_groups)
+  print('yy_groups_by_name', yy_groups_by_name)
  
    # by Python parser:
    #yylval = float(yytext)
  
    # by group numbers:
-  #if groups[1] is not None:
-  #  mantissa = groups[1]
+  #if yy_groups[1] is not None:
+  #  mantissa = yy_groups[1]
    #  fraction = ''
    #else:
-  #  mantissa = groups[2]
-  #  fraction = groups[3]
+  #  mantissa = yy_groups[2]
+  #  fraction = yy_groups[3]
    #yylval = int(mantissa + fraction) * 10 ** -len(fraction)
  
    # by group names:
-  mantissa = groups_by_name['mantissa']
-  fraction = groups_by_name.get('fraction', '')
+  mantissa = yy_groups_by_name['mantissa']
+  fraction = yy_groups_by_name.get('fraction', '')
    yylval = int(mantissa + fraction) * 10 ** -len(fraction)
  
    return NUM
author	Nick Downing <downing.nick@gmail.com>
	Wed, 26 Sep 2018 05:33:33 +0000 (15:33 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Wed, 26 Sep 2018 05:33:33 +0000 (15:33 +1000)
generate_py.py		patch | blob | history
skel/skel_py.py		patch | blob | history
tests/cal_py.l		patch | blob | history