From f0e381e7e467e390dbd871db3caec2ac8575425e Mon Sep 17 00:00:00 2001
From: Nick Downing <nick@ndcode.org>
Date: Sat, 12 Jan 2019 14:12:16 +1100
Subject: [PATCH] Reinstate the translation based on the new pilex/piyacc
 parser, tidy up a bit

---
 ast.py         |  57 +++++++++++----
 c_to_python.py |  23 ++----
 element.py     |  10 +++
 l_to_python.py |  37 +++++-----
 tests/scan.l   | 186 ++++++++++++++++++++++++++++++++++++-------------
 5 files changed, 215 insertions(+), 98 deletions(-)

diff --git a/ast.py b/ast.py
index cb04951..f2aceef 100644
--- a/ast.py
+++ b/ast.py
@@ -6,7 +6,7 @@ class Context:
     self,
     indent = '',
     enclosing_loop = None,
-    initial = False,
+    #initial = False,
     translate_identifier = {
       'NULL': 'None',
       'false': 'False',
@@ -16,10 +16,38 @@ class Context:
   ):
     self.indent = '  '
     self.enclosing_loop = enclosing_loop
-    self.initial = initial # whether to add declared identifiers as 'self.'
+    #self.initial = initial # whether to add declared identifiers as 'self.'
     self.translate_identifier = translate_identifier
 
 class AST(element.Element):
+  class Text(element.Element): # element whose default tag is 'AST_Text'
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'AST_Text',
+      attrib = {},
+      text = '',
+      children = []
+    ):
+      element.Element.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children
+      )
+    def copy(self, factory = None):
+      result = element.Element.copy(
+        self,
+        AST.Text if factory is None else factory # qualify: class-body names are not visible inside methods
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.AST.Text({0:s})'.format(', '.join(params))
+    # GENERATE END
+
   class Element(element.Element):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -105,7 +133,7 @@ class AST(element.Element):
       return 'ast.AST.DeclarationOrStatement({0:s})'.format(', '.join(params))
     # GENERATE END
     def translate(self, context):
-      Element.translate(self, context)
+      AST.Element.translate(self, context)
       element.set_text(
         self,
         0,
@@ -528,11 +556,11 @@ class AST(element.Element):
     def translate(self, context):
       assert len(self) == 3
       self[0].translate(context)
-      initial_save = context.initial
-      context.initial = False
+      #initial_save = context.initial
+      #context.initial = False
       self[1].translate(context)
       self[2].translate(context)
-      context.initial = initial_save
+      #context.initial = initial_save
       element.set_text(self, 0, '')
       element.set_text(self, 1, '(')
       element.set_text(self, 2, '')
@@ -568,10 +596,10 @@ class AST(element.Element):
     def translate(self, context):
       assert len(self) == 2
       self[0].translate(context)
-      initial_save = context.initial
-      context.initial = False
+      #initial_save = context.initial
+      #context.initial = False
       self[1].translate(context)
-      context.initial = initial_save
+      #context.initial = initial_save
       element.set_text(self, 0, '')
       element.set_text(self, 1, '(')
       element.set_text(self, 2, ')')
@@ -604,11 +632,11 @@ class AST(element.Element):
       return 'ast.AST.DeclaratorIdentifier({0:s})'.format(', '.join(params))
     # GENERATE END
     def translate(self, context):
-      if context.initial:
-        text = element.get_text(self[0], 0)
-        assert text not in context.translate_identifier
-        context.translate_identifier[text] = 'self.{0:s}'.format(text)
-      Declarator.translate(self, context)
+      #if context.initial:
+      #  text = element.get_text(self[0], 0)
+      #  assert text not in context.translate_identifier
+      #  context.translate_identifier[text] = 'self.{0:s}'.format(text)
+      AST.Declarator.translate(self, context)
 
   class DeclaratorPointer(Declarator):
     # GENERATE ELEMENT() BEGIN
@@ -4058,6 +4086,7 @@ class AST(element.Element):
 # GENERATE FACTORY(element.Element) BEGIN
 tag_to_class = {
   'AST': AST,
+  'AST_Text': AST.Text,
   'AST_Element': AST.Element,
   'AST_DeclarationOrStatement': AST.DeclarationOrStatement,
   'AST_AlignAsExpression': AST.AlignAsExpression,
diff --git a/c_to_python.py b/c_to_python.py
index 549a072..a60fed5 100644
--- a/c_to_python.py
+++ b/c_to_python.py
@@ -1,24 +1,15 @@
-import ansi_c_yylex
-import ansi_c_yyparse
 import ast
 import element
+import lex_yy
 #import xml.etree.ElementTree
+import y_tab
 
 def c_to_python(context, text):
-  root = ast.BlockItemList(text = text)
-  ansi_c_yyparse.yyparse(
-    root,
-    0,
-    0,
-    ast.factory,
-    ansi_c_yylex.yylex(
-      root,
-      0,
-      0,
-      ast.factory,
-      iter([])
-    )
-  )
+  lex_yy.yyin = None
+  lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)]
+  lex_yy.yytext_len = 0
+  lex_yy.unput(text)
+  root = y_tab.yyparse(ast.AST.BlockItemList)
   root.translate(context)
   #print('@@@')
   #xml.etree.ElementTree.dump(root)
diff --git a/element.py b/element.py
index 2121e08..2d02217 100644
--- a/element.py
+++ b/element.py
@@ -160,6 +160,16 @@ def set_text(root, i, text):
   else:
     root[i - 1].tail = text
 
+def to_text(root):
+  # flatten root into one string: for each child in order, the text
+  # segment get_text() gives for its index, then the child recursively
+  pieces = []
+  for i, child in enumerate(root):
+    pieces.append(get_text(root, i))
+    pieces.append(to_text(child))
+  pieces.append(get_text(root, len(root))) # segment after the last child
+  return ''.join(pieces)
+
 def concatenate(children, factory = Element, *args, **kwargs):
   root = factory(*args, **kwargs)
   for child in children:
diff --git a/l_to_python.py b/l_to_python.py
index 4f11487..0411558 100755
--- a/l_to_python.py
+++ b/l_to_python.py
@@ -27,20 +27,20 @@ def my_rstrip(text, indent):
   return text[:i].rstrip('\t ') + indent + text[i:]
 
 context = ast.Context()
-context.translate_identifier['BEGIN'] = 'self.BEGIN'
-context.translate_identifier['yylval'] = 'ref_data.yylval'
-context.translate_identifier['yytext'] = 'self.yytext'
-context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
-context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
+#context.translate_identifier['BEGIN'] = 'self.BEGIN'
+#context.translate_identifier['yylval'] = 'ref_data.yylval'
+#context.translate_identifier['yytext'] = 'self.yytext'
+#context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
+#context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
 
 actions = []
 with open('a.c', 'w') as fout:
   def extract(i, parent, indent):
-    if i.tag == 'PLex_Section1Or2_CodeBlock':
+    if i.tag == 'AST_Section1Or2_CodeBlock':
       initial = True
-    elif i.tag == 'PLex_Section2_Rule_Action':
+    elif i.tag == 'AST_Section2_Rule_Action':
       if len(i) == 0: # continued actions
-        assert parent.tag == 'PLex_Section2_Rule'
+        assert parent.tag == 'AST_Section2_Rule'
         assert len(parent) == 3
         element.set_text(
           parent,
@@ -51,17 +51,17 @@ with open('a.c', 'w') as fout:
       initial = False
     else:
       child_indent = indent
-      if i.tag == 'PLex_Section1_StartConditions':
+      if i.tag == 'AST_Section1_StartConditions':
         for j in i:
-          assert j.tag == 'PLex_Name'
+          assert j.tag == 'AST_Name'
           text = element.get_text(j, 0)
           context.translate_identifier[text] = 'DFA.{0:s}'.format(text)
       elif (
-        i.tag == 'PLex_Section2_Rule' or
-        i.tag == 'PLex_Section2_Rule_FLexRule'
+        i.tag == 'AST_Section2_Rule' or
+        i.tag == 'AST_Section2_Rule_FLexRule'
       ):
         element.set_text(i, 0, element.get_text(i, 0).lstrip('\t '))
-      elif i.tag == 'PLex_Section2_CompoundRule':
+      elif i.tag == 'AST_Section2_CompoundRule':
         child_indent += '  '
         element.set_text(
           i,
@@ -83,7 +83,7 @@ with open('a.c', 'w') as fout:
       for j in i:
         extract(j, i, child_indent)
       return
-    assert i[0].tag == 'PLex_Text' and len(i[0]) == 0
+    assert i[0].tag == 'AST_Text' and len(i[0]) == 0
     text = element.to_text(i[0])
 
     j = 0
@@ -137,13 +137,13 @@ with open('a.i') as fin:
       else:
         lines.append(line)
       line = fin.readline()
-    context.indent = indent + '  #'
+    context.indent = indent + '  '
     context.initial = initial
     text = c_to_python.c_to_python(context, ''.join(lines))
     if initial:
       element.set_text(i[0], 0, text)
     else:
-      assert parent.tag == 'PLex_Section2_Rule'
+      assert parent.tag == 'AST_Section2_Rule'
       assert len(parent) == 3
       prefix = element.get_text(parent, 2).rstrip('\t ')
       if len(text) == 0:
@@ -152,11 +152,10 @@ with open('a.i') as fin:
         pass
       elif text.index('\n') == len(text) - 1:
         prefix += ' /*COLUMN32*/ '
-        text = 'pass {0:s}'.format(text.lstrip('\t '))
+        text = text.lstrip('\t ')
       else:
         prefix += ' '
-        text = '{{\n{0:s}  pass\n{1:s}{2:s}}}\n'.format(
-          indent,
+        text = '{{\n{0:s}{1:s}}}\n'.format(
           text,
           indent
         )
diff --git a/tests/scan.l b/tests/scan.l
index 8e21e78..73e1b5f 100644
--- a/tests/scan.l
+++ b/tests/scan.l
@@ -81,9 +81,9 @@ extern const char *escaped_qstart, *escaped_qend;
          { \
 	strncpy( nmstr, yytext, sizeof(nmstr) ); \
  piece_pack(); \
- piece_append("<PLex_Name>"); \
+ piece_append("<AST_Name>"); \
  piece_escape(yytext, strlen(yytext)); \
- piece_append("</PLex_Name>"); \
+ piece_append("</AST_Name>"); \
  piece_pack(); \
  return ~NAME; \
  /*	return NAME;*/ \
@@ -121,14 +121,14 @@ extern const char *escaped_qstart, *escaped_qend;
     add_action(M4QSTART); \
     yy_push_state(CODEBLOCK); \
     if ((indented_code = x)) ACTION_ECHO; \
- piece_append("<PLex_Section1Or2_CodeBlock>"); \
+ piece_append("<AST_Section1Or2_CodeBlock>"); \
 } while(0)
 
 #define END_CODEBLOCK do { \
     yy_pop_state();\
     add_action(M4QEND); \
     if (!indented_code) line_directive_out(NULL, 0);\
- piece_append("</PLex_Section1Or2_CodeBlock>"); \
+ piece_append("</AST_Section1Or2_CodeBlock>"); \
 } while (0)
 
 /* Nick */
@@ -158,6 +158,8 @@ static void markup_option(const char *name, int sense);
 %x COMMENT_DISCARD CODE_COMMENT
 %x SECT3_NOESCAPE
 %x CHARACTER_CONSTANT
+/* Nick extra rules for action groups */
+%x ACTION_GROUP ELEMENT_GROUP DOUBLE_QUOTED SINGLE_QUOTED
 
 WS		[[:blank:]]+
 OPTWS		[[:blank:]]*
@@ -192,12 +194,12 @@ M4QEND      "]""]"
 
 
 <INITIAL>{
-	^{WS}		START_CODEBLOCK(true); piece_append("<PLex_Text>");
+	^{WS}		START_CODEBLOCK(true); piece_append("<AST_Text>");
 	^"/*"		add_action("/*[""["); yy_push_state( COMMENT );
 	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
 	^"%s"{NAME}?	return SCDECL;
 	^"%x"{NAME}?	return XSCDECL;
-	^"%{".*{NL}	START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append("<PLex_Text>");
+	^"%{".*{NL}	START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append("<AST_Text>");
     ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL}    {
                 brace_start_line = linenum;
                 ++linenum;
@@ -217,10 +219,10 @@ M4QEND      "]""]"
 			line_directive_out(NULL, 1);
 			BEGIN(SECT2PROLOG);
 #if 1
- piece_append("</PLex_Section1>");
+ piece_append("</AST_Section1>");
  piece_pack();
  piece_escape(yytext, strlen(yytext));
- piece_append("<PLex_Section2>");
+ piece_append("<AST_Section2>");
  piece_pack();
  return ~SECTEND;
 #else
@@ -228,8 +230,8 @@ M4QEND      "]""]"
 #endif
 			}
 
-	^"%pointer".*{NL}	yytext_is_array = false; ++linenum; piece_append("<PLex_Section1_Options><PLex_Section1_Options_Array>"); piece_flush(strlen(yytext) - 1); piece_append("</PLex_Section1_Options_Array></PLex_Section1_Options>"); 
-	^"%array".*{NL}		yytext_is_array = true; ++linenum; piece_append("<PLex_Section1_Options><PLex_Section1_Options_Array value=\"true\">"); piece_flush(strlen(yytext) - 1); piece_append("</PLex_Section1_Options_Array></PLex_Section1_Options>");
+	^"%pointer".*{NL}	yytext_is_array = false; ++linenum; piece_append("<AST_Section1_Options><AST_Section1_Options_Array>"); piece_flush(strlen(yytext) - 1); piece_append("</AST_Section1_Options_Array></AST_Section1_Options>"); 
+	^"%array".*{NL}		yytext_is_array = true; ++linenum; piece_append("<AST_Section1_Options><AST_Section1_Options_Array value=\"true\">"); piece_flush(strlen(yytext) - 1); piece_append("</AST_Section1_Options_Array></AST_Section1_Options>");
 
 	^"%option"	BEGIN(OPTION); return TOK_OPTION;
 
@@ -304,13 +306,13 @@ M4QEND      "]""]"
 }
 
 <CODEBLOCK>{
-	^"%}".*{NL}	++linenum; piece_append("</PLex_Text>"); piece_flush(strlen(yytext)); END_CODEBLOCK;
+	^"%}".*{NL}	++linenum; piece_append("</AST_Text>"); piece_flush(strlen(yytext)); END_CODEBLOCK;
 	[^\n%\[\]]*         ACTION_ECHO;
         .		ACTION_ECHO;
 	{NL}		{
 			++linenum;
 			ACTION_ECHO;
-			if ( indented_code ) { piece_flush(strlen(yytext)); piece_append("</PLex_Text>"); END_CODEBLOCK; }
+			if ( indented_code ) { piece_flush(strlen(yytext)); piece_append("</AST_Text>"); END_CODEBLOCK; }
 			}
 }
 
@@ -512,9 +514,9 @@ M4QEND      "]""]"
 			nmstr[strlen( nmstr ) - 1] = '\0';
 #if 1
  piece_pack();
- piece_append("<PLex_String>\"<PLex_Text>");
+ piece_append("<AST_String>\"<AST_Text>");
  piece_escape(yytext + 1, strlen(yytext + 1) - 1);
- piece_append("</PLex_Text>\"</PLex_String>");
+ piece_append("</AST_Text>\"</AST_String>");
  piece_pack();
  return ~NAME; /* actually a misnomer */
 #else
@@ -536,7 +538,7 @@ M4QEND      "]""]"
 	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
 	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */
 
-	^{WS} START_CODEBLOCK(true); piece_append("<PLex_Text>"); /* indented code in prolog */
+	^{WS} START_CODEBLOCK(true); piece_append("<AST_Text>"); /* indented code in prolog */
 
 	^{NOT_WS}.*	{
         /* non-indented code */
@@ -548,7 +550,7 @@ M4QEND      "]""]"
             BEGIN(SECT2);
         } else {
             START_CODEBLOCK(true);
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
         }
     }
 
@@ -560,7 +562,7 @@ M4QEND      "]""]"
 			sectnum = 0;
 #if 1
  piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
  piece_pack();
  return ~YY_NULL;
 #else
@@ -578,18 +580,20 @@ M4QEND      "]""]"
 			bracelevel = 1;
 			BEGIN(PERCENT_BRACE_ACTION);
  piece_flush(strlen(yytext) - 2);
- piece_append("<PLex_Section1Or2_CodeBlock>");
+ piece_append("<AST_Section1Or2_CodeBlock>");
  piece_flush(2);
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
 			}
 
 	^{OPTWS}"<"	    {
                         /* Allow "<" to appear in (?x) patterns. */
                         if (!sf_skip_ws())
                             BEGIN(SC);
+ /* here we know yytext is not used by parser, so OK to destroy it */
+ piece_flush(strlen(yytext) - 1);
                         return '<';
                     }
-	^{OPTWS}"^"	return '^';
+	^{OPTWS}"^"	/* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 1); return '^';
 	\"		BEGIN(QUOTE); return '"';
 	"{"/[[:digit:]]	{
 			BEGIN(NUM);
@@ -604,7 +608,7 @@ M4QEND      "]""]"
 			bracelevel = 1;
 			BEGIN(PERCENT_BRACE_ACTION);
  piece_flush(strlen(yytext) - 2);
- piece_append("<PLex_Section2_Rule_Action>");
+ piece_append("<AST_Section2_Rule_Action>");
 
 			if ( in_rule )
 				{
@@ -614,7 +618,7 @@ M4QEND      "]""]"
  piece_pack();
  piece_escape(yytext, 2);
  piece_pack();
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
  return ~'\n';
 #else
 				return '\n';
@@ -624,7 +628,7 @@ M4QEND      "]""]"
  abort();
 #else
  piece_flush(strlen(yytext));
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
 #endif
 			}
 	{WS}"|".*{NL}	{
@@ -644,9 +648,9 @@ M4QEND      "]""]"
   ;
  piece_flush(i);
  piece_pack();
- piece_append("<PLex_Section2_Rule_Action continued=\"true\">");
+ piece_append("<AST_Section2_Rule_Action continued=\"true\">");
  piece_escape(yytext, strlen(yytext));
- piece_append("</PLex_Section2_Rule_Action>");
+ piece_append("</AST_Section2_Rule_Action>");
  piece_pack();
  return ~'\n';
 #else
@@ -692,7 +696,7 @@ M4QEND      "]""]"
  piece_pack();
  piece_escape(yytext, strlen(yytext));
  piece_pack();
- piece_append("<PLex_Section2_Rule_Action><PLex_Text>");
+ piece_append("<AST_Section2_Rule_Action><AST_Text>");
  return ~'\n';
 #else
                     return '\n';
@@ -724,7 +728,7 @@ M4QEND      "]""]"
  piece_pack();
  piece_escape(yytext, strlen(yytext));
  piece_pack();
- piece_append("<PLex_Section2_Rule_Action><PLex_Text>");
+ piece_append("<AST_Section2_Rule_Action><AST_Text>");
  return ~'\n';
 #else
                     return '\n';
@@ -734,7 +738,7 @@ M4QEND      "]""]"
 			}
 
 	^{OPTWS}"<<EOF>>"	|
-	"<<EOF>>"	return EOF_OP;
+	"<<EOF>>"	/* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 7); return EOF_OP;
 
 	^"%%".*		{
 			sectnum = 3;
@@ -742,10 +746,10 @@ M4QEND      "]""]"
 			outn("/* Begin user sect3 */");
 #if 1
  piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
  piece_escape(yytext, strlen(yytext));
  piece_pack();
- piece_append("<PLex_Section3>");
+ piece_append("<AST_Section3>");
  return ~YY_NULL;
 #else
 			yyterminate(); /* to stop the parser */
@@ -854,6 +858,10 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 					{
 					unput(')');
 					PUT_BACK_STRING(nmdefptr, 0);
+ if (!lex_compat && !posix_compat) {
+  unput(':');
+  unput('?');
+ }
 					unput('(');
 					}
 				}
@@ -905,7 +913,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 			sectnum = 0;
 #if 1
  piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
  piece_pack();
  return ~YY_NULL;
 #else
@@ -943,14 +951,47 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 }
 
 <GROUP_WITH_PARAMS>{
-    ":"     BEGIN(SECT2);
+    /* Nick extra rules for named groups */
+    "'"{NAME}"'" |
+    "<"{NAME}">" {
+                BEGIN(SECT2);
+#if 1
+ /* here we know yytext is not used by parser, so OK to destroy it */
+ piece_flush(1);
+ piece_pack();
+ piece_append("<RegexGroupName_Text>");
+ piece_flush(strlen(yytext) - 1);
+ piece_append("</RegexGroupName_Text>");
+ piece_pack();
+ piece_flush(1);
+ return ~NAME;
+#else
+                return NAME;
+#endif
+            }
+    /* Nick extra rules for action groups */
+    "A{" {
+                BEGIN(SECT2);
+                yy_push_state(ACTION_GROUP);
+		bracelevel = 1;
+ piece_flush(strlen(yytext));
+ piece_append("<RegexGroupAction_Text>");
+            }
+    "E{" {
+                BEGIN(SECT2);
+                yy_push_state(ELEMENT_GROUP);
+		bracelevel = 1;
+ piece_flush(strlen(yytext));
+ piece_append("<RegexGroupElement_Text>");
+            }
+    ":"     BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */
     "-"     BEGIN(GROUP_MINUS_PARAMS);
     i       sf_set_case_ins(1);
     s       sf_set_dot_all(1);
     x       sf_set_skip_ws(1);
 }
 <GROUP_MINUS_PARAMS>{
-    ":"     BEGIN(SECT2);
+    ":"     BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */
     i       sf_set_case_ins(0);
     s       sf_set_dot_all(0);
     x       sf_set_skip_ws(0);
@@ -1038,7 +1079,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 
 
 <PERCENT_BRACE_ACTION>{
-	{OPTWS}"%}".*		bracelevel = 0; piece_append("</PLex_Text>");
+	{OPTWS}"%}".*		bracelevel = 0; piece_append("</AST_Text>");
 
 	<ACTION>"/*"		ACTION_ECHO; yy_push_state( CODE_COMMENT );
 
@@ -1063,9 +1104,9 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 
  piece_flush(strlen(yytext));
  if (doing_codeblock)
-  piece_append("</PLex_Section1Or2_CodeBlock>");
+  piece_append("</AST_Section1Or2_CodeBlock>");
  else
-  markup_action("</PLex_Section2_Rule_Action>");
+  markup_action("</AST_Section2_Rule_Action>");
             doing_rule_action = doing_codeblock = false;
             BEGIN(SECT2);
         }
@@ -1090,7 +1131,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
  {
                       add_action( "\tYY_BREAK]""]\n" );
   piece_flush(strlen(yytext));
-  markup_action("</PLex_Text></PLex_Section2_Rule_Action>");
+  markup_action("</AST_Text></AST_Section2_Rule_Action>");
  }
 
                    doing_rule_action = false;
@@ -1111,11 +1152,13 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 <ACTION_STRING,CHARACTER_CONSTANT>{
         (\\\n)*         ACTION_ECHO;
 	\\(\\\n)*.	ACTION_ECHO;
-	{NL}	++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action("</PLex_Text></PLex_Section2_Rule_Action>"); } else { BEGIN(ACTION); }
+	{NL}	++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action("</AST_Text></AST_Section2_Rule_Action>"); } else { BEGIN(ACTION); }
         .	ACTION_ECHO;
 }
 
-<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>>	{
+ /* Nick extra rules for action groups */
+ /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
+<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>>	{
 			synerr( _( "EOF encountered inside an action" ) );
 			yyterminate();
 			}
@@ -1143,7 +1186,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
         sectnum = 0;
 #if 1
  piece_pack();
- piece_append("</PLex_Section3>");
+ piece_append("</AST_Section3>");
  piece_pack();
  return ~YY_NULL;
 #else
@@ -1160,7 +1203,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
        sectnum = 0;
 #if 1
  piece_pack();
- piece_append("</PLex_Section3>");
+ piece_append("</AST_Section3>");
  piece_pack();
  return ~YY_NULL;
 #else
@@ -1168,6 +1211,51 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
 #endif
     }
 }
+
+ /* Nick extra rules for action groups */
+<ACTION_GROUP,ELEMENT_GROUP>{
+	"{"		++bracelevel;
+}
+<ACTION_GROUP>{
+	"}"		{
+			    if (--bracelevel == 0) {
+			        yy_pop_state();
+ piece_append("</RegexGroupAction_Text>");
+			        return TOK_ACTION_GROUP;
+			    }
+			}
+}
+<ELEMENT_GROUP>{
+	"}"		{
+			    if (--bracelevel == 0) {
+			        yy_pop_state();
+ piece_append("</RegexGroupElement_Text>");
+			        return TOK_ELEMENT_GROUP;
+			    }
+			}
+}
+<ACTION_GROUP,ELEMENT_GROUP>{
+	"'"		yy_push_state(SINGLE_QUOTED);
+	\"		yy_push_state(DOUBLE_QUOTED);
+	"/*"		yy_push_state(COMMENT_DISCARD);
+}
+<SINGLE_QUOTED>{
+	[^\[\]\'\\\n]+
+        \'		yy_pop_state();
+}
+<DOUBLE_QUOTED>{
+	[^\[\]\"\\\n]+
+	\"		yy_pop_state();
+}
+<SINGLE_QUOTED,DOUBLE_QUOTED>{
+        (\\\n)*
+	\\(\\\n)*.
+}
+<ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
+	{NL}		++linenum;
+        .
+}
+
 <*>.|\n			format_synerr( _( "bad character: %s" ), yytext );
 
 %%
@@ -1292,16 +1380,16 @@ int flexscan(void) {
 }
 
 static void markup_action(const char *text) {
- /* append to last token text so it appears inside <PLex_Section2_Rule>..</PLex_Section2_Rule> */
+ /* append to last token text so it appears inside <AST_Section2_Rule>..</AST_Section2_Rule> */
  /* a problem here is that Rule has already been reduced (marked up), */
  /* because we returned a '\n' token when we detected start of action, */
- /* hence we need to move the closing </PLex_Section2_Rule> tag over to our right */
+ /* hence we need to move the closing </AST_Section2_Rule> tag over to our right */
  int i = strlen(piece[--piece0]);
- if (i < 21 || strcmp(piece[piece0] + i - 21, "</PLex_Section2_Rule>") != 0)
+ if (i < 20 || strcmp(piece[piece0] + i - 20, "</AST_Section2_Rule>") != 0)
   abort();
- piece[piece0][i - 21] = 0;
+ piece[piece0][i - 20] = 0;
  piece_append(text);
- piece_append("</PLex_Section2_Rule>");
+ piece_append("</AST_Section2_Rule>");
  piece_pack();
 }
 
@@ -1311,12 +1399,12 @@ static void markup_option(const char *name, int sense) {
  int i = piece1;
  while (--i >= piece0 && strcmp(piece[i], "no") == 0)
   ;
- sprintf(piece_temp, "<PLex_Section1_Options_%s%s>", name, sense ? " value=\"true\"" : "");
+ sprintf(piece_temp, "<AST_Section1_Options_%s%s>", name, sense ? " value=\"true\"" : "");
  piece_insert(i + 1, piece_temp);
  piece_flush(strlen(yytext));
- sprintf(piece_temp, "</PLex_Section1_Options_%s>", name);
+ sprintf(piece_temp, "</AST_Section1_Options_%s>", name);
  piece_append(piece_temp);
- /* append to last token text so it appears inside <PLex_Section1_Options>..</PLex_Section1_Options> */
+ /* append to last token text so it appears inside <AST_Section1_Options>..</AST_Section1_Options> */
  --piece0;
  piece_pack();
 }
-- 
2.34.1