Reinstate the translation based on the new pilex/piyacc parser, tidy up a bit

author Nick Downing <nick@ndcode.org>
Sat, 12 Jan 2019 03:12:16 +0000 (14:12 +1100)

committer Nick Downing <nick@ndcode.org>
Sat, 12 Jan 2019 03:12:16 +0000 (14:12 +1100)

diff --git a/ast.py b/ast.py

index cb04951..f2aceef 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -6,7 +6,7 @@ class Context:
      self,
      indent = '',
      enclosing_loop = None,
-    initial = False,
+    #initial = False,
      translate_identifier = {
        'NULL': 'None',
        'false': 'False',
@@ -16,10 +16,38 @@ class Context:
    ):
      self.indent = '  '
      self.enclosing_loop = enclosing_loop
-    self.initial = initial # whether to add declared identifiers as 'self.'
+    #self.initial = initial # whether to add declared identifiers as 'self.'
      self.translate_identifier = translate_identifier
  
  class AST(element.Element):
+  class Text(element.Element): # element class whose default tag is 'AST_Text'
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'AST_Text',
+      attrib = {},
+      text = '',
+      children = []
+    ):
+      element.Element.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children
+      )
+    def copy(self, factory = None):
+      result = element.Element.copy(
+        self,
+        AST.Text if factory is None else factory # qualified: a bare 'Text' is not resolvable from a method of a class nested in AST
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.AST.Text({0:s})'.format(', '.join(params))
+    # GENERATE END
+
    class Element(element.Element):
      # GENERATE ELEMENT() BEGIN
      def __init__(
@@ -105,7 +133,7 @@ class AST(element.Element):
        return 'ast.AST.DeclarationOrStatement({0:s})'.format(', '.join(params))
      # GENERATE END
      def translate(self, context):
-      Element.translate(self, context)
+      AST.Element.translate(self, context)
        element.set_text(
          self,
          0,
@@ -528,11 +556,11 @@ class AST(element.Element):
      def translate(self, context):
        assert len(self) == 3
        self[0].translate(context)
-      initial_save = context.initial
-      context.initial = False
+      #initial_save = context.initial
+      #context.initial = False
        self[1].translate(context)
        self[2].translate(context)
-      context.initial = initial_save
+      #context.initial = initial_save
        element.set_text(self, 0, '')
        element.set_text(self, 1, '(')
        element.set_text(self, 2, '')
@@ -568,10 +596,10 @@ class AST(element.Element):
      def translate(self, context):
        assert len(self) == 2
        self[0].translate(context)
-      initial_save = context.initial
-      context.initial = False
+      #initial_save = context.initial
+      #context.initial = False
        self[1].translate(context)
-      context.initial = initial_save
+      #context.initial = initial_save
        element.set_text(self, 0, '')
        element.set_text(self, 1, '(')
        element.set_text(self, 2, ')')
@@ -604,11 +632,11 @@ class AST(element.Element):
        return 'ast.AST.DeclaratorIdentifier({0:s})'.format(', '.join(params))
      # GENERATE END
      def translate(self, context):
-      if context.initial:
-        text = element.get_text(self[0], 0)
-        assert text not in context.translate_identifier
-        context.translate_identifier[text] = 'self.{0:s}'.format(text)
-      Declarator.translate(self, context)
+      #if context.initial:
+      #  text = element.get_text(self[0], 0)
+      #  assert text not in context.translate_identifier
+      #  context.translate_identifier[text] = 'self.{0:s}'.format(text)
+      AST.Declarator.translate(self, context)
  
    class DeclaratorPointer(Declarator):
      # GENERATE ELEMENT() BEGIN
@@ -4058,6 +4086,7 @@ class AST(element.Element):
  # GENERATE FACTORY(element.Element) BEGIN
  tag_to_class = {
    'AST': AST,
+  'AST_Text': AST.Text,
    'AST_Element': AST.Element,
    'AST_DeclarationOrStatement': AST.DeclarationOrStatement,
    'AST_AlignAsExpression': AST.AlignAsExpression,
diff --git a/c_to_python.py b/c_to_python.py

index 549a072..a60fed5 100644 (file)
--- a/c_to_python.py
+++ b/c_to_python.py
@@ -1,24 +1,15 @@
-import ansi_c_yylex
-import ansi_c_yyparse
  import ast
  import element
+import lex_yy
  #import xml.etree.ElementTree
+import y_tab
  
  def c_to_python(context, text):
-  root = ast.BlockItemList(text = text)
-  ansi_c_yyparse.yyparse(
-    root,
-    0,
-    0,
-    ast.factory,
-    ansi_c_yylex.yylex(
-      root,
-      0,
-      0,
-      ast.factory,
-      iter([])
-    )
-  )
+  lex_yy.yyin = None
+  lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)]
+  lex_yy.yytext_len = 0
+  lex_yy.unput(text)
+  root = y_tab.yyparse(ast.AST.BlockItemList)
    root.translate(context)
    #print('@@@')
    #xml.etree.ElementTree.dump(root)
diff --git a/element.py b/element.py

index 2121e08..2d02217 100644 (file)
--- a/element.py
+++ b/element.py
@@ -160,6 +160,16 @@ def set_text(root, i, text):
    else:
      root[i - 1].tail = text
  
+def to_text(root): # flatten root to one string: its text segments interleaved with each child's to_text()
+  return ''.join(
+    [
+      j
+      for i in range(len(root))
+      for j in [get_text(root, i), to_text(root[i])] # text segment i, then child i flattened recursively
+    ] +
+    [get_text(root, len(root))] # final text segment, index len(root) (presumably the last child's tail, mirroring set_text)
+  )
+
  def concatenate(children, factory = Element, *args, **kwargs):
    root = factory(*args, **kwargs)
    for child in children:
diff --git a/l_to_python.py b/l_to_python.py

index 4f11487..0411558 100755 (executable)
--- a/l_to_python.py
+++ b/l_to_python.py
@@ -27,20 +27,20 @@ def my_rstrip(text, indent):
    return text[:i].rstrip('\t ') + indent + text[i:]
  
  context = ast.Context()
-context.translate_identifier['BEGIN'] = 'self.BEGIN'
-context.translate_identifier['yylval'] = 'ref_data.yylval'
-context.translate_identifier['yytext'] = 'self.yytext'
-context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
-context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
+#context.translate_identifier['BEGIN'] = 'self.BEGIN'
+#context.translate_identifier['yylval'] = 'ref_data.yylval'
+#context.translate_identifier['yytext'] = 'self.yytext'
+#context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
+#context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
  
  actions = []
  with open('a.c', 'w') as fout:
    def extract(i, parent, indent):
-    if i.tag == 'PLex_Section1Or2_CodeBlock':
+    if i.tag == 'AST_Section1Or2_CodeBlock':
        initial = True
-    elif i.tag == 'PLex_Section2_Rule_Action':
+    elif i.tag == 'AST_Section2_Rule_Action':
        if len(i) == 0: # continued actions
-        assert parent.tag == 'PLex_Section2_Rule'
+        assert parent.tag == 'AST_Section2_Rule'
          assert len(parent) == 3
          element.set_text(
            parent,
@@ -51,17 +51,17 @@ with open('a.c', 'w') as fout:
        initial = False
      else:
        child_indent = indent
-      if i.tag == 'PLex_Section1_StartConditions':
+      if i.tag == 'AST_Section1_StartConditions':
          for j in i:
-          assert j.tag == 'PLex_Name'
+          assert j.tag == 'AST_Name'
            text = element.get_text(j, 0)
            context.translate_identifier[text] = 'DFA.{0:s}'.format(text)
        elif (
-        i.tag == 'PLex_Section2_Rule' or
-        i.tag == 'PLex_Section2_Rule_FLexRule'
+        i.tag == 'AST_Section2_Rule' or
+        i.tag == 'AST_Section2_Rule_FLexRule'
        ):
          element.set_text(i, 0, element.get_text(i, 0).lstrip('\t '))
-      elif i.tag == 'PLex_Section2_CompoundRule':
+      elif i.tag == 'AST_Section2_CompoundRule':
          child_indent += '  '
          element.set_text(
            i,
@@ -83,7 +83,7 @@ with open('a.c', 'w') as fout:
        for j in i:
          extract(j, i, child_indent)
        return
-    assert i[0].tag == 'PLex_Text' and len(i[0]) == 0
+    assert i[0].tag == 'AST_Text' and len(i[0]) == 0
      text = element.to_text(i[0])
  
      j = 0
@@ -137,13 +137,13 @@ with open('a.i') as fin:
        else:
          lines.append(line)
        line = fin.readline()
-    context.indent = indent + '  #'
+    context.indent = indent + '  '
      context.initial = initial
      text = c_to_python.c_to_python(context, ''.join(lines))
      if initial:
        element.set_text(i[0], 0, text)
      else:
-      assert parent.tag == 'PLex_Section2_Rule'
+      assert parent.tag == 'AST_Section2_Rule'
        assert len(parent) == 3
        prefix = element.get_text(parent, 2).rstrip('\t ')
        if len(text) == 0:
@@ -152,11 +152,10 @@ with open('a.i') as fin:
          pass
        elif text.index('\n') == len(text) - 1:
          prefix += ' /*COLUMN32*/ '
-        text = 'pass {0:s}'.format(text.lstrip('\t '))
+        text = text.lstrip('\t ')
        else:
          prefix += ' '
-        text = '{{\n{0:s}  pass\n{1:s}{2:s}}}\n'.format(
-          indent,
+        text = '{{\n{0:s}{1:s}}}\n'.format(
            text,
            indent
          )
diff --git a/tests/scan.l b/tests/scan.l

index 8e21e78..73e1b5f 100644 (file)
--- a/tests/scan.l
+++ b/tests/scan.l
@@ -81,9 +81,9 @@ extern const char *escaped_qstart, *escaped_qend;
           { \
         strncpy( nmstr, yytext, sizeof(nmstr) ); \
   piece_pack(); \
- piece_append("<PLex_Name>"); \
+ piece_append("<AST_Name>"); \
   piece_escape(yytext, strlen(yytext)); \
- piece_append("</PLex_Name>"); \
+ piece_append("</AST_Name>"); \
   piece_pack(); \
   return ~NAME; \
   /*    return NAME;*/ \
@@ -121,14 +121,14 @@ extern const char *escaped_qstart, *escaped_qend;
      add_action(M4QSTART); \
      yy_push_state(CODEBLOCK); \
      if ((indented_code = x)) ACTION_ECHO; \
- piece_append("<PLex_Section1Or2_CodeBlock>"); \
+ piece_append("<AST_Section1Or2_CodeBlock>"); \
  } while(0)
  
  #define END_CODEBLOCK do { \
      yy_pop_state();\
      add_action(M4QEND); \
      if (!indented_code) line_directive_out(NULL, 0);\
- piece_append("</PLex_Section1Or2_CodeBlock>"); \
+ piece_append("</AST_Section1Or2_CodeBlock>"); \
  } while (0)
  
  /* Nick */
@@ -158,6 +158,8 @@ static void markup_option(const char *name, int sense);
  %x COMMENT_DISCARD CODE_COMMENT
  %x SECT3_NOESCAPE
  %x CHARACTER_CONSTANT
+/* Nick extra rules for action groups */
+%x ACTION_GROUP ELEMENT_GROUP DOUBLE_QUOTED SINGLE_QUOTED
  
  WS             [[:blank:]]+
  OPTWS          [[:blank:]]*
@@ -192,12 +194,12 @@ M4QEND      "]""]"
  
  
  <INITIAL>{
-       ^{WS}           START_CODEBLOCK(true); piece_append("<PLex_Text>");
+       ^{WS}           START_CODEBLOCK(true); piece_append("<AST_Text>");
         ^"/*"           add_action("/*[""["); yy_push_state( COMMENT );
         ^#{OPTWS}line{WS}       yy_push_state( LINEDIR );
         ^"%s"{NAME}?    return SCDECL;
         ^"%x"{NAME}?    return XSCDECL;
-       ^"%{".*{NL}     START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append("<PLex_Text>");
+       ^"%{".*{NL}     START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append("<AST_Text>");
      ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL}    {
                  brace_start_line = linenum;
                  ++linenum;
@@ -217,10 +219,10 @@ M4QEND      "]""]"
                         line_directive_out(NULL, 1);
                         BEGIN(SECT2PROLOG);
  #if 1
- piece_append("</PLex_Section1>");
+ piece_append("</AST_Section1>");
   piece_pack();
   piece_escape(yytext, strlen(yytext));
- piece_append("<PLex_Section2>");
+ piece_append("<AST_Section2>");
   piece_pack();
   return ~SECTEND;
  #else
@@ -228,8 +230,8 @@ M4QEND      "]""]"
  #endif
                         }
  
-       ^"%pointer".*{NL}       yytext_is_array = false; ++linenum; piece_append("<PLex_Section1_Options><PLex_Section1_Options_Array>"); piece_flush(strlen(yytext) - 1); piece_append("</PLex_Section1_Options_Array></PLex_Section1_Options>"); 
-       ^"%array".*{NL}         yytext_is_array = true; ++linenum; piece_append("<PLex_Section1_Options><PLex_Section1_Options_Array value=\"true\">"); piece_flush(strlen(yytext) - 1); piece_append("</PLex_Section1_Options_Array></PLex_Section1_Options>");
+       ^"%pointer".*{NL}       yytext_is_array = false; ++linenum; piece_append("<AST_Section1_Options><AST_Section1_Options_Array>"); piece_flush(strlen(yytext) - 1); piece_append("</AST_Section1_Options_Array></AST_Section1_Options>"); 
+       ^"%array".*{NL}         yytext_is_array = true; ++linenum; piece_append("<AST_Section1_Options><AST_Section1_Options_Array value=\"true\">"); piece_flush(strlen(yytext) - 1); piece_append("</AST_Section1_Options_Array></AST_Section1_Options>");
  
         ^"%option"      BEGIN(OPTION); return TOK_OPTION;
  
@@ -304,13 +306,13 @@ M4QEND      "]""]"
  }
  
  <CODEBLOCK>{
-       ^"%}".*{NL}     ++linenum; piece_append("</PLex_Text>"); piece_flush(strlen(yytext)); END_CODEBLOCK;
+       ^"%}".*{NL}     ++linenum; piece_append("</AST_Text>"); piece_flush(strlen(yytext)); END_CODEBLOCK;
         [^\n%\[\]]*         ACTION_ECHO;
          .              ACTION_ECHO;
         {NL}            {
                         ++linenum;
                         ACTION_ECHO;
-                       if ( indented_code ) { piece_flush(strlen(yytext)); piece_append("</PLex_Text>"); END_CODEBLOCK; }
+                       if ( indented_code ) { piece_flush(strlen(yytext)); piece_append("</AST_Text>"); END_CODEBLOCK; }
                         }
  }
  
@@ -512,9 +514,9 @@ M4QEND      "]""]"
                         nmstr[strlen( nmstr ) - 1] = '\0';
  #if 1
   piece_pack();
- piece_append("<PLex_String>\"<PLex_Text>");
+ piece_append("<AST_String>\"<AST_Text>");
   piece_escape(yytext + 1, strlen(yytext + 1) - 1);
- piece_append("</PLex_Text>\"</PLex_String>");
+ piece_append("</AST_Text>\"</AST_String>");
   piece_pack();
   return ~NAME; /* actually a misnomer */
  #else
@@ -536,7 +538,7 @@ M4QEND      "]""]"
         ^"%{".* ++bracelevel; yyless( 2 );      /* eat only %{ */
         ^"%}".* --bracelevel; yyless( 2 );      /* eat only %} */
  
-       ^{WS} START_CODEBLOCK(true); piece_append("<PLex_Text>"); /* indented code in prolog */
+       ^{WS} START_CODEBLOCK(true); piece_append("<AST_Text>"); /* indented code in prolog */
  
         ^{NOT_WS}.*     {
          /* non-indented code */
@@ -548,7 +550,7 @@ M4QEND      "]""]"
              BEGIN(SECT2);
          } else {
              START_CODEBLOCK(true);
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
          }
      }
  
@@ -560,7 +562,7 @@ M4QEND      "]""]"
                         sectnum = 0;
  #if 1
   piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
   piece_pack();
   return ~YY_NULL;
  #else
@@ -578,18 +580,20 @@ M4QEND      "]""]"
                         bracelevel = 1;
                         BEGIN(PERCENT_BRACE_ACTION);
   piece_flush(strlen(yytext) - 2);
- piece_append("<PLex_Section1Or2_CodeBlock>");
+ piece_append("<AST_Section1Or2_CodeBlock>");
   piece_flush(2);
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
                         }
  
         ^{OPTWS}"<"         {
                          /* Allow "<" to appear in (?x) patterns. */
                          if (!sf_skip_ws())
                              BEGIN(SC);
+ /* here we know yytext is not used by parser, so OK to destroy it */
+ piece_flush(strlen(yytext) - 1);
                          return '<';
                      }
-       ^{OPTWS}"^"     return '^';
+       ^{OPTWS}"^"     /* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 1); return '^';
         \"              BEGIN(QUOTE); return '"';
         "{"/[[:digit:]] {
                         BEGIN(NUM);
@@ -604,7 +608,7 @@ M4QEND      "]""]"
                         bracelevel = 1;
                         BEGIN(PERCENT_BRACE_ACTION);
   piece_flush(strlen(yytext) - 2);
- piece_append("<PLex_Section2_Rule_Action>");
+ piece_append("<AST_Section2_Rule_Action>");
  
                         if ( in_rule )
                                 {
@@ -614,7 +618,7 @@ M4QEND      "]""]"
   piece_pack();
   piece_escape(yytext, 2);
   piece_pack();
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
   return ~'\n';
  #else
                                 return '\n';
@@ -624,7 +628,7 @@ M4QEND      "]""]"
   abort();
  #else
   piece_flush(strlen(yytext));
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
  #endif
                         }
         {WS}"|".*{NL}   {
@@ -644,9 +648,9 @@ M4QEND      "]""]"
    ;
   piece_flush(i);
   piece_pack();
- piece_append("<PLex_Section2_Rule_Action continued=\"true\">");
+ piece_append("<AST_Section2_Rule_Action continued=\"true\">");
   piece_escape(yytext, strlen(yytext));
- piece_append("</PLex_Section2_Rule_Action>");
+ piece_append("</AST_Section2_Rule_Action>");
   piece_pack();
   return ~'\n';
  #else
@@ -692,7 +696,7 @@ M4QEND      "]""]"
   piece_pack();
   piece_escape(yytext, strlen(yytext));
   piece_pack();
- piece_append("<PLex_Section2_Rule_Action><PLex_Text>");
+ piece_append("<AST_Section2_Rule_Action><AST_Text>");
   return ~'\n';
  #else
                      return '\n';
@@ -724,7 +728,7 @@ M4QEND      "]""]"
   piece_pack();
   piece_escape(yytext, strlen(yytext));
   piece_pack();
- piece_append("<PLex_Section2_Rule_Action><PLex_Text>");
+ piece_append("<AST_Section2_Rule_Action><AST_Text>");
   return ~'\n';
  #else
                      return '\n';
@@ -734,7 +738,7 @@ M4QEND      "]""]"
                         }
  
         ^{OPTWS}"<<EOF>>"       |
-       "<<EOF>>"       return EOF_OP;
+       "<<EOF>>"       /* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 7); return EOF_OP;
  
         ^"%%".*         {
                         sectnum = 3;
@@ -742,10 +746,10 @@ M4QEND      "]""]"
                         outn("/* Begin user sect3 */");
  #if 1
   piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
   piece_escape(yytext, strlen(yytext));
   piece_pack();
- piece_append("<PLex_Section3>");
+ piece_append("<AST_Section3>");
   return ~YY_NULL;
  #else
                         yyterminate(); /* to stop the parser */
@@ -854,6 +858,10 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
                                         {
                                         unput(')');
                                         PUT_BACK_STRING(nmdefptr, 0);
+ if (!lex_compat && !posix_compat) {
+  unput(':');
+  unput('?');
+ }
                                         unput('(');
                                         }
                                 }
@@ -905,7 +913,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
                         sectnum = 0;
  #if 1
   piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
   piece_pack();
   return ~YY_NULL;
  #else
@@ -943,14 +951,47 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
  }
  
  <GROUP_WITH_PARAMS>{
-    ":"     BEGIN(SECT2);
+    /* Nick extra rules for named groups */
+    "'"{NAME}"'" |
+    "<"{NAME}">" {
+                BEGIN(SECT2);
+#if 1
+ /* here we know yytext is not used by parser, so OK to destroy it */
+ piece_flush(1);
+ piece_pack();
+ piece_append("<RegexGroupName_Text>");
+ piece_flush(strlen(yytext) - 1);
+ piece_append("</RegexGroupName_Text>");
+ piece_pack();
+ piece_flush(1);
+ return ~NAME;
+#else
+                return NAME;
+#endif
+            }
+    /* Nick extra rules for action groups */
+    "A{" {
+                BEGIN(SECT2);
+                yy_push_state(ACTION_GROUP);
+               bracelevel = 1;
+ piece_flush(strlen(yytext));
+ piece_append("<RegexGroupAction_Text>");
+            }
+    "E{" {
+                BEGIN(SECT2);
+                yy_push_state(ELEMENT_GROUP);
+               bracelevel = 1;
+ piece_flush(strlen(yytext));
+ piece_append("<RegexGroupElement_Text>");
+            }
+    ":"     BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */
      "-"     BEGIN(GROUP_MINUS_PARAMS);
      i       sf_set_case_ins(1);
      s       sf_set_dot_all(1);
      x       sf_set_skip_ws(1);
  }
  <GROUP_MINUS_PARAMS>{
-    ":"     BEGIN(SECT2);
+    ":"     BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */
      i       sf_set_case_ins(0);
      s       sf_set_dot_all(0);
      x       sf_set_skip_ws(0);
@@ -1038,7 +1079,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
  
  
  <PERCENT_BRACE_ACTION>{
-       {OPTWS}"%}".*           bracelevel = 0; piece_append("</PLex_Text>");
+       {OPTWS}"%}".*           bracelevel = 0; piece_append("</AST_Text>");
  
         <ACTION>"/*"            ACTION_ECHO; yy_push_state( CODE_COMMENT );
  
@@ -1063,9 +1104,9 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
  
   piece_flush(strlen(yytext));
   if (doing_codeblock)
-  piece_append("</PLex_Section1Or2_CodeBlock>");
+  piece_append("</AST_Section1Or2_CodeBlock>");
   else
-  markup_action("</PLex_Section2_Rule_Action>");
+  markup_action("</AST_Section2_Rule_Action>");
              doing_rule_action = doing_codeblock = false;
              BEGIN(SECT2);
          }
@@ -1090,7 +1131,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
   {
                        add_action( "\tYY_BREAK]""]\n" );
    piece_flush(strlen(yytext));
-  markup_action("</PLex_Text></PLex_Section2_Rule_Action>");
+  markup_action("</AST_Text></AST_Section2_Rule_Action>");
   }
  
                     doing_rule_action = false;
@@ -1111,11 +1152,13 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
  <ACTION_STRING,CHARACTER_CONSTANT>{
          (\\\n)*         ACTION_ECHO;
         \\(\\\n)*.      ACTION_ECHO;
-       {NL}    ++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action("</PLex_Text></PLex_Section2_Rule_Action>"); } else { BEGIN(ACTION); }
+       {NL}    ++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action("</AST_Text></AST_Section2_Rule_Action>"); } else { BEGIN(ACTION); }
          .      ACTION_ECHO;
  }
  
-<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>>  {
+ /* Nick extra rules for action groups */
+ /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
+<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>>   {
                         synerr( _( "EOF encountered inside an action" ) );
                         yyterminate();
                         }
@@ -1143,7 +1186,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
          sectnum = 0;
  #if 1
   piece_pack();
- piece_append("</PLex_Section3>");
+ piece_append("</AST_Section3>");
   piece_pack();
   return ~YY_NULL;
  #else
@@ -1160,7 +1203,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
         sectnum = 0;
  #if 1
   piece_pack();
- piece_append("</PLex_Section3>");
+ piece_append("</AST_Section3>");
   piece_pack();
   return ~YY_NULL;
  #else
@@ -1168,6 +1211,51 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
  #endif
      }
  }
+
+ /* Nick extra rules for action groups */
+<ACTION_GROUP,ELEMENT_GROUP>{
+       "{"             ++bracelevel;
+}
+<ACTION_GROUP>{
+       "}"             {
+                           if (--bracelevel == 0) {
+                               yy_pop_state();
+ piece_append("</RegexGroupAction_Text>");
+                               return TOK_ACTION_GROUP;
+                           }
+                       }
+}
+<ELEMENT_GROUP>{
+       "}"             {
+                           if (--bracelevel == 0) {
+                               yy_pop_state();
+ piece_append("</RegexGroupElement_Text>");
+                               return TOK_ELEMENT_GROUP;
+                           }
+                       }
+}
+<ACTION_GROUP,ELEMENT_GROUP>{
+       "'"             yy_push_state(SINGLE_QUOTED);
+       \"              yy_push_state(DOUBLE_QUOTED);
+       "/*"            yy_push_state(COMMENT_DISCARD);
+}
+<SINGLE_QUOTED>{
+       [^\[\]\'\\\n]+
+        \'             yy_pop_state();
+}
+<DOUBLE_QUOTED>{
+       [^\[\]\"\\\n]+
+       \"              yy_pop_state();
+}
+<SINGLE_QUOTED,DOUBLE_QUOTED>{
+        (\\\n)*
+       \\(\\\n)*.
+}
+<ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
+       {NL}            ++linenum;
+        .
+}
+
  <*>.|\n                        format_synerr( _( "bad character: %s" ), yytext );
  
  %%
@@ -1292,16 +1380,16 @@ int flexscan(void) {
  }
  
  static void markup_action(const char *text) {
- /* append to last token text so it appears inside <PLex_Section2_Rule>..</PLex_Section2_Rule> */
+ /* append to last token text so it appears inside <AST_Section2_Rule>..</AST_Section2_Rule> */
   /* a problem here is that Rule has already been reduced (marked up), */
   /* because we returned a '\n' token when we detected start of action, */
- /* hence we need to move the closing </PLex_Section2_Rule> tag over to our right */
+ /* hence we need to move the closing </AST_Section2_Rule> tag over to our right */
   int i = strlen(piece[--piece0]);
- if (i < 21 || strcmp(piece[piece0] + i - 21, "</PLex_Section2_Rule>") != 0)
+ if (i < 20 || strcmp(piece[piece0] + i - 20, "</AST_Section2_Rule>") != 0)
    abort();
- piece[piece0][i - 21] = 0;
+ piece[piece0][i - 20] = 0;
   piece_append(text);
- piece_append("</PLex_Section2_Rule>");
+ piece_append("</AST_Section2_Rule>");
   piece_pack();
  }
  
@@ -1311,12 +1399,12 @@ static void markup_option(const char *name, int sense) {
   int i = piece1;
   while (--i >= piece0 && strcmp(piece[i], "no") == 0)
    ;
- sprintf(piece_temp, "<PLex_Section1_Options_%s%s>", name, sense ? " value=\"true\"" : "");
+ sprintf(piece_temp, "<AST_Section1_Options_%s%s>", name, sense ? " value=\"true\"" : "");
   piece_insert(i + 1, piece_temp);
   piece_flush(strlen(yytext));
- sprintf(piece_temp, "</PLex_Section1_Options_%s>", name);
+ sprintf(piece_temp, "</AST_Section1_Options_%s>", name);
   piece_append(piece_temp);
- /* append to last token text so it appears inside <PLex_Section1_Options>..</PLex_Section1_Options> */
+ /* append to last token text so it appears inside <AST_Section1_Options>..</AST_Section1_Options> */
   --piece0;
   piece_pack();
  }
author	Nick Downing <nick@ndcode.org>
	Sat, 12 Jan 2019 03:12:16 +0000 (14:12 +1100)
committer	Nick Downing <nick@ndcode.org>
	Sat, 12 Jan 2019 03:12:16 +0000 (14:12 +1100)
ast.py		patch | blob | history
c_to_python.py		patch | blob | history
element.py		patch | blob | history
l_to_python.py		patch | blob | history
tests/scan.l		patch | blob | history