Implement BOL detection in Python skeleton (for ^ operator) and various other functio...

author Nick Downing <nick@ndcode.org>

Mon, 14 Jan 2019 01:24:19 +0000 (12:24 +1100)

committer Nick Downing <nick@ndcode.org>

Mon, 14 Jan 2019 01:24:19 +0000 (12:24 +1100)
author Nick Downing <nick@ndcode.org>
Mon, 14 Jan 2019 01:24:19 +0000 (12:24 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 14 Jan 2019 01:24:19 +0000 (12:24 +1100)
diff --git a/dfa.py b/dfa.py

index 2ba88f8..7a7597b 100644 (file)
--- a/dfa.py
+++ b/dfa.py
@@ -135,27 +135,30 @@ class DFA:
        accept[n_states] = n_acclist
        accept_set = set()
        for k in [j for i in threads0[prefix_slop:] for j in i]:
-        acc = k >> 1
-        if k & 1:
-          if (acc | flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set:
-            # look back to start of trailing context, then accept
-            acc |= flex_dfa.FlexDFA.YY_TRAILING_MASK
-          # otherwise zero length trailing context, accept immediately
-        else:
-          # mark start of (hopefully safe) trailing context
-          acc |= flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK
-        if acc not in accept_set:
-          if n_acclist >= acclist.shape[0]:
-            # extend acclist
-            new_acclist = numpy.zeros(
-              (acclist.shape[0] * 2,),
-              numpy.uint16
-            )
-            new_acclist[:acclist.shape[0]] = acclist
-            acclist = new_acclist
-          acclist[n_acclist] = acc
-          n_acclist += 1
-          accept_set.add(acc)
+        if k != -1: # ignore user-defined groups
+          acc = k >> 1
+          if k & 1:
+            if (
+              (acc | flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set
+            ):
+              # look back to start of trailing context, then accept
+              acc |= flex_dfa.FlexDFA.YY_TRAILING_MASK
+            # otherwise zero length trailing context, accept immediately
+          else:
+            # mark start of (hopefully safe) trailing context
+            acc |= flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK
+          if acc not in accept_set:
+            if n_acclist >= acclist.shape[0]:
+              # extend acclist
+              new_acclist = numpy.zeros(
+                (acclist.shape[0] * 2,),
+                numpy.uint16
+              )
+              new_acclist[:acclist.shape[0]] = acclist
+              acclist = new_acclist
+            acclist[n_acclist] = acc
+            n_acclist += 1
+            accept_set.add(acc)
  
        # calculate transition row from self.state character-to-action table
        if n_states >= transitions.shape[0]:
diff --git a/element.py b/element.py

index 1f8e84f..2d02217 100644 (file)
--- a/element.py
+++ b/element.py
@@ -1,3 +1,19 @@
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
  import xml.etree.ElementTree
  
  class Element(xml.etree.ElementTree._Element_Py):
@@ -144,14 +160,20 @@ def set_text(root, i, text):
    else:
      root[i - 1].tail = text
  
-def to_end_relative(root, pos, off):
-  assert pos >= 0 and off >= 0
-  off -= len(get_text(root, pos))
-  pos -= len(root) + 1
-  return pos, off
-
-def to_start_relative(root, pos, off):
-  assert pos < 0 and off <= 0
-  pos += len(root) + 1
-  off += len(get_text(root, pos))
-  return pos, off
+def to_text(root):
+  return ''.join(
+    [
+      j
+      for i in range(len(root))
+      for j in [get_text(root, i), to_text(root[i])]
+    ] +
+    [get_text(root, len(root))]
+  )
+
+def concatenate(children, factory = Element, *args, **kwargs):
+  root = factory(*args, **kwargs)
+  for child in children:
+    i = len(root)
+    set_text(root, i, get_text(root, i) + get_text(child, 0))
+    root[i:] = child[:]
+  return root
diff --git a/generate_flex.py b/generate_flex.py

index a9eb4fb..6a95781 100644 (file)
--- a/generate_flex.py
+++ b/generate_flex.py
@@ -2,12 +2,28 @@ import os
  import regex
  
  def generate_flex(_ast, _element, home_dir, skel_file, out_file):
-  _nfa = _ast.to_nfa()
+  # generate group_ref_data which emulates the old way where
+  # start = even, end = odd, remaining bits = flex rule index,
+  # ignoring user-defined groups by putting start = end = -1:
+  group_ref_data = []
+  for i in range(len(_ast.flex_rules)):
+    group_ref_data.extend(
+      [(-1, -1) for j in range(len(_ast.flex_rules[i].groups0))] +
+      [(i * 2, i * 2 + 1)] +
+      [(-1, -1) for j in range(len(_ast.flex_rules[i].groups1))]
+    )
+
+  _nfa = _ast.to_nfa(group_ref_data)
  
-  # end of buffer expression (do it here because only necessary for flex)
-  _regex = regex.RegexGroup(children = [regex.RegexEmpty()])
-  _regex.post_process(len(_ast.actions_text))
-  _regex.add_to_nfa(_nfa)
+  # end of buffer expression (do here because only necessary for flex)
+  eob_regex = regex.RegexGroup(children = [regex.RegexEmpty()])
+  eob_groups = []
+  eob_regex.post_process(eob_groups, caseless = _ast[0].caseless)
+  assert len(eob_groups) == 1
+  eob_regex.add_to_nfa(
+    _nfa,
+    [(len(_ast.flex_rules) * 2, len(_ast.flex_rules) * 2 + 1)]
+  )
  
    _flex_dfa = _nfa.to_dfa().to_flex_dfa()
  
diff --git a/skel/skel_py.py b/skel/skel_py.py

index 38e2951..59d0c16 100644 (file)
--- a/skel/skel_py.py
+++ b/skel/skel_py.py
@@ -25,15 +25,17 @@ class YYBufferBlock(YYBufferList):
      self.text = text
  
  class YYBufferState(YYBufferList):
-  def __init__(self, next = None, file_in = None):
+  def __init__(self, next = None, file_in = None, at_bol = True):
      YYBufferList.__init__(self, next)
      self.file_in = file_in
+    self.at_bol = at_bol
  
  yyin = sys.stdin
  yyout = sys.stdout
-yy_buffer_stack = [YYBufferState(None, None)]
+yy_buffer_stack = [YYBufferState()]
  
  yystart = INITIAL
+yystart_stack = []
  yy_threads0 = [None]
  yy_threads1 = [None]
  yy_prefix_slop = 1
@@ -46,6 +48,8 @@ yy_action = None
  yytext = ''
  yytext_len = 0
  
+YY_NULL = 0
+
  def REJECT():
    raise YYReject()
  
@@ -75,15 +79,45 @@ def unput(text):
      yytext_len -= i
    yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
  
+def ECHO():
+  yyout.write(yytext)
+
  def yy_rule_start():
    global yytext, yytext_len
    yytext = yy_group_text[:yy_group_stack[-1]]
    yytext_len = yy_group_stack[-1]
    del yy_group_stack[-2:]
+  # note that this should also be done after yyless() and REJECT(),
+  # and state should be saved in case they result in a null string,
+  # however, it doesn't seem to be in flex, maintain compatibility:
+  if len(yytext):
+    yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
  
  def yy_group_end():
    pass
  
+def BEGIN(start):
+  global yystart
+  yystart = start
+
+def YY_START():
+  return yystart
+
+def yy_push_state(start):
+  global yystart
+  yystart_stack.append(yystart)
+  yystart = start
+
+def yy_pop_state():
+  global yystart
+  yystart = yystart_stack.pop()
+
+def YY_AT_BOL():
+  return yy_buffer_stack[-1].at_bol
+
+def yy_set_bol(at_bol):
+  yy_buffer_stack[-1].at_bol = at_bol
+
  # GENERATE SECTION2
  
  def yylex():
@@ -127,7 +161,9 @@ def yylex():
      if block is not None:
        block_pos = block.pos
  
-    action = yy_dfa_start_action[yystart]
+    action = yy_dfa_start_action[
+      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+    ]
      while action != -1:
        state, transition = yy_dfa_actions[action]
        #print('i', i, 'action', action, 'state', state, 'transition', transition)
diff --git a/skel/skel_py_element.py b/skel/skel_py_element.py

index f1d0faf..d65d933 100644 (file)
--- a/skel/skel_py_element.py
+++ b/skel/skel_py_element.py
@@ -26,15 +26,17 @@ class YYBufferBlock(YYBufferList):
      self.text = text
  
  class YYBufferState(YYBufferList):
-  def __init__(self, next = None, file_in = None):
+  def __init__(self, next = None, file_in = None, at_bol = True):
      YYBufferList.__init__(self, next)
      self.file_in = file_in
+    self.at_bol = at_bol
  
  yyin = sys.stdin
  yyout = sys.stdout
-yy_buffer_stack = [YYBufferState(None, None)]
+yy_buffer_stack = [YYBufferState()]
  
  yystart = INITIAL
+yystart_stack = []
  yy_threads0 = [None]
  yy_threads1 = [None]
  yy_prefix_slop = 1
@@ -51,6 +53,8 @@ yy_element_stack = None
  yy_element_token = None
  yy_element_space = None
  
+YY_NULL = 0
+
  def REJECT():
    raise YYReject()
  
@@ -80,11 +84,19 @@ def unput(text):
      yytext_len -= i
    yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
  
+def ECHO():
+  yyout.write(yytext)
+
  def yy_rule_start():
    global yytext, yytext_len, yy_element_stack
    yytext = yy_group_text[:yy_group_stack[-1]]
    yytext_len = yy_group_stack[-1]
    del yy_group_stack[-2:]
+  # note that this should also be done after yyless() and REJECT(),
+  # and state should be saved in case they result in a null string,
+  # however, it doesn't seem to be in flex, maintain compatibility:
+  if len(yytext):
+    yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
    yy_element_stack.append([])
  
  def yy_group_end():
@@ -102,6 +114,28 @@ def yy_group_element(pos0, pos1, stack, factory, *args, **kwargs):
    element.set_text(_element, len(_element), yy_group_text[pos0:pos1])
    return _element
  
+def BEGIN(start):
+  global yystart
+  yystart = start
+
+def YY_START():
+  return yystart
+
+def yy_push_state(start):
+  global yystart
+  yystart_stack.append(yystart)
+  yystart = start
+
+def yy_pop_state():
+  global yystart
+  yystart = yystart_stack.pop()
+
+def YY_AT_BOL():
+  return yy_buffer_stack[-1].at_bol
+
+def yy_set_bol(at_bol):
+  yy_buffer_stack[-1].at_bol = at_bol
+
  # GENERATE SECTION2
  
  def yylex():
@@ -150,7 +184,9 @@ def yylex():
      if block is not None:
        block_pos = block.pos
  
-    action = yy_dfa_start_action[yystart]
+    action = yy_dfa_start_action[
+      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+    ]
      while action != -1:
        state, transition = yy_dfa_actions[action]
        #print('i', i, 'action', action, 'state', state, 'transition', transition)
diff --git a/tests/Makefile b/tests/Makefile

index 188ae0a..4928fc0 100644 (file)
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,3 +1,5 @@
+CFLAGS += -g
+
  all: lex_yy.py cal flex0 flex1
  
  # Python scanner test
@@ -7,7 +9,7 @@ lex_yy.py: cal_py.l
  
  # cal program
  cal: y.tab.o
-       ${CC} -o $@ $<
+       ${CC} ${CFLAGS} -o $@ $<
  
  y.tab.o: y.tab.c lex.yy.c
  
@@ -23,7 +25,7 @@ lex.yy.c: cal.l
  
  # flex0 program
  flex0: flex0.o
-       gcc -o $@ $< -ll
+       ${CC} ${CFLAGS} -o $@ $< -ll
  
  flex0.o: flex0.c
  
@@ -33,7 +35,7 @@ flex0.c: flex0.l
  
  # flex1 program
  flex1: flex1.o
-       gcc -o $@ $< -ll
+       ${CC} ${CFLAGS} -o $@ $< -ll
  
  flex1.o: flex1.c
  
diff --git a/tests_ast/element.py b/tests_ast/element.py

index 48e18da..2d02217 100644 (file)
--- a/tests_ast/element.py
+++ b/tests_ast/element.py
@@ -1,3 +1,19 @@
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
  import xml.etree.ElementTree
  
  class Element(xml.etree.ElementTree._Element_Py):
@@ -144,6 +160,16 @@ def set_text(root, i, text):
    else:
      root[i - 1].tail = text
  
+def to_text(root):
+  return ''.join(
+    [
+      j
+      for i in range(len(root))
+      for j in [get_text(root, i), to_text(root[i])]
+    ] +
+    [get_text(root, len(root))]
+  )
+
  def concatenate(children, factory = Element, *args, **kwargs):
    root = factory(*args, **kwargs)
    for child in children:
author	Nick Downing <nick@ndcode.org>
	Mon, 14 Jan 2019 01:24:19 +0000 (12:24 +1100)
committer	Nick Downing <nick@ndcode.org>
	Mon, 14 Jan 2019 01:24:19 +0000 (12:24 +1100)
dfa.py		patch | blob | history
element.py		patch | blob | history
generate_flex.py		patch | blob | history
skel/skel_py.py		patch | blob | history
skel/skel_py_element.py		patch | blob | history
tests/Makefile		patch | blob | history
tests_ast/element.py		patch | blob | history