Move Flex DFA generation into its own module, which makes bootstrap_plex.py trivial

author Nick Downing <downing.nick@gmail.com>

Sun, 1 Jul 2018 14:56:41 +0000 (00:56 +1000)

committer Nick Downing <downing.nick@gmail.com>

Sun, 1 Jul 2018 14:56:41 +0000 (00:56 +1000)
author Nick Downing <downing.nick@gmail.com>
Sun, 1 Jul 2018 14:56:41 +0000 (00:56 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sun, 1 Jul 2018 14:56:41 +0000 (00:56 +1000)
diff --git a/bootstrap_plex.py b/bootstrap_plex.py

index 9f4e777..87c8fe6 100755 (executable)
--- a/bootstrap_plex.py
+++ b/bootstrap_plex.py
@@ -2,208 +2,11 @@
  
  import ast
  import element
+import flex_dfa
  import getopt
-#import lex
-import numpy
  import os
-import re
-import regex
  import sys
-import work
  import xml.etree.ElementTree
-#import yacc
-
-class FlexDFA:
-  YY_TRAILING_MASK = 0x2000
-  YY_TRAILING_HEAD_MASK = 0x4000
-
-  def __init__(self, dfa):
-    # we use a modified version of the transition routine, we do not know
-    # how many threads are active, so we just create null threads as they
-    # are referred to (resulting threads have current marks but no history),
-    # each thread is a list in forward order, not a stack in reverse order
-    def transit(transition):
-      nonlocal threads0, threads1, prefix_slop # note: also uses i
-      j = prefix_slop
-      for trans in transition:
-        if len(threads0) < j + trans[1]:
-          threads0.extend([[] for k in range(j + trans[1] - len(threads0))])
-        if trans[0] == regex.DFA.TRANSITION_POP:
-          j += trans[1]
-        elif trans[0] == regex.DFA.TRANSITION_DUP:
-          while j < trans[1]:
-            threads0[:0] = [None] * prefix_slop
-            threads1[:0] = [None] * prefix_slop
-            j += prefix_slop
-            prefix_slop *= 2
-          threads0[j - trans[1]:j] = [
-            list(k)
-            for k in threads0[j:j + trans[1]]
-          ]
-          j -= trans[1]
-        elif trans[0] == regex.DFA.TRANSITION_MARK:
-          for k in range(j, j + trans[1]):
-            threads0[j].append(trans[2])
-        elif trans[0] == regex.DFA.TRANSITION_MOVE:
-          threads1.extend(threads0[j:j + trans[1]])
-          j += trans[1]
-        #elif trans[0] == regex.DFA.TRANSITION_DEL:
-        #  del threads1[-trans[1]:]
-        else:
-          assert False
-      assert j == len(threads0)
-      threads0, threads1 = threads1, threads0
-      del threads1[prefix_slop:]
-
-    threads0 = [None]
-    threads1 = [None]
-    prefix_slop = 1
-
-    # this is basically just a renumbering
-
-    # state numbers in the DFA become base/def numbers in the FlexDFA,
-    # obviously with gaps in the numbering depending on how things fit
-    state_to_flex_base_def = {}
-
-    # action numbers in the DFA become state numbers in the FlexDFA,
-    # with the start-action for each start-condition being copied into
-    # the correctly numbered slot (may cause duplicates), and then all
-    # actions reachable from these being copied to the subsequent slots
-    # (if start-action is reached again, uses the lower numbered copy)
-    flex_state_to_action = [0] + dfa.start_action
-    action_to_flex_state = {-1: 0} # see comment about state -1 below
-    for i in range(len(flex_state_to_action)):
-      action = flex_state_to_action[i]
-      if action not in action_to_flex_state:
-        action_to_flex_state[action] = i
-
-    # last start-condition is really end-of-buffer (EOB), it has only
-    # a dummy rule that accepts the null string and executes EOB action
-    eob_state = len(dfa.start_action)
-
-    # state 0 is the jam state, the EOB state will be added later on
-    self.states = [([], 0, 0)] # accept, base, def
-    self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
-
-    # full_entries[i, j] is transition on character j in state i
-    # in our way of thinking, 0 is don't care and -1 is failure
-    # however, in the flex way these are both 0 (don't care),
-    # the distinction being that failure has no associated action
-    # thus all entries of states are filled, with 0 as a catch-all
-    full_entries = numpy.zeros((0x100, 0x101), numpy.int16)
-    full_entries[0, 0] = eob_state # all states go to EOB on NUL
-    used = numpy.zeros(0x200, numpy.bool)
-    used[:0x101] = True # account for the jam (don't care) state
-
-    while len(self.states) < len(flex_state_to_action):
-      action = flex_state_to_action[len(self.states)]
-      state, transition = dfa.actions[action]
-      #print('state', len(self.states), 'transition', transition)
-
-      del threads0[prefix_slop:]
-      transit(transition)
-      #print(threads0[prefix_slop:])
-      flex_accept = []
-      for k in [j for i in threads0[prefix_slop:] for j in i]:
-        if k & 1:
-          if (
-            len(flex_accept) > 0 and
-            flex_accept[-1] == (k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK
-          ):
-            # zero length trailing context, accept immediately
-            flex_accept.append(k >> 1)
-          else:
-            # look back to start of trailing context, then accept
-            flex_accept.append((k >> 1) | FlexDFA.YY_HEAD_MASK)
-        else:
-          # mark start of (hopefully safe) trailing context
-          flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
-      #print(flex_accept)
-
-      if state in state_to_flex_base_def:
-        flex_base, flex_def = state_to_flex_base[state]
-      else:
-        # extend full_entries array if required
-        if len(self.states) >= full_entries.shape[0]:
-          new_full_entries = numpy.zeros(
-            (full_entries.shape[0] * 2, 0x101),
-            numpy.int16
-          )
-          new_full_entries[:full_entries.shape[0], :] = full_entries
-          full_entries = new_full_entries
-
-        # calculate full entry from dfa.state char-to-action table
-        breaks, actions, _ = dfa.states[state]
-        char0 = 0
-        for i in range(len(breaks)):
-          char1 = breaks[i]
-          next_action = actions[i]
-          if next_action in action_to_flex_state:
-            next_flex_state = action_to_flex_state[next_action]
-          else:
-            next_flex_state = len(flex_state_to_action)
-            action_to_flex_state[next_action] = next_flex_state
-            flex_state_to_action.append(next_action)
-          full_entries[len(self.states), char0:char1] = next_flex_state
-          char0 = char1
-        assert char0 == 0x100
-
-        # remap NUL transition to 0x100 and replace with EOB transition
-        full_entries[len(self.states), 0x100] = \
-          full_entries[len(self.states), 0]
-        full_entries[len(self.states), 0] = eob_state
-        #print(len(self.states), full_entries[len(self.states), :])
-
-        # find most similar/earliest existing state to use as default
-        mask = (
-          full_entries[len(self.states):len(self.states) + 1, :] !=
-          full_entries[:len(self.states), :]
-        )
-        diff = numpy.sum(mask, 1)
-        flex_def = numpy.argmin(diff, 0)
-        if diff[flex_def] == 0: # exactly matching state
-          # can't use the normal similarity mechanism here, because it
-          # will choose a base of 0 (happens to correspond to the jam
-          # state) and this will make flex's inner loop abort early
-          # ... highlights various issues with flex's tables format,
-          # for instance that duplicate states may be unavoidable when
-          # start conditions are used, and that checking the base is
-          # not an ideal way to check if a state has the same transitions
-          # as the jam state, in fact a state can only share the same base
-          # as another state by COINCIDENCE due to the yy_chk[] issue!
-          # ... fix this by merging indistinguishable states (except for
-          # duplicate start conditions, which may have to use this hack)
-          flex_base = self.states[flex_def][1]
-        else:
-          mask = mask[flex_def, :]
-
-          # make sure used array is at least large enough to find a spot
-          while used.shape[0] < len(self.entries) + 0x101:
-            new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
-            new_used[:used.shape[0]] = used
-            used = new_used
-
-          # find a suitable spot and store differences from default state
-          flex_base = 0
-          while flex_base < len(self.entries):
-            if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
-              break
-            flex_base += 1
-          used[flex_base:flex_base + 0x101] |= mask
-          if len(self.entries) < flex_base + 0x101:
-            self.entries.extend(
-              [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
-            )
-          for i in numpy.nonzero(mask)[0]:
-            assert self.entries[flex_base + i] == (0xffff, 0xffff)
-            self.entries[flex_base + i] = (
-              full_entries[len(self.states), i],
-              len(self.states)
-            )
-
-      self.states.append((flex_accept, flex_base, flex_def))
-    #print(full_entries[:len(self.states), :])
-    #print(flex_state_to_action)
  
  home_dir = os.path.dirname(sys.argv[0])
  try:
@@ -233,210 +36,4 @@ in_file = args[0]
  with open(in_file) as fin:
    plex = element.deserialize(fin, ast.factory)
  plex.post_process()
-
-nfa = plex.to_nfa()
-eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
-eob_expr.post_process(len(plex.actions))
-eob_expr.add_to_nfa(nfa)
-dfa = nfa.to_dfa()
-#print(dfa.start_action)
-#print(dfa.actions[2])
-#print(dfa.match_text('1.0 + 5', 0))
- 
-flex_dfa = FlexDFA(dfa) #nfa.to_dfa())
-with open(skel_file, 'r') as fin:
-  with open(out_file, 'w+') as fout:
-    line = fin.readline()
-    while len(line):
-      if line == '/* GENERATE SECTION1 */\n':
-        fout.write(
-          '''/* GENERATE SECTION1 BEGIN */
-{0:s}/* GENERATE SECTION1 END*/
-'''.format(
-            ''.join([element.get_text(i, 0) for i in plex[0].code_blocks])
-          )
-        )
-      elif line == '/* GENERATE STARTCONDDECL */\n':
-        fout.write(
-          '''/* GENERATE STARTCONDDECL BEGIN */
-{0:s}/* GENERATE STARTCONDDECL END*/
-'''.format(
-            ''.join(
-              [
-                '#define {0:s} {1:d}\n'.format(
-                  plex.start_conditions[i].name,
-                  i
-                )
-                for i in range(len(plex.start_conditions))
-              ]
-            )
-          )
-        )
-      elif line == '/* GENERATE TABLES */\n':
-        yy_acclist = []
-        yy_accept = [0]
-        for flex_accept, _, _ in flex_dfa.states:
-          yy_acclist.extend(flex_accept)
-          yy_accept.append(len(yy_acclist))
-        fout.write(
-          '''/* GENERATE TABLES BEGIN */
-#define YY_END_OF_BUFFER {0:d}
-static const flex_int16_t yy_acclist[] = {{{1:s}
-}};
-static const flex_int16_t yy_accept[] = {{{2:s}
-}};
-static const flex_int16_t yy_base[] = {{{3:s}
-}};
-static const flex_int16_t yy_def[] = {{{4:s}
-}};
-static const flex_int16_t yy_nxt[] = {{{5:s}
-}};
-static const flex_int16_t yy_chk[] = {{{6:s}
-}};
-/* GENERATE TABLES END */
-'''.format(
-            len(plex.actions),
-            ','.join(
-              [
-                '\n\t{0:s}'.format(
-                  ', '.join(
-                    [
-                      '{0:5d}'.format(j)
-                      for j in yy_acclist[i:i + 10]
-                    ]
-                  )
-                )
-                for i in range(0, len(yy_acclist), 10)
-              ]
-            ),
-            ','.join(
-              [
-                '\n\t{0:s}'.format(
-                  ', '.join(
-                    [
-                      '{0:5d}'.format(j)
-                      for j in yy_accept[i:i + 10]
-                    ]
-                  )
-                )
-                for i in range(0, len(yy_accept), 10)
-              ]
-            ),
-            ','.join(
-              [
-                '\n\t{0:s}'.format(
-                  ', '.join(
-                    [
-                      '{0:5d}'.format(j)
-                      for _, j, _ in flex_dfa.states[i:i + 10]
-                    ]
-                  )
-                )
-                for i in range(0, len(flex_dfa.states), 10)
-              ]
-            ),
-            ','.join(
-              [
-                '\n\t{0:s}'.format(
-                  ', '.join(
-                    [
-                      '{0:5d}'.format(j)
-                      for _, _, j in flex_dfa.states[i:i + 10]
-                    ]
-                  )
-                )
-                for i in range(0, len(flex_dfa.states), 10)
-              ]
-            ),
-            ','.join(
-              [
-                '\n\t{0:s}'.format(
-                  ', '.join(
-                    [
-                      '{0:5d}'.format(j)
-                      for j, _ in flex_dfa.entries[i:i + 10]
-                    ]
-                  )
-                )
-                for i in range(0, len(flex_dfa.entries), 10)
-              ]
-            ),
-            ','.join(
-              [
-                '\n\t{0:s}'.format(
-                  ', '.join(
-                    [
-                      '{0:5d}'.format(j)
-                      for _, j in flex_dfa.entries[i:i + 10]
-                    ]
-                  )
-                )
-                for i in range(0, len(flex_dfa.entries), 10)
-              ]
-            )
-          )
-        )
-      elif line == '/* GENERATE SECTION2INITIAL */\n':
-        fout.write(
-          '''/* GENERATE SECTION2INITIAL BEGIN */
-{0:s}/* GENERATE SECTION2INITIAL END */
-'''.format(
-            ''.join([element.get_text(i, 0) for i in plex[1].code_blocks])
-          )
-        )
-      elif line == '/* GENERATE SECTION2 */\n':
-        eof_action_to_start_conditions = [
-          [
-            j
-            for j in range(len(plex.start_conditions))
-            if plex.start_conditions[i].eof_action == j
-          ]
-          for i in range(len(plex.eof_actions))
-        ]
-        #print('eof_action_to_start_conditions', eof_action_to_start_conditions)
-        fout.write(
-          '''/* GENERATE SECTION2 BEGIN */
-{0:s}{1:s}/* GENERATE SECTION2 END */
-'''.format(
-            ''.join(
-              [
-                '''case {0:d}:
-YY_RULE_SETUP
-{1:s}  YY_BREAK
-'''.format(
-                  i,
-                  element.get_text(plex.actions[i], 0)
-                )
-                for i in range(len(plex.actions))
-              ]
-            ),
-            ''.join(
-              [
-                '{0:s}{1:s}'.format(
-                  ''.join(
-                    [
-                      '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(
-                        plex.start_conditions[j].name
-                      )
-                      for j in eof_action_to_start_conditions[i]
-                    ]
-                  ),
-                  element.get_text(plex.eof_actions[i], 0)
-                )
-                for i in range(len(plex.eof_actions))
-                if len(eof_action_to_start_conditions[i]) > 0
-              ]
-            )
-          )
-        )
-      elif line == '/* GENERATE SECTION3 */\n':
-        fout.write(
-          '''/* GENERATE SECTION3 BEGIN */
-{0:s}/*GENERATE SECTION3 END */
-'''.format(
-            '' if len(plex) < 3 else element.get_text(plex[2], 0)
-          )
-        )
-      else:
-        fout.write(line)
-      line = fin.readline()
+flex_dfa.generate(plex, skel_file, out_file)
diff --git a/flex_dfa.py b/flex_dfa.py

new file mode 100644 (file)

index 0000000..37046e8
--- /dev/null
+++ b/flex_dfa.py
@@ -0,0 +1,399 @@
+import element
+import numpy
+import regex
+
+class FlexDFA:
+  YY_TRAILING_MASK = 0x2000
+  YY_TRAILING_HEAD_MASK = 0x4000
+
+  def __init__(self, dfa):
+    # we use a modified version of the transition routine, we do not know
+    # how many threads are active, so we just create null threads as they
+    # are referred to (resulting threads have current marks but no history),
+    # each thread is a list in forward order, not a stack in reverse order
+    def transit(transition):
+      nonlocal threads0, threads1, prefix_slop # note: also uses i
+      j = prefix_slop
+      for trans in transition:
+        if len(threads0) < j + trans[1]:
+          threads0.extend([[] for k in range(j + trans[1] - len(threads0))])
+        if trans[0] == regex.DFA.TRANSITION_POP:
+          j += trans[1]
+        elif trans[0] == regex.DFA.TRANSITION_DUP:
+          while j < trans[1]:
+            threads0[:0] = [None] * prefix_slop
+            threads1[:0] = [None] * prefix_slop
+            j += prefix_slop
+            prefix_slop *= 2
+          threads0[j - trans[1]:j] = [
+            list(k)
+            for k in threads0[j:j + trans[1]]
+          ]
+          j -= trans[1]
+        elif trans[0] == regex.DFA.TRANSITION_MARK:
+          for k in range(j, j + trans[1]):
+            threads0[j].append(trans[2])
+        elif trans[0] == regex.DFA.TRANSITION_MOVE:
+          threads1.extend(threads0[j:j + trans[1]])
+          j += trans[1]
+        #elif trans[0] == regex.DFA.TRANSITION_DEL:
+        #  del threads1[-trans[1]:]
+        else:
+          assert False
+      assert j == len(threads0)
+      threads0, threads1 = threads1, threads0
+      del threads1[prefix_slop:]
+
+    threads0 = [None]
+    threads1 = [None]
+    prefix_slop = 1
+
+    # this is basically just a renumbering
+
+    # state numbers in the DFA become base/def numbers in the FlexDFA,
+    # obviously with gaps in the numbering depending on how things fit
+    state_to_flex_base_def = {}
+
+    # action numbers in the DFA become state numbers in the FlexDFA,
+    # with the start-action for each start-condition being copied into
+    # the correctly numbered slot (may cause duplicates), and then all
+    # actions reachable from these being copied to the subsequent slots
+    # (if start-action is reached again, uses the lower numbered copy)
+    flex_state_to_action = [0] + dfa.start_action
+    action_to_flex_state = {-1: 0} # see comment about state -1 below
+    for i in range(len(flex_state_to_action)):
+      action = flex_state_to_action[i]
+      if action not in action_to_flex_state:
+        action_to_flex_state[action] = i
+
+    # last start-condition is really end-of-buffer (EOB), it has only
+    # a dummy rule that accepts the null string and executes EOB action
+    eob_state = len(dfa.start_action)
+
+    # state 0 is the jam state, the EOB state will be added later on
+    self.states = [([], 0, 0)] # accept, base, def
+    self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
+
+    # full_entries[i, j] is transition on character j in state i
+    # in our way of thinking, 0 is don't care and -1 is failure
+    # however, in the flex way these are both 0 (don't care),
+    # the distinction being that failure has no associated action
+    # thus all entries of states are filled, with 0 as a catch-all
+    full_entries = numpy.zeros((0x100, 0x101), numpy.int16)
+    full_entries[0, 0] = eob_state # all states go to EOB on NUL
+    used = numpy.zeros(0x200, numpy.bool)
+    used[:0x101] = True # account for the jam (don't care) state
+
+    while len(self.states) < len(flex_state_to_action):
+      action = flex_state_to_action[len(self.states)]
+      state, transition = dfa.actions[action]
+      #print('state', len(self.states), 'transition', transition)
+
+      del threads0[prefix_slop:]
+      transit(transition)
+      #print(threads0[prefix_slop:])
+      flex_accept = []
+      for k in [j for i in threads0[prefix_slop:] for j in i]:
+        if k & 1:
+          if (
+            len(flex_accept) > 0 and
+            flex_accept[-1] == (k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK
+          ):
+            # zero length trailing context, accept immediately
+            flex_accept.append(k >> 1)
+          else:
+            # look back to start of trailing context, then accept
+            flex_accept.append((k >> 1) | FlexDFA.YY_HEAD_MASK)
+        else:
+          # mark start of (hopefully safe) trailing context
+          flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
+      #print(flex_accept)
+
+      if state in state_to_flex_base_def:
+        flex_base, flex_def = state_to_flex_base[state]
+      else:
+        # extend full_entries array if required
+        if len(self.states) >= full_entries.shape[0]:
+          new_full_entries = numpy.zeros(
+            (full_entries.shape[0] * 2, 0x101),
+            numpy.int16
+          )
+          new_full_entries[:full_entries.shape[0], :] = full_entries
+          full_entries = new_full_entries
+
+        # calculate full entry from dfa.state char-to-action table
+        breaks, actions, _ = dfa.states[state]
+        char0 = 0
+        for i in range(len(breaks)):
+          char1 = breaks[i]
+          next_action = actions[i]
+          if next_action in action_to_flex_state:
+            next_flex_state = action_to_flex_state[next_action]
+          else:
+            next_flex_state = len(flex_state_to_action)
+            action_to_flex_state[next_action] = next_flex_state
+            flex_state_to_action.append(next_action)
+          full_entries[len(self.states), char0:char1] = next_flex_state
+          char0 = char1
+        assert char0 == 0x100
+
+        # remap NUL transition to 0x100 and replace with EOB transition
+        full_entries[len(self.states), 0x100] = \
+          full_entries[len(self.states), 0]
+        full_entries[len(self.states), 0] = eob_state
+        #print(len(self.states), full_entries[len(self.states), :])
+
+        # find most similar/earliest existing state to use as default
+        mask = (
+          full_entries[len(self.states):len(self.states) + 1, :] !=
+          full_entries[:len(self.states), :]
+        )
+        diff = numpy.sum(mask, 1)
+        flex_def = numpy.argmin(diff, 0)
+        if diff[flex_def] == 0: # exactly matching state
+          # can't use the normal similarity mechanism here, because it
+          # will choose a base of 0 (happens to correspond to the jam
+          # state) and this will make flex's inner loop abort early
+          # ... highlights various issues with flex's tables format,
+          # for instance that duplicate states may be unavoidable when
+          # start conditions are used, and that checking the base is
+          # not an ideal way to check if a state has the same transitions
+          # as the jam state, in fact a state can only share the same base
+          # as another state by COINCIDENCE due to the yy_chk[] issue!
+          # ... fix this by merging indistinguishable states (except for
+          # duplicate start conditions, which may have to use this hack)
+          flex_base = self.states[flex_def][1]
+        else:
+          mask = mask[flex_def, :]
+
+          # make sure used array is at least large enough to find a spot
+          while used.shape[0] < len(self.entries) + 0x101:
+            new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
+            new_used[:used.shape[0]] = used
+            used = new_used
+
+          # find a suitable spot and store differences from default state
+          flex_base = 0
+          while flex_base < len(self.entries):
+            if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
+              break
+            flex_base += 1
+          used[flex_base:flex_base + 0x101] |= mask
+          if len(self.entries) < flex_base + 0x101:
+            self.entries.extend(
+              [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
+            )
+          for i in numpy.nonzero(mask)[0]:
+            assert self.entries[flex_base + i] == (0xffff, 0xffff)
+            self.entries[flex_base + i] = (
+              full_entries[len(self.states), i],
+              len(self.states)
+            )
+
+      self.states.append((flex_accept, flex_base, flex_def))
+    #print(full_entries[:len(self.states), :])
+    #print(flex_state_to_action)
+
+def generate(plex, skel_file, out_file):
+  nfa = plex.to_nfa()
+  eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
+  eob_expr.post_process(len(plex.actions))
+  eob_expr.add_to_nfa(nfa)
+  flex_dfa = FlexDFA(nfa.to_dfa())
+
+  with open(skel_file, 'r') as fin:
+    with open(out_file, 'w+') as fout:
+      line = fin.readline()
+      while len(line):
+        if line == '/* GENERATE SECTION1 */\n':
+          fout.write(
+            '''/* GENERATE SECTION1 BEGIN */
+{0:s}/* GENERATE SECTION1 END*/
+'''.format(
+              ''.join([element.get_text(i, 0) for i in plex[0].code_blocks])
+            )
+          )
+        elif line == '/* GENERATE STARTCONDDECL */\n':
+          fout.write(
+            '''/* GENERATE STARTCONDDECL BEGIN */
+{0:s}/* GENERATE STARTCONDDECL END*/
+'''.format(
+              ''.join(
+                [
+                  '#define {0:s} {1:d}\n'.format(
+                    plex.start_conditions[i].name,
+                    i
+                  )
+                  for i in range(len(plex.start_conditions))
+                ]
+              )
+            )
+          )
+        elif line == '/* GENERATE TABLES */\n':
+          yy_acclist = []
+          yy_accept = [0]
+          for flex_accept, _, _ in flex_dfa.states:
+            yy_acclist.extend(flex_accept)
+            yy_accept.append(len(yy_acclist))
+          fout.write(
+            '''/* GENERATE TABLES BEGIN */
+#define YY_END_OF_BUFFER {0:d}
+static const flex_int16_t yy_acclist[] = {{{1:s}
+}};
+static const flex_int16_t yy_accept[] = {{{2:s}
+}};
+static const flex_int16_t yy_base[] = {{{3:s}
+}};
+static const flex_int16_t yy_def[] = {{{4:s}
+}};
+static const flex_int16_t yy_nxt[] = {{{5:s}
+}};
+static const flex_int16_t yy_chk[] = {{{6:s}
+}};
+/* GENERATE TABLES END */
+'''.format(
+              len(plex.actions),
+              ','.join(
+                [
+                  '\n\t{0:s}'.format(
+                    ', '.join(
+                      [
+                        '{0:5d}'.format(j)
+                        for j in yy_acclist[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, len(yy_acclist), 10)
+                ]
+              ),
+              ','.join(
+                [
+                  '\n\t{0:s}'.format(
+                    ', '.join(
+                      [
+                        '{0:5d}'.format(j)
+                        for j in yy_accept[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, len(yy_accept), 10)
+                ]
+              ),
+              ','.join(
+                [
+                  '\n\t{0:s}'.format(
+                    ', '.join(
+                      [
+                        '{0:5d}'.format(j)
+                        for _, j, _ in flex_dfa.states[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, len(flex_dfa.states), 10)
+                ]
+              ),
+              ','.join(
+                [
+                  '\n\t{0:s}'.format(
+                    ', '.join(
+                      [
+                        '{0:5d}'.format(j)
+                        for _, _, j in flex_dfa.states[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, len(flex_dfa.states), 10)
+                ]
+              ),
+              ','.join(
+                [
+                  '\n\t{0:s}'.format(
+                    ', '.join(
+                      [
+                        '{0:5d}'.format(j)
+                        for j, _ in flex_dfa.entries[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, len(flex_dfa.entries), 10)
+                ]
+              ),
+              ','.join(
+                [
+                  '\n\t{0:s}'.format(
+                    ', '.join(
+                      [
+                        '{0:5d}'.format(j)
+                        for _, j in flex_dfa.entries[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, len(flex_dfa.entries), 10)
+                ]
+              )
+            )
+          )
+        elif line == '/* GENERATE SECTION2INITIAL */\n':
+          fout.write(
+            '''/* GENERATE SECTION2INITIAL BEGIN */
+{0:s}/* GENERATE SECTION2INITIAL END */
+'''.format(
+              ''.join([element.get_text(i, 0) for i in plex[1].code_blocks])
+            )
+          )
+        elif line == '/* GENERATE SECTION2 */\n':
+          eof_action_to_start_conditions = [
+            [
+              j
+              for j in range(len(plex.start_conditions))
+              if plex.start_conditions[i].eof_action == j
+            ]
+            for i in range(len(plex.eof_actions))
+          ]
+          #print('eof_action_to_start_conditions', eof_action_to_start_conditions)
+          fout.write(
+            '''/* GENERATE SECTION2 BEGIN */
+{0:s}{1:s}/* GENERATE SECTION2 END */
+'''.format(
+              ''.join(
+                [
+                  '''case {0:d}:
+YY_RULE_SETUP
+{1:s}  YY_BREAK
+'''.format(
+                    i,
+                    element.get_text(plex.actions[i], 0)
+                  )
+                  for i in range(len(plex.actions))
+                ]
+              ),
+              ''.join(
+                [
+                  '{0:s}{1:s}'.format(
+                    ''.join(
+                      [
+                        '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(
+                          plex.start_conditions[j].name
+                        )
+                        for j in eof_action_to_start_conditions[i]
+                      ]
+                    ),
+                    element.get_text(plex.eof_actions[i], 0)
+                  )
+                  for i in range(len(plex.eof_actions))
+                  if len(eof_action_to_start_conditions[i]) > 0
+                ]
+              )
+            )
+          )
+        elif line == '/* GENERATE SECTION3 */\n':
+          fout.write(
+            '''/* GENERATE SECTION3 BEGIN */
+{0:s}/*GENERATE SECTION3 END */
+'''.format(
+              '' if len(plex) < 3 else element.get_text(plex[2], 0)
+            )
+          )
+        else:
+          fout.write(line)
+        line = fin.readline()
author	Nick Downing <downing.nick@gmail.com>
	Sun, 1 Jul 2018 14:56:41 +0000 (00:56 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Sun, 1 Jul 2018 14:56:41 +0000 (00:56 +1000)
bootstrap_plex.py		patch | blob | history
flex_dfa.py	[new file with mode: 0644]	patch | blob