__pycache__
-/*.xml
/bootstrap/*.xml
/bootstrap/lex_yy.py
/bootstrap/out
/lex-yacc-examples/example7
/lex_yy.py
/out
+/regex.py
/skel/skel_flex.c.orig
/t_def.py
/tests/*.c
/tests/flex0
/tests/flex1
/tests/lex_yy.py
-/tests_ast/*.xml
/tests_ast/lex_yy.py
+/tests_ast/t_def.py
/y_tab.py
-all: lex_yy.py t_def.py y_tab.py
+all: lex_yy.py regex.py t_def.py y_tab.py
lex_yy.py: scan.l
bootstrap_pilex/pilex.py --element --python $<
+regex.py: regex.t
+ bootstrap_pitree/pitree.py --python -o $@ $<
+
t_def.py: pilex.t
bootstrap_pitree/pitree.py --python $<
bootstrap_piyacc/piyacc.py --element --python $<
clean:
- rm -f lex_yy.py t_def.py y_tab.py
+ rm -f lex_yy.py regex.py t_def.py y_tab.py
+++ /dev/null
-#!/usr/bin/env python3
-
-# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 51
-# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-
-import re
-import sys
-
# a line opening a generated section: everything before ' BEGIN' is kept
re_begin = re.compile(
    '([\t ]*# GENERATE .*) BEGIN'
)
# a line closing a generated section
re_end = re.compile(
    '([\t ]*# GENERATE END)'
)


def strip_generated(infile, outfile):
    """Copy *infile* to *outfile*, removing generated sections.

    A line matching ``# GENERATE ... BEGIN`` is written without its
    trailing `` BEGIN`` (and without its line ending).  Every following
    line up to the matching ``# GENERATE END`` line is dropped, and of the
    END line itself only the text after the marker (normally just the
    newline) is written, so that the section collapses to a single
    ``# GENERATE ...`` line.  All other lines are copied unchanged.

    If the input ends before a ``# GENERATE END`` line is seen, the rest
    of the input is silently discarded.

    infile  -- object with a ``readline()`` method returning '' at EOF
    outfile -- object with a ``write()`` method
    """
    # a single shared iterator, so the inner loop consumes lines from the
    # same stream position as the outer loop
    lines = iter(infile.readline, '')
    for line in lines:
        begin = re_begin.match(line)
        if begin is None:
            outfile.write(line)
            continue
        outfile.write(begin.group(1))
        for line in lines:
            end = re_end.match(line)
            if end is not None:
                outfile.write(line[len(end.group(1)):])
                break


if __name__ == '__main__':
    strip_generated(sys.stdin, sys.stdout)
+++ /dev/null
-#!/usr/bin/env python3
-
-# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 51
-# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-
-import re
-import sys
-
# optional package prefix, taken from the first command line argument
if len(sys.argv) >= 2:
    package_name = '{0:s}.'.format(sys.argv[1])
else:
    package_name = ''

# Python source text of the default constructor argument, per field type
default_value = {
    'bool': 'False',
    'int': '-1',
    'ref': 'None',
    'str': '\'\'',
    'list(bool)': '[]',
    'list(int)': '[]',
    'list(ref)': '[]',
    'list(str)': '[]',
    'set(bool)': 'set()',
    'set(int)': 'set()',
    'set(ref)': 'set()',
    'set(str)': 'set()'
}
# serialized default handed to self.get() when deserializing a scalar field
default_value_str = {
    'bool': 'false',
    'int': '-1',
    'ref': '-1',
    'str': ''
}

# The patterns are raw strings: '\(' and '\)' are regex escapes, not string
# escapes, and in a plain literal they are invalid escape sequences.
# groups: indent, class name, first base class
re_class = re.compile(
    r'([\t ]*)class ([A-Za-z_][A-Za-z0-9_]*)\(([A-Za-z_][A-Za-z0-9_.]*)'
)
# groups: indent, parameter text (one level of nested parentheses allowed),
# last inner repetition (unused), optional ' BEGIN'
re_element = re.compile(
    r'([\t ]*)# GENERATE ELEMENT\((([^()]|\([^()]*\))*)\)( BEGIN)?'
)
# groups: indent, fallback class expression, optional ' BEGIN'
re_factory = re.compile(
    r'([\t ]*)# GENERATE FACTORY\(([^()]*)\)( BEGIN)?'
)
# currently open classes: (indent, class_name, base_class, full_base_class)
stack = []
# dotted names of every class found to derive from a known element class
classes = []
# one dict per open scope, mapping class name to its list of (type, name)
base_classes = [{'element.Element': []}] # params
-
# Main loop: copy stdin to stdout line by line.  Class headers are tracked
# so that each '# GENERATE ELEMENT(...)' marker can be expanded into
# __init__/serialize/deserialize/copy methods for the enclosing class, and
# each '# GENERATE FACTORY(...)' marker into a tag -> class table plus a
# factory() function.  Text previously generated after a marker that ends in
# ' BEGIN' is skipped up to the next '# GENERATE END' line.
-line = sys.stdin.readline()
-while len(line):
# case 1: a class header -- echo it and record it on the stack
- match = re_class.match(line)
- if match is not None:
- sys.stdout.write(line)
- indent = match.group(1)
- class_name = match.group(2)
- base_class = match.group(3)
# close every open class whose indent begins with the new class's indent;
# its known nested classes are re-registered in the parent scope under
# 'Outer.Inner' keys
- while len(stack) and stack[-1][0][:len(indent)] == indent:
- _, temp_class_name, _, _ = stack.pop()
- for temp_base_class, temp_fields in base_classes.pop().items():
- base_classes[-1][
- '{0:s}.{1:s}'.format(temp_class_name, temp_base_class)
- ] = temp_fields
# look for the base class from the innermost scope outwards; when found the
# new class is recorded in 'classes' and starts with a copy of the base
# class's field list, otherwise the base class name is used unqualified
- for i in range(len(base_classes) - 1, -1, -1):
- if base_class in base_classes[i]:
- classes.append(
- '.'.join([j for _, j, _, _ in stack] + [class_name])
- )
- full_base_class = '.'.join(
- [j for _, j, _, _ in stack[:i]] + [base_class]
- )
- base_classes[-1][class_name] = list(base_classes[i][base_class])
- break
- else:
- full_base_class = base_class
- stack.append((indent, class_name, base_class, full_base_class))
- base_classes.append({})
- else:
# case 2: a '# GENERATE ELEMENT(params)' marker
- match = re_element.match(line)
- if match is not None:
- indent = match.group(1)
- params = match.group(2)
- begin = match.group(4)
-
# close finished classes as above, then take the innermost open class as the
# one being generated (this indexes stack[-1], so a marker that is not
# inside any recorded class raises IndexError)
- while len(stack) and stack[-1][0][:len(indent)] == indent:
- _, temp_class_name, _, _ = stack.pop()
- for temp_base_class, temp_fields in base_classes.pop().items():
- base_classes[-1][
- '{0:s}.{1:s}'.format(temp_class_name, temp_base_class)
- ] = temp_fields
- _, class_name, base_class, full_base_class = stack[-1]
-
# parse 'type name, type name, ...' into (type, name) pairs; i is the number
# of fields inherited from the base class, which the generated __init__
# forwards to the base class __init__
- fields = params.split(',')
- if fields[-1] == '':
- del fields[-1:]
- fields = [i.split() for i in fields]
- fields = [(type, name) for [type, name] in fields]
- i = len(base_classes[-2][class_name])
- base_classes[-2][class_name].extend(fields)
-
- sys.stdout.write(
- '''{0:s}# GENERATE ELEMENT({1:s}) BEGIN
-{2:s}def __init__(
-{3:s} self,
-{4:s} tag = '{5:s}',
-{6:s} attrib = {{}},
-{7:s} text = '',
-{8:s} children = []{9:s}
-{10:s}):
-{11:s} {12:s}.__init__(
-{13:s} self,
-{14:s} tag,
-{15:s} attrib,
-{16:s} text,
-{17:s} children{18:s}
-{19:s} )
-{20:s}'''.format(
- indent,
- params,
- indent,
- indent,
- indent,
- '_'.join([i for _, i, _, _ in stack]),
- indent,
- indent,
- indent,
- ''.join(
- [
- ',\n{0:s} {1:s} = {2:s}'.format(
- indent,
- name,
- default_value[type]
- )
- for type, name in base_classes[-2][class_name]
- ]
- ),
- indent,
- indent,
- full_base_class,
- indent,
- indent,
- indent,
- indent,
- indent,
- ''.join(
- [
- ',\n{0:s} {1:s}'.format(
- indent,
- name
- )
- for _, name in base_classes[-2][class_name][:i]
- ]
- ),
- indent,
- ''.join(
- [
- '{0:s} self.{1:s} = {2:s}\n'.format(
- indent,
- name,
- name
- )
- for _, name in fields
- ]
- )
- )
- )
# serialize()/deserialize() are only generated when the class adds fields of
# its own; list(...) and set(...) fields are stored space-separated
- if len(fields):
- sys.stdout.write(
- '''{0:s}def serialize(self, ref_list):
-{1:s} {2:s}.serialize(self, ref_list)
-'''.format(
- indent,
- indent,
- full_base_class
- )
- )
- for type, name in fields:
- if type[:5] == 'list(' and type[-1:] == ')':
- subtype = type[5:-1]
- sys.stdout.write(
- '''{0:s} self.set(
-{1:s} '{2:s}',
-{3:s} ' '.join(
-{4:s} [
-{5:s} element.serialize_{6:s}(i{7:s})
-{8:s} for i in self.{9:s}
-{10:s} ]
-{11:s} )
-{12:s} )
-'''.format(
- indent,
- indent,
- name,
- indent,
- indent,
- indent,
- subtype,
- ', ref_list' if subtype == 'ref' else '',
- indent,
- name,
- indent,
- indent,
- indent
- )
- )
- elif type[:4] == 'set(' and type[-1:] == ')':
- subtype = type[4:-1]
- sys.stdout.write(
- '''{0:s} self.set(
-{1:s} '{2:s}',
-{3:s} ' '.join(
-{4:s} [
-{5:s} element.serialize_{6:s}(i{7:s})
-{8:s} for i in sorted(self.{9:s})
-{10:s} ]
-{11:s} )
-{12:s} )
-'''.format(
- indent,
- indent,
- name,
- indent,
- indent,
- indent,
- subtype,
- ', ref_list' if subtype == 'ref' else '',
- indent,
- name,
- indent,
- indent,
- indent
- )
- )
- else:
- sys.stdout.write(
- '''{0:s} self.set(
-{1:s} '{2:s}',
-{3:s} element.serialize_{4:s}(self.{5:s}{6:s})
-{7:s} )
-'''.format(
- indent,
- indent,
- name,
- indent,
- type,
- name,
- ', ref_list' if type == 'ref' else '',
- indent
- )
- )
- sys.stdout.write(
- '''{0:s}def deserialize(self, ref_list):
-{1:s} {2:s}.deserialize(self, ref_list)
-'''.format(
- indent,
- indent,
- full_base_class
- )
- )
- for type, name in fields:
- if type[:5] == 'list(' and type[-1:] == ')':
- subtype = type[5:-1]
- sys.stdout.write(
- '''{0:s} self.{1:s} = [
-{2:s} element.deserialize_{3:s}(i{4:s})
-{5:s} for i in self.get('{6:s}', '').split()
-{7:s} ]
-'''.format(
- indent,
- name,
- indent,
- subtype,
- ', ref_list' if subtype == 'ref' else '',
- indent,
- name,
- indent
- )
- )
- elif type[:4] == 'set(' and type[-1:] == ')':
- subtype = type[4:-1]
- sys.stdout.write(
- '''{0:s} self.{1:s} = set(
-{2:s} [
-{3:s} element.deserialize_{4:s}(i{5:s})
-{6:s} for i in self.get('{7:s}', '').split()
-{8:s} ]
-{9:s} )
-'''.format(
- indent,
- name,
- indent,
- indent,
- subtype,
- ', ref_list' if subtype == 'ref' else '',
- indent,
- name,
- indent,
- indent
- )
- )
- else:
- sys.stdout.write(
- '''{0:s} self.{1:s} = element.deserialize_{2:s}(self.get('{3:s}', '{4:s}'){5:s})
-'''.format(
- indent,
- name,
- type,
- name,
- default_value_str[type],
- ', ref_list' if type == 'ref' else ''
- )
- )
# copy() is always generated.  NOTE(review): the default factory is emitted
# as the bare class_name, which for a nested class is not the qualified
# 'Outer.Inner' name -- confirm this is intended
- sys.stdout.write(
- '''{0:s}def copy(self, factory = None):
-{1:s} result = {2:s}.copy(
-{3:s} self,
-{4:s} {5:s} if factory is None else factory
-{6:s} ){7:s}
-{8:s} return result
-'''.format(
- indent,
- indent,
- full_base_class,
- indent,
- indent,
- class_name,
- indent,
- ''.join(
- [
- '\n{0:s} result.{1:s} = self.{2:s}'.format(
- indent,
- name,
- name
- )
- for _, name in fields
- ]
- ),
- indent
- )
- )
- sys.stdout.write(
- '''{0:s}# GENERATE END
-'''.format(
- indent
- )
- )
# the marker ended in ' BEGIN': discard the old generated text up to and
# including its '# GENERATE END' line; reaching end of input first is an
# error (reported with assert, so not checked under python -O)
- if begin is not None:
- line = sys.stdin.readline()
- while len(line):
- if line.strip() == '# GENERATE END':
- break
- line = sys.stdin.readline()
- else:
- assert False
- else:
# case 3: a '# GENERATE FACTORY(param)' marker -- emit a tag_to_class table
# covering every class recorded so far (tag = dotted name with '.' replaced
# by '_') and a factory() that falls back to 'param' for unknown tags
- match = re_factory.match(line)
- if match is not None:
- indent = match.group(1)
- param = match.group(2)
- begin = match.group(3)
-
- sys.stdout.write(
- '''{0:s}# GENERATE FACTORY({1:s}) BEGIN
-{2:s}tag_to_class = {{{3:s}
-{4:s}}}
-{5:s}def factory(tag, attrib = {{}}, *args, **kwargs):
-{6:s} return tag_to_class.get(tag, {7:s})(tag, attrib, *args, **kwargs)
-{8:s}# GENERATE END
-'''.format(
- indent,
- param,
- indent,
- ','.join(
- [
- '\n{0:s} \'{1:s}\': {2:s}'.format(
- indent,
- i.replace('.', '_'),
- i
- )
- for i in classes
- ]
- ),
- indent,
- indent,
- indent,
- param,
- indent
- )
- )
-
# skip the previously generated factory text, as for ELEMENT above
- if begin is not None:
- line = sys.stdin.readline()
- while len(line):
- if line.strip() == '# GENERATE END':
- break
- line = sys.stdin.readline()
- else:
- assert False
- else:
# case 4: any other line is passed through unchanged
- sys.stdout.write(line)
- line = sys.stdin.readline()
+++ /dev/null
-# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 51
-# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-
-import bisect_set
-import element
-import nfa
-
-# defines the alphabet size, set this to 0x11000 for unicode
-n_characters = 0x100
-
class Regex(element.Element):
    """Base class of the regular-expression tree nodes.

    Adds the integer field ``n_groups``; post_process() sets it to the sum
    of the children's ``n_groups``.  Subclasses implement to_nfa_state().
    """
    # GENERATE ELEMENT(int n_groups) BEGIN
    def __init__(
        self, tag = 'Regex', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        element.Element.__init__(self, tag, attrib, text, children)
        # a string argument is decoded with element.deserialize_int()
        if isinstance(n_groups, str):
            n_groups = element.deserialize_int(n_groups)
        self.n_groups = n_groups
    def serialize(self, ref_list):
        element.Element.serialize(self, ref_list)
        encoded = element.serialize_int(self.n_groups)
        self.set('n_groups', encoded)
    def deserialize(self, ref_list):
        element.Element.deserialize(self, ref_list)
        encoded = self.get('n_groups', '-1')
        self.n_groups = element.deserialize_int(encoded)
    def copy(self, factory = None):
        if factory is None:
            factory = Regex
        result = element.Element.copy(self, factory)
        result.n_groups = self.n_groups
        return result
    def repr_serialize(self, params):
        element.Element.repr_serialize(self, params)
        # the default value -1 is left out of the repr
        if self.n_groups != -1:
            params.append('n_groups = {0!r}'.format(self.n_groups))
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.Regex({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Post-process every child, then total their n_groups into self."""
        self.n_groups = 0
        for child in self:
            child.post_process(groups, caseless)
            self.n_groups += child.n_groups
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Add this node to _nfa and return its entry state index.

        Must be overridden.  The overrides in this file return -1 when the
        node cannot produce a state.
        """
        raise NotImplementedError
    def add_to_nfa(self, _nfa, group_ref_data):
        """Build this regex into _nfa and append its entry state to
        _nfa.start_state (group index 0, next state 0)."""
        entry_state = self.to_nfa_state(_nfa, group_ref_data, 0, 0)
        _nfa.start_state.append(entry_state)
-
class RegexNone(Regex):
    """Node whose to_nfa_state() always yields -1, the value the other
    nodes in this file treat as "no state"."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexNone', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
    def copy(self, factory = None):
        if factory is None:
            factory = RegexNone
        return Regex.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexNone({0:s})'.format(', '.join(params))
    # GENERATE END
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Return -1 without adding anything to _nfa."""
        return -1
-
class RegexEmpty(Regex):
    """Node that adds no NFA state: to_nfa_state() hands back next_state."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexEmpty', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
    def copy(self, factory = None):
        if factory is None:
            factory = RegexEmpty
        return Regex.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexEmpty({0:s})'.format(', '.join(params))
    # GENERATE END
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Return next_state unchanged; _nfa is not modified."""
        return next_state
-
class RegexCharacter(Regex):
    """Node matching one character out of ``character_set``.

    ``character_set`` is a list of ints in the representation used by the
    bisect_set module (presumably a sorted list of range boundaries, e.g.
    [0x41, 0x5b] for 'A'..'Z' -- confirm against bisect_set).
    """
    # GENERATE ELEMENT(list(int) character_set) BEGIN
    def __init__(
        self, tag = 'RegexCharacter', attrib = {}, text = '', children = [],
        n_groups = -1, character_set = []
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
        # a string argument is a whitespace-separated list of encoded ints
        if isinstance(character_set, str):
            character_set = [
                element.deserialize_int(i) for i in character_set.split()
            ]
        self.character_set = character_set
    def serialize(self, ref_list):
        Regex.serialize(self, ref_list)
        encoded = [element.serialize_int(i) for i in self.character_set]
        self.set('character_set', ' '.join(encoded))
    def deserialize(self, ref_list):
        Regex.deserialize(self, ref_list)
        encoded = self.get('character_set', '').split()
        self.character_set = [element.deserialize_int(i) for i in encoded]
    def copy(self, factory = None):
        if factory is None:
            factory = RegexCharacter
        result = Regex.copy(self, factory)
        # the list object itself is shared with the copy, not duplicated
        result.character_set = self.character_set
        return result
    def repr_serialize(self, params):
        Regex.repr_serialize(self, params)
        # an empty set is the default and is left out of the repr
        if len(self.character_set):
            items = ', '.join(map(repr, self.character_set))
            params.append('character_set = [{0:s}]'.format(items))
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexCharacter({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Run Regex.post_process(), then, when caseless, add the
        opposite-case counterpart of every ASCII letter in the set."""
        Regex.post_process(self, groups, caseless)
        if not caseless:
            return
        # restrict to 'A'..'Z' and 'a'..'z'
        letters = bisect_set.bisect_set_and(
            self.character_set,
            [0x41, 0x5b, 0x61, 0x7b]
        )
        # xor 0x20 flips the case; values >= 0x60 map downwards, so they
        # are listed first
        lower_to_upper = [i ^ 0x20 for i in letters if i >= 0x60]
        upper_to_lower = [i ^ 0x20 for i in letters if i < 0x60]
        self.character_set = bisect_set.bisect_set_or(
            self.character_set,
            lower_to_upper + upper_to_lower
        )
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Append a STATE_CHARACTER state leading to next_state and return
        its index in _nfa.states."""
        state_index = len(_nfa.states)
        state = (nfa.NFA.STATE_CHARACTER, self.character_set, next_state)
        _nfa.states.append(state)
        return state_index
-
class RegexCharacterLiteral(RegexCharacter):
    """Character node whose character_set is used as given (it is not
    derived from child nodes)."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexCharacterLiteral', attrib = {}, text = '',
        children = [], n_groups = -1, character_set = []
    ):
        RegexCharacter.__init__(
            self, tag, attrib, text, children, n_groups, character_set
        )
    def copy(self, factory = None):
        if factory is None:
            factory = RegexCharacterLiteral
        return RegexCharacter.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexCharacterLiteral({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Post-process with case folding disabled in the base call, then
        apply ASCII case folding here when caseless is set."""
        RegexCharacter.post_process(self, groups, False)
        if not caseless:
            return
        # letters of the set, i.e. its intersection with A-Z and a-z
        letters = bisect_set.bisect_set_and(
            self.character_set,
            [0x41, 0x5b, 0x61, 0x7b]
        )
        # xor 0x20 flips the case; values >= 0x60 map downwards, so they
        # are listed first
        lower_to_upper = [i ^ 0x20 for i in letters if i >= 0x60]
        upper_to_lower = [i ^ 0x20 for i in letters if i < 0x60]
        self.character_set = bisect_set.bisect_set_or(
            self.character_set,
            lower_to_upper + upper_to_lower
        )
-
class RegexCharacterRange(RegexCharacter):
    """Character node spanning from its first child's set to its second
    child's set."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexCharacterRange', attrib = {}, text = '',
        children = [], n_groups = -1, character_set = []
    ):
        RegexCharacter.__init__(
            self, tag, attrib, text, children, n_groups, character_set
        )
    def copy(self, factory = None):
        if factory is None:
            factory = RegexCharacterRange
        return RegexCharacter.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexCharacterRange({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Post-process the children without case folding, build the set
        from the first entry of child 0 and the last entry of child 1, then
        apply ASCII case folding when caseless is set."""
        RegexCharacter.post_process(self, groups, False)
        first = self[0].character_set[0]
        last = self[1].character_set[-1]
        self.character_set = [first, last]
        if not caseless:
            return
        # letters of the set, i.e. its intersection with A-Z and a-z
        letters = bisect_set.bisect_set_and(
            self.character_set,
            [0x41, 0x5b, 0x61, 0x7b]
        )
        # xor 0x20 flips the case; values >= 0x60 map downwards, so they
        # are listed first
        lower_to_upper = [i ^ 0x20 for i in letters if i >= 0x60]
        upper_to_lower = [i ^ 0x20 for i in letters if i < 0x60]
        self.character_set = bisect_set.bisect_set_or(
            self.character_set,
            lower_to_upper + upper_to_lower
        )
-
class RegexCharacterOr(RegexCharacter):
    """Character node whose set is bisect_set_or() of its two children's
    sets."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexCharacterOr', attrib = {}, text = '',
        children = [], n_groups = -1, character_set = []
    ):
        RegexCharacter.__init__(
            self, tag, attrib, text, children, n_groups, character_set
        )
    def copy(self, factory = None):
        if factory is None:
            factory = RegexCharacterOr
        return RegexCharacter.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexCharacterOr({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Post-process the children, then replace this node's set with the
        combination of child 0 and child 1."""
        RegexCharacter.post_process(self, groups, caseless)
        left = self[0].character_set
        right = self[1].character_set
        self.character_set = bisect_set.bisect_set_or(left, right)
-
class RegexCharacterAnd(RegexCharacter):
    """Character node whose set is bisect_set_and() of its two children's
    sets."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexCharacterAnd', attrib = {}, text = '',
        children = [], n_groups = -1, character_set = []
    ):
        RegexCharacter.__init__(
            self, tag, attrib, text, children, n_groups, character_set
        )
    def copy(self, factory = None):
        if factory is None:
            factory = RegexCharacterAnd
        return RegexCharacter.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexCharacterAnd({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Post-process the children, then replace this node's set with the
        combination of child 0 and child 1."""
        RegexCharacter.post_process(self, groups, caseless)
        left = self[0].character_set
        right = self[1].character_set
        self.character_set = bisect_set.bisect_set_and(left, right)
-
class RegexCharacterNot(RegexCharacter):
    """Character node whose set is bisect_set_not() of its child's set."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexCharacterNot', attrib = {}, text = '',
        children = [], n_groups = -1, character_set = []
    ):
        RegexCharacter.__init__(
            self, tag, attrib, text, children, n_groups, character_set
        )
    def copy(self, factory = None):
        if factory is None:
            factory = RegexCharacterNot
        return RegexCharacter.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexCharacterNot({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Post-process the child, then replace this node's set with the
        negation of child 0's set."""
        RegexCharacter.post_process(self, groups, caseless)
        operand = self[0].character_set
        self.character_set = bisect_set.bisect_set_not(operand)
-
class RegexOr(Regex):
    """Node with two children joined by an NFA STATE_OR state."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexOr', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
    def copy(self, factory = None):
        if factory is None:
            factory = RegexOr
        return Regex.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexOr({0:s})'.format(', '.join(params))
    # GENERATE END
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Build both children towards next_state and return the entry
        state.

        Both children are always built.  If one of them yields -1 the
        other one's state is returned directly (which may itself be -1);
        otherwise a new STATE_OR state referring to both is appended.
        """
        left_state = self[0].to_nfa_state(
            _nfa, group_ref_data, group_index, next_state
        )
        # the second child's groups are numbered after the first child's
        right_state = self[1].to_nfa_state(
            _nfa, group_ref_data, group_index + self[0].n_groups, next_state
        )
        if left_state == -1:
            return right_state
        if right_state == -1:
            return left_state
        or_state = len(_nfa.states)
        _nfa.states.append((nfa.NFA.STATE_OR, left_state, right_state))
        return or_state
-
class RegexAnd(Regex):
    """Node with two children joined by an NFA STATE_AND state."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexAnd', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
    def copy(self, factory = None):
        if factory is None:
            factory = RegexAnd
        return Regex.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexAnd({0:s})'.format(', '.join(params))
    # GENERATE END
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Append two join states, build child 0 towards the first and
        child 1 towards the second, and return a new STATE_AND state.

        Returns -1 as soon as a child yields -1.  The join states (and any
        states of child 0) appended before that point stay in _nfa.states.
        """
        states = _nfa.states
        first_join = len(states)
        states.append(nfa.NFA.join0_state) # no arguments so use static one
        second_join = len(states)
        states.append((nfa.NFA.STATE_JOIN1, next_state))
        left_state = self[0].to_nfa_state(
            _nfa, group_ref_data, group_index, first_join
        )
        if left_state == -1:
            return -1
        # the second child's groups are numbered after the first child's
        right_state = self[1].to_nfa_state(
            _nfa, group_ref_data, group_index + self[0].n_groups, second_join
        )
        if right_state == -1:
            return -1
        and_state = len(_nfa.states)
        _nfa.states.append((nfa.NFA.STATE_AND, left_state, right_state))
        return and_state
-
class RegexSequence(Regex):
    """Node matching child 0 followed by child 1."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexSequence', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
    def copy(self, factory = None):
        if factory is None:
            factory = RegexSequence
        return Regex.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexSequence({0:s})'.format(', '.join(params))
    # GENERATE END
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Build child 1 towards next_state, then child 0 towards child 1,
        and return child 0's entry state (-1 if child 1 yields -1)."""
        # built back to front: the tail must exist before the head can
        # point at it
        tail_state = self[1].to_nfa_state(
            _nfa, group_ref_data, group_index + self[0].n_groups, next_state
        )
        if tail_state == -1:
            return -1
        head_state = self[0].to_nfa_state(
            _nfa, group_ref_data, group_index, tail_state
        )
        return head_state
-
class RegexRepeat(Regex):
    """Node repeating child 0 between count0 and count1 times.

    count1 == -1 selects the unbounded form in to_nfa_state().  non_greedy
    swaps the order of the two alternatives in the generated STATE_OR
    states.
    """
    # GENERATE ELEMENT(int count0, int count1, bool non_greedy) BEGIN
    def __init__(
        self, tag = 'RegexRepeat', attrib = {}, text = '', children = [],
        n_groups = -1, count0 = -1, count1 = -1, non_greedy = False
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
        # string arguments are decoded with the element.deserialize_*()
        # helpers
        if isinstance(count0, str):
            count0 = element.deserialize_int(count0)
        self.count0 = count0
        if isinstance(count1, str):
            count1 = element.deserialize_int(count1)
        self.count1 = count1
        if isinstance(non_greedy, str):
            non_greedy = element.deserialize_bool(non_greedy)
        self.non_greedy = non_greedy
    def serialize(self, ref_list):
        Regex.serialize(self, ref_list)
        self.set('count0', element.serialize_int(self.count0))
        self.set('count1', element.serialize_int(self.count1))
        self.set('non_greedy', element.serialize_bool(self.non_greedy))
    def deserialize(self, ref_list):
        Regex.deserialize(self, ref_list)
        self.count0 = element.deserialize_int(self.get('count0', '-1'))
        self.count1 = element.deserialize_int(self.get('count1', '-1'))
        self.non_greedy = element.deserialize_bool(
            self.get('non_greedy', 'false')
        )
    def copy(self, factory = None):
        if factory is None:
            factory = RegexRepeat
        result = Regex.copy(self, factory)
        result.count0 = self.count0
        result.count1 = self.count1
        result.non_greedy = self.non_greedy
        return result
    def repr_serialize(self, params):
        Regex.repr_serialize(self, params)
        # fields still holding their default value are left out
        defaults = (('count0', -1), ('count1', -1), ('non_greedy', False))
        for name, default in defaults:
            value = getattr(self, name)
            if value != default:
                params.append('{0:s} = {1!r}'.format(name, value))
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexRepeat({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Take count0/count1 from trailing 'Number' children, remove those
        children, then run Regex.post_process()."""
        # total hack which will be done in a Python action in future
        n_children = len(self)
        if n_children >= 2:
            assert self[1].tag == 'Number'
            self.count0 = int(self[1].text)
            if n_children >= 3:
                assert self[2].tag == 'Number'
                self.count1 = int(self[2].text)
            else:
                # a single number means exactly that many repetitions
                self.count1 = self.count0
            del self[1:]
        # end total hack
        Regex.post_process(self, groups, caseless)
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Add the repetition to _nfa and return its entry state.

        First the optional part is built: a loop when count1 == -1,
        otherwise count1 - count0 optional copies of the child.  Then
        count0 mandatory copies are chained in front of it.  Returns -1
        when a mandatory copy yields -1.
        """
        min_count = self.count0
        max_count = self.count1
        child = self[0]
        states = _nfa.states
        if max_count == -1:
            # reserve the loop state first so the child can point back at it
            loop_state = len(states)
            states.append(None)
            child_state = child.to_nfa_state(
                _nfa, group_ref_data, group_index, loop_state
            )
            if child_state != -1:
                if self.non_greedy:
                    states[loop_state] = (
                        nfa.NFA.STATE_OR, next_state, child_state
                    )
                else:
                    states[loop_state] = (
                        nfa.NFA.STATE_OR, child_state, next_state
                    )
                next_state = loop_state
        else:
            # every optional copy may also skip straight to the exit state
            exit_state = next_state
            for _ in range(max_count - min_count):
                child_state = child.to_nfa_state(
                    _nfa, group_ref_data, group_index, next_state
                )
                if child_state == -1:
                    break
                option_state = len(states)
                if self.non_greedy:
                    states.append(
                        (nfa.NFA.STATE_OR, exit_state, child_state)
                    )
                else:
                    states.append(
                        (nfa.NFA.STATE_OR, child_state, exit_state)
                    )
                next_state = option_state
        for _ in range(min_count):
            next_state = child.to_nfa_state(
                _nfa, group_ref_data, group_index, next_state
            )
            if next_state == -1:
                break
        return next_state
-
class RegexGroup(Regex):
    """Node wrapping its last child between two NFA STATE_MARK states."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'RegexGroup', attrib = {}, text = '', children = [],
        n_groups = -1
    ):
        Regex.__init__(self, tag, attrib, text, children, n_groups)
    def copy(self, factory = None):
        if factory is None:
            factory = RegexGroup
        return Regex.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexGroup({0:s})'.format(', '.join(params))
    # GENERATE END
    def post_process(self, groups, caseless = False):
        """Append this group to ``groups``, post-process the last child,
        and count one more group than that child has."""
        # we use -1 here because named or action groups use self[0] for text
        groups.append(self)
        body = self[-1]
        body.post_process(groups, caseless)
        self.n_groups = body.n_groups + 1
    def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
        """Add mark / body / mark to _nfa and return the entry state.

        The mark leading to next_state uses group_ref_data[group_index][1],
        the entry mark uses group_ref_data[group_index][0].  Returns -1 if
        the body yields -1; the first mark state then stays in _nfa.states.
        """
        marks = group_ref_data[group_index]
        # built back to front: trailing mark, then body, then leading mark
        trailing_mark = len(_nfa.states)
        _nfa.states.append((nfa.NFA.STATE_MARK, marks[1], next_state))
        # groups nested in the body are numbered after this one
        body_state = self[-1].to_nfa_state(
            _nfa, group_ref_data, group_index + 1, trailing_mark
        )
        if body_state == -1:
            return -1
        leading_mark = len(_nfa.states)
        _nfa.states.append((nfa.NFA.STATE_MARK, marks[0], body_state))
        return leading_mark
-
-# internal base class
class Text(element.Element):
    """Element without extra fields that adds a get_text() accessor."""
    # GENERATE ELEMENT() BEGIN
    def __init__(
        self, tag = 'Text', attrib = {}, text = '', children = []
    ):
        element.Element.__init__(self, tag, attrib, text, children)
    def copy(self, factory = None):
        if factory is None:
            factory = Text
        return element.Element.copy(self, factory)
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.Text({0:s})'.format(', '.join(params))
    # GENERATE END
    def get_text(self):
        """Return element.get_text(self, 0)."""
        return element.get_text(self, 0)
-
class RegexGroupName(RegexGroup):
    """Group variant with its own nested ``Text`` element class
    (tag 'RegexGroupName_Text')."""
    class Text(Text):
        """Text element specific to RegexGroupName."""
        # GENERATE ELEMENT() BEGIN
        def __init__(
            self,
            tag = 'RegexGroupName_Text',
            attrib = {},
            text = '',
            children = []
        ):
            # inside a method the bare name Text is the module-level base
            # class, which is the one wanted here
            Text.__init__(
                self,
                tag,
                attrib,
                text,
                children
            )
        def copy(self, factory = None):
            # The default factory has to be spelled with its qualified
            # name: a bare Text in a method body resolves to the
            # module-level base class, so the copy would lose its
            # RegexGroupName.Text type.
            result = Text.copy(
                self,
                RegexGroupName.Text if factory is None else factory
            )
            return result
        def __repr__(self):
            params = []
            self.repr_serialize(params)
            return 'regex.RegexGroupName.Text({0:s})'.format(', '.join(params))
        # GENERATE END

    # GENERATE ELEMENT() BEGIN
    def __init__(
        self,
        tag = 'RegexGroupName',
        attrib = {},
        text = '',
        children = [],
        n_groups = -1
    ):
        RegexGroup.__init__(
            self,
            tag,
            attrib,
            text,
            children,
            n_groups
        )
    def copy(self, factory = None):
        result = RegexGroup.copy(
            self,
            RegexGroupName if factory is None else factory
        )
        return result
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexGroupName({0:s})'.format(', '.join(params))
    # GENERATE END
-
class RegexGroupAction(RegexGroup):
    """Group variant with its own nested ``Text`` element class
    (tag 'RegexGroupAction_Text')."""
    class Text(Text):
        """Text element specific to RegexGroupAction."""
        # GENERATE ELEMENT() BEGIN
        def __init__(
            self,
            tag = 'RegexGroupAction_Text',
            attrib = {},
            text = '',
            children = []
        ):
            # inside a method the bare name Text is the module-level base
            # class, which is the one wanted here
            Text.__init__(
                self,
                tag,
                attrib,
                text,
                children
            )
        def copy(self, factory = None):
            # The default factory has to be spelled with its qualified
            # name: a bare Text in a method body resolves to the
            # module-level base class, so the copy would lose its
            # RegexGroupAction.Text type.
            result = Text.copy(
                self,
                RegexGroupAction.Text if factory is None else factory
            )
            return result
        def __repr__(self):
            params = []
            self.repr_serialize(params)
            return 'regex.RegexGroupAction.Text({0:s})'.format(', '.join(params))
        # GENERATE END

    # GENERATE ELEMENT() BEGIN
    def __init__(
        self,
        tag = 'RegexGroupAction',
        attrib = {},
        text = '',
        children = [],
        n_groups = -1
    ):
        RegexGroup.__init__(
            self,
            tag,
            attrib,
            text,
            children,
            n_groups
        )
    def copy(self, factory = None):
        result = RegexGroup.copy(
            self,
            RegexGroupAction if factory is None else factory
        )
        return result
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexGroupAction({0:s})'.format(', '.join(params))
    # GENERATE END
-
class RegexGroupElement(RegexGroup):
    """Group variant with its own nested ``Text`` element class
    (tag 'RegexGroupElement_Text')."""
    class Text(Text):
        """Text element specific to RegexGroupElement."""
        # GENERATE ELEMENT() BEGIN
        def __init__(
            self,
            tag = 'RegexGroupElement_Text',
            attrib = {},
            text = '',
            children = []
        ):
            # inside a method the bare name Text is the module-level base
            # class, which is the one wanted here
            Text.__init__(
                self,
                tag,
                attrib,
                text,
                children
            )
        def copy(self, factory = None):
            # The default factory has to be spelled with its qualified
            # name: a bare Text in a method body resolves to the
            # module-level base class, so the copy would lose its
            # RegexGroupElement.Text type.
            result = Text.copy(
                self,
                RegexGroupElement.Text if factory is None else factory
            )
            return result
        def __repr__(self):
            params = []
            self.repr_serialize(params)
            return 'regex.RegexGroupElement.Text({0:s})'.format(', '.join(params))
        # GENERATE END

    # GENERATE ELEMENT() BEGIN
    def __init__(
        self,
        tag = 'RegexGroupElement',
        attrib = {},
        text = '',
        children = [],
        n_groups = -1
    ):
        RegexGroup.__init__(
            self,
            tag,
            attrib,
            text,
            children,
            n_groups
        )
    def copy(self, factory = None):
        result = RegexGroup.copy(
            self,
            RegexGroupElement if factory is None else factory
        )
        return result
    def __repr__(self):
        params = []
        self.repr_serialize(params)
        return 'regex.RegexGroupElement({0:s})'.format(', '.join(params))
    # GENERATE END
-
# GENERATE FACTORY(element.Element) BEGIN
# maps an element tag to the class that represents it; nested classes use
# '<Outer>_<Inner>' as their tag
tag_to_class = {
    'Regex': Regex,
    'RegexNone': RegexNone,
    'RegexEmpty': RegexEmpty,
    'RegexCharacter': RegexCharacter,
    'RegexCharacterLiteral': RegexCharacterLiteral,
    'RegexCharacterRange': RegexCharacterRange,
    'RegexCharacterOr': RegexCharacterOr,
    'RegexCharacterAnd': RegexCharacterAnd,
    'RegexCharacterNot': RegexCharacterNot,
    'RegexOr': RegexOr,
    'RegexAnd': RegexAnd,
    'RegexSequence': RegexSequence,
    'RegexRepeat': RegexRepeat,
    'RegexGroup': RegexGroup,
    'Text': Text,
    'RegexGroupName': RegexGroupName,
    'RegexGroupName_Text': RegexGroupName.Text,
    'RegexGroupAction': RegexGroupAction,
    'RegexGroupAction_Text': RegexGroupAction.Text,
    'RegexGroupElement': RegexGroupElement,
    'RegexGroupElement_Text': RegexGroupElement.Text
}
def factory(tag, attrib = {}, *args, **kwargs):
    """Instantiate the class registered for ``tag``, falling back to
    element.Element for unknown tags; all arguments are passed through."""
    element_class = tag_to_class.get(tag, element.Element)
    return element_class(tag, attrib, *args, **kwargs)
# GENERATE END
-
-if __name__ == '__main__':
- import sys
- import xml.etree.ElementTree
- import wrap_repr
-
- _regex = RegexAnd(
- children = [
- RegexRepeat(
- children = [
- RegexCharacterNot(
- children = [
- RegexCharacterLiteral()
- ],
- character_set = [0, 256]
- )
- ]
- ),
- RegexGroup(
- children = [
- RegexOr(
- children = [
- RegexOr(
- children = [
- RegexOr(
- children = [
- RegexOr(
- children = [
- RegexNone(),
- RegexGroup(
- children = [
- RegexRepeat(
- children = [
- RegexCharacterLiteral(
- character_set = [9, 14, 32, 33]
- )
- ],
- count0 = 1
- )
- ],
- index = 1,
- name = 'Whitespace'
- )
- ]
- ),
- RegexGroup(
- children = [
- RegexRepeat(
- children = [
- RegexCharacterLiteral(
- character_set = [48, 58]
- )
- ],
- count0 = 1
- )
- ],
- index = 2,
- name = 'Number'
- )
- ]
- ),
- RegexGroup(
- children = [
- RegexSequence(
- children = [
- RegexSequence(
- children = [
- RegexSequence(
- children = [
- RegexSequence(
- children = [
- RegexEmpty(),
- RegexCharacterLiteral(
- character_set = [102, 103]
- )
- ]
- ),
- RegexCharacterLiteral(
- character_set = [111, 112]
- )
- ]
- ),
- RegexCharacterLiteral(
- character_set = [114, 115]
- )
- ]
- ),
- RegexRepeat(
- children = [
- RegexCharacterLiteral(
- character_set = [101, 102]
- )
- ],
- count0 = 0,
- count1 = 1
- )
- ]
- )
- ],
- index = 3,
- name = 'For'
- )
- ]
- ),
- RegexGroup(
- children = [
- RegexSequence(
- children = [
- RegexCharacterLiteral(
- character_set = [65, 91, 95, 96, 97, 123]
- ),
- RegexRepeat(
- children = [
- RegexCharacterLiteral(
- character_set = [48, 58, 65, 91, 95, 96, 97, 123]
- )
- ]
- )
- ]
- )
- ],
- index = 4,
- name = 'Identifier'
- )
- ]
- )
- ],
- index = 0
- )
- ]
- )
- sys.stdout.write(
- wrap_repr.wrap_repr(
- ' _regex = {0:s}'.format(repr(_regex).replace('regex.', '')),
- 79
- )
- )
-
- _nfa = nfa.NFA()
- _regex.add_to_nfa(_nfa)
- sys.stdout.write(
- wrap_repr.wrap_repr(
- ' _nfa = {0:s}'.format(repr(_nfa).replace('nfa.', '')),
- 79
- )
- )
-
- text = ' id 99id id99 for fore foree forex '
- i = 0
- while i < len(text):
- print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
- thread = _nfa.match_text(text, i)
- if thread is None:
- print('no match')
- break
- i = thread[0] # end position of overall match
- group_start = [-1 for j in range(len(_nfa.groups))]
- group_end = [-1 for j in range(len(_nfa.groups))]
- while thread is not None:
- pos, mark, thread = thread
- group = mark >> 1
- if (mark & 1) == 0:
- group_start[group] = pos
- print(
- 'group {0:d} name "{1:s}" text "{2:s}"'.format(
- group,
- _nfa.groups[group][0],
- text[group_start[group]:group_end[group]].replace('\n', '$')
- )
- )
- else:
- group_end[group] = pos
-
- _dfa = _nfa.to_dfa()
- sys.stdout.write(
- wrap_repr.wrap_repr(
- ' _dfa = {0:s}'.format(repr(_dfa).replace('dfa.', '')),
- 79
- )
- )
-
- text = ' id 99id id99 for fore foree forex '
- i = 0
- while i < len(text):
- print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
- thread = _dfa.match_text(text, i)
- if thread is None:
- print('no match')
- break
- i = thread[0] # end position of overall match
- group_start = [-1 for j in range(len(_dfa.groups))]
- group_end = [-1 for j in range(len(_dfa.groups))]
- while thread is not None:
- pos, mark, thread = thread
- group = mark >> 1
- if (mark & 1) == 0:
- group_start[group] = pos
- print(
- 'group {0:d} name "{1:s}" text "{2:s}"'.format(
- group,
- _dfa.groups[group][0],
- text[group_start[group]:group_end[group]].replace('\n', '$')
- )
- )
- else:
- group_end[group] = pos
-
-# move this into grammar.py:
-# grammar = Grammar(children = [Grammar.Production(children = [RegexSequence(
-#children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
-#= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
-#259, 262], rule_name = 'expr0')])], nonterminal = 0), Grammar.Production(
-#children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
-#= [262, 265], rule_name = 'expr1')])], nonterminal = 1), Grammar.Production(
-#children = [RegexSequence(children = [RegexEmpty(), RegexGroup(children = [
-#RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
-#RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262
-#], rule_name = 'expr0')]), RegexCharacter(character_set = [43, 44])]),
-#RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
-#RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')])], group_index
-#= 0, group_name = 'Add')])], nonterminal = 2), Grammar.Production(children = [
-#RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
-#children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262], rule_name =
-#'expr0')]), RegexCharacter(character_set = [45, 46])]), RegexCharacterRule(character_set
-#= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
-#262, 265], rule_name = 'expr1')])], group_index = 0, group_name = 'Subtract')])
-#], nonterminal = 3), Grammar.Production(children = [RegexSequence(children = [
-#RegexEmpty(), RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])
-#], nonterminal = 4), Grammar.Production(children = [RegexSequence(children = [
-#RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
-#children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-#RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
-#RegexCharacter(character_set = [42, 43])]), RegexCharacterRule(character_set = [288, 295
-#], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
-#rule_name = 'expr2')])], group_index = 0, group_name = 'Multiply')])],
-#nonterminal = 5), Grammar.Production(children = [RegexSequence(children = [
-#RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
-#children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-#RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
-#RegexCharacter(character_set = [47, 48])]), RegexCharacterRule(character_set = [288, 295
-#], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
-#rule_name = 'expr2')])], group_index = 0, group_name = 'Divide')])],
-#nonterminal = 6), Grammar.Production(children = [RegexSequence(children = [
-#RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
-#'number')])], group_index = 0, group_name = 'Number')]), RegexCharacterRule(
-#character_set = [288, 295], rule_name = 'whitespace_opt')])], nonterminal = 7),
-#Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-#RegexGroup(children = [RegexSequence(children = [RegexSequence(children = [
-#RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [45, 46])]),
-#RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
-#RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])], group_index
-#= 0, group_name = 'Negate')])], nonterminal = 8), Grammar.Production(children =
-#[RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
-#RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-#RegexCharacter(character_set = [40, 41])]), RegexCharacterRule(character_set = [288, 295
-#], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [259, 262],
-#rule_name = 'expr0')]), RegexCharacter(character_set = [41, 42])]),
-#RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')])],
-#nonterminal = 9), Grammar.Production(children = [RegexSequence(children = [
-#RegexEmpty(), RegexCharacter(character_set = [48, 49])])], nonterminal = 10),
-#Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-#RegexCharacter(character_set = [49, 50])])], nonterminal = 11), Grammar.Production(
-#children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
-#50, 51])])], nonterminal = 12), Grammar.Production(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacter(character_set = [51, 52])])], nonterminal =
-#13), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-#RegexCharacter(character_set = [52, 53])])], nonterminal = 14), Grammar.Production(
-#children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
-#53, 54])])], nonterminal = 15), Grammar.Production(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacter(character_set = [54, 55])])], nonterminal =
-#16), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-#RegexCharacter(character_set = [55, 56])])], nonterminal = 17), Grammar.Production(
-#children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
-#56, 57])])], nonterminal = 18), Grammar.Production(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacter(character_set = [57, 58])])], nonterminal =
-#19), Grammar.Production(children = [RegexSequence(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
-#'number')]), RegexCharacter(character_set = [48, 49])])], nonterminal = 20),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [49, 50])])], nonterminal = 21),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [50, 51])])], nonterminal = 22),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [51, 52])])], nonterminal = 23),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [52, 53])])], nonterminal = 24),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [53, 54])])], nonterminal = 25),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [54, 55])])], nonterminal = 26),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [55, 56])])], nonterminal = 27),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [56, 57])])], nonterminal = 28),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-#)]), RegexCharacter(character_set = [57, 58])])], nonterminal = 29),
-#Grammar.Production(children = [RegexEmpty()], nonterminal = 30),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-#'whitespace_opt')]), RegexCharacter(character_set = [9, 10])])], nonterminal = 31),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-#'whitespace_opt')]), RegexCharacter(character_set = [10, 11])])], nonterminal = 32),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-#'whitespace_opt')]), RegexCharacter(character_set = [11, 12])])], nonterminal = 33),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-#'whitespace_opt')]), RegexCharacter(character_set = [12, 13])])], nonterminal = 34),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-#'whitespace_opt')]), RegexCharacter(character_set = [13, 14])])], nonterminal = 35),
-#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-#'whitespace_opt')]), RegexCharacter(character_set = [32, 33])])], nonterminal = 36)
-#], n_terminals = 258)
-# #sys.stdout.write(
-# # wrap_repr.wrap_repr(
-# # ' grammar = {0:s}'.format(repr(grammar).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# lr1 = grammar.to_lr1()
-# #sys.stdout.write(
-# # wrap_repr.wrap_repr(
-# # ' lr1 = {0:s}'.format(repr(lr1).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# lr1.parse_text('(13 + 5 * 6) * 2', 0)
-# root = element.Element('root', text = '(13 + 5 * 6) * 2')
-# lr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
-# xml.etree.ElementTree.dump(root)
-#
-# clr1 = lr1.to_clr1()
-# #sys.stdout.write(
-# # wrap_repr.wrap_repr(
-# # ' clr1 = {0:s}'.format(repr(clr1).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# clr1.parse_text('(13 + 5 * 6) * 2', 0)
-# root = element.Element('root', text = '(13 + 5 * 6) * 2')
-# clr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
-# xml.etree.ElementTree.dump(root)
-#
-# lalr1 = lr1.to_lalr1()
-# #sys.stdout.write(
-# # wrap_repr.wrap_repr(
-# # ' lalr1 = {0:s}'.format(repr(lalr1).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# lalr1.parse_text('(13 + 5 * 6) * 2', 0)
-# root = element.Element('root', text = '(13 + 5 * 6) * 2')
-# lalr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
-# xml.etree.ElementTree.dump(root)
+++ /dev/null
-#!/bin/sh
-if ./generate_ast.py regex <regex.py >regex.py.new && ! diff -q regex.py regex.py.new
-then
- mv regex.py.new regex.py
-else
- rm -f regex.py.new
-fi
--- /dev/null
+/*
+ * Copyright (C) 2019 Nick Downing <nick@ndcode.org>
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+%{
+ import bisect_set
+ import element
+ import nfa
+%}
+
+%%
+
+class Regex {
+ int n_groups = -1;
+};
+class RegexNone: Regex;
+class RegexEmpty: Regex;
+class RegexCharacter: Regex {
+ list(int) character_set = [];
+};
+class RegexCharacterLiteral: RegexCharacter;
+class RegexCharacterRange: RegexCharacter;
+class RegexCharacterOr: RegexCharacter;
+class RegexCharacterAnd: RegexCharacter;
+class RegexCharacterNot: RegexCharacter;
+class RegexOr: Regex;
+class RegexAnd: Regex;
+class RegexSequence: Regex;
+class RegexRepeat: Regex {
+ int count0 = -1;
+ int count1 = -1;
+ bool non_greedy = False;
+};
+class RegexGroup: Regex;
+class Text;
+class RegexGroupName: RegexGroup {
+ class Text: Text;
+};
+class RegexGroupAction: RegexGroup {
+ class Text: Text;
+};
+class RegexGroupElement: RegexGroup {
+ class Text: Text;
+};
+
+%%
+
+# defines the alphabet size, set this to 0x110000 for unicode
+n_characters = 0x100
+
+# internal base class
+
+def factory(tag, attrib = {}, *args, **kwargs):
+  # Construct the element for `tag`: the class looked up in tag_to_class is
+  # used when there is one, otherwise plain element.Element. attrib is only
+  # forwarded to the constructor here.
+  element_class = tag_to_class.get(tag, element.Element)
+  return element_class(tag, attrib, *args, **kwargs)
+
+@method(Regex)
+def post_process(self, groups, caseless = False):
+  # Generic pass: post-process each child in order and accumulate the
+  # children's n_groups into this node's n_groups.
+  self.n_groups = 0
+  for child in self:
+    child.post_process(groups, caseless)
+    self.n_groups = self.n_groups + child.n_groups
+@method(RegexCharacter)
+def post_process(self, groups, caseless = False):
+  # Run the generic child pass; when caseless is set, additionally widen
+  # character_set so that every ASCII letter in it is present in both cases.
+  Regex.post_process(self, groups, caseless)
+  if not caseless:
+    return
+  # restrict to A-Z / a-z (the list looks like range boundaries with an
+  # exclusive upper end -- confirm against bisect_set)
+  letters = bisect_set.bisect_set_and(
+    self.character_set,
+    [0x41, 0x5b, 0x61, 0x7b]
+  )
+  # xor 0x20 toggles ASCII case; values >= 0x60 are emitted first, exactly
+  # as before, so their flipped (upper-case) counterparts lead the list
+  flipped = [i ^ 0x20 for i in letters if i >= 0x60]
+  flipped = flipped + [i ^ 0x20 for i in letters if i < 0x60]
+  self.character_set = bisect_set.bisect_set_or(self.character_set, flipped)
+@method(RegexCharacterLiteral)
+def post_process(self, groups, caseless = False):
+  # The base pass is invoked with caseless disabled; the case widening of
+  # character_set is applied here instead when caseless is set.
+  RegexCharacter.post_process(self, groups, False)
+  if not caseless:
+    return
+  # part of the set lying in A-Z / a-z
+  letters = bisect_set.bisect_set_and(
+    self.character_set,
+    [0x41, 0x5b, 0x61, 0x7b]
+  )
+  # xor 0x20 toggles ASCII case; values >= 0x60 go first, then the rest
+  flipped = [i ^ 0x20 for i in letters if i >= 0x60]
+  flipped = flipped + [i ^ 0x20 for i in letters if i < 0x60]
+  self.character_set = bisect_set.bisect_set_or(self.character_set, flipped)
+@method(RegexCharacterRange)
+def post_process(self, groups, caseless = False):
+  # Children are processed with caseless disabled; the range is then formed
+  # from the first entry of child 0's set and the last entry of child 1's
+  # set, and only after that is the case widening applied (if requested).
+  RegexCharacter.post_process(self, groups, False)
+  range_start = self[0].character_set[0]
+  range_end = self[1].character_set[-1]
+  self.character_set = [range_start, range_end]
+  if not caseless:
+    return
+  # part of the range lying in A-Z / a-z
+  letters = bisect_set.bisect_set_and(
+    self.character_set,
+    [0x41, 0x5b, 0x61, 0x7b]
+  )
+  # xor 0x20 toggles ASCII case; values >= 0x60 go first, then the rest
+  flipped = [i ^ 0x20 for i in letters if i >= 0x60]
+  flipped = flipped + [i ^ 0x20 for i in letters if i < 0x60]
+  self.character_set = bisect_set.bisect_set_or(self.character_set, flipped)
+@method(RegexCharacterOr)
+def post_process(self, groups, caseless = False):
+  # After the base pass has processed the children, character_set becomes
+  # bisect_set_or of the two children's sets.
+  RegexCharacter.post_process(self, groups, caseless)
+  left_set = self[0].character_set
+  right_set = self[1].character_set
+  self.character_set = bisect_set.bisect_set_or(left_set, right_set)
+@method(RegexCharacterAnd)
+def post_process(self, groups, caseless = False):
+  # After the base pass has processed the children, character_set becomes
+  # bisect_set_and of the two children's sets.
+  RegexCharacter.post_process(self, groups, caseless)
+  left_set = self[0].character_set
+  right_set = self[1].character_set
+  self.character_set = bisect_set.bisect_set_and(left_set, right_set)
+@method(RegexCharacterNot)
+def post_process(self, groups, caseless = False):
+  # After the base pass has processed the child, character_set becomes
+  # bisect_set_not of the child's set.
+  RegexCharacter.post_process(self, groups, caseless)
+  operand_set = self[0].character_set
+  self.character_set = bisect_set.bisect_set_not(operand_set)
+@method(RegexRepeat)
+def post_process(self, groups, caseless = False):
+  # Converts trailing 'Number' children into count0/count1, removes them,
+  # and then runs the generic pass on what is left (the repeated child).
+  # total hack which will be done in a Python action in future
+  if len(self) >= 2:
+    # child 1 carries the first count as text
+    assert self[1].tag == 'Number'
+    self.count0 = int(self[1].text)
+    if len(self) >= 3:
+      # child 2, when present, carries the second count
+      assert self[2].tag == 'Number'
+      self.count1 = int(self[2].text)
+    else:
+      # a single number is used for both counts
+      self.count1 = self.count0
+    # drop the Number children so only child 0 remains
+    del self[1:]
+  # end total hack
+  Regex.post_process(self, groups, caseless)
+@method(RegexGroup)
+def post_process(self, groups, caseless = False):
+  # Register this group in `groups` before descending, then process only
+  # the last child and count this group on top of the child's groups.
+  # we use -1 here because named or action groups use self[0] for text
+  groups.append(self)
+  body = self[-1]
+  body.post_process(groups, caseless)
+  self.n_groups = body.n_groups + 1
+del post_process
+
+@method(Regex)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Base-class placeholder: always raises NotImplementedError; the other
+  # to_nfa_state definitions in this file supply the real behaviour per
+  # node type.
+  raise NotImplementedError
+@method(RegexNone)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Adds nothing to _nfa and returns -1, the value the other to_nfa_state
+  # methods in this file test for to detect a child without an entry state.
+  return -1
+@method(RegexEmpty)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Adds no state: the entry state is simply the continuation next_state.
+  return next_state
+@method(RegexCharacter)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Append one STATE_CHARACTER state carrying character_set and next_state;
+  # the index the new state receives in _nfa.states is returned.
+  state_index = len(_nfa.states)
+  character_state = (nfa.NFA.STATE_CHARACTER, self.character_set, next_state)
+  _nfa.states.append(character_state)
+  return state_index
+@method(RegexOr)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Both children are built with the same continuation next_state; child 1
+  # starts its group numbering after child 0's n_groups.
+  child0_state = self[0].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index,
+    next_state
+  )
+  child1_state = self[1].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index + self[0].n_groups,
+    next_state
+  )
+  # if either child produced -1, the other child's result is returned
+  # directly and no STATE_OR state is added
+  if child0_state == -1:
+    return child1_state
+  if child1_state == -1:
+    return child0_state
+  # otherwise add a STATE_OR state referring to both entry states
+  new_state = len(_nfa.states)
+  _nfa.states.append((nfa.NFA.STATE_OR, child0_state, child1_state))
+  return new_state
+@method(RegexAnd)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Two join states are appended first: child 0 continues into the join0
+  # state and child 1 into a STATE_JOIN1 state that leads to next_state.
+  # The result is a STATE_AND state referring to both child entry states,
+  # or -1 as soon as either child returns -1 (the join states appended up
+  # front stay in _nfa.states in that case).
+  join0_state = len(_nfa.states)
+  _nfa.states.append(nfa.NFA.join0_state) # no arguments so use static one
+  join1_state = len(_nfa.states)
+  _nfa.states.append((nfa.NFA.STATE_JOIN1, next_state))
+  child0_state = self[0].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index,
+    join0_state
+  )
+  if child0_state == -1:
+    return -1
+  # child 1 numbers its groups after child 0's n_groups
+  child1_state = self[1].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index + self[0].n_groups,
+    join1_state
+  )
+  if child1_state == -1:
+    return -1
+  new_state = len(_nfa.states)
+  _nfa.states.append((nfa.NFA.STATE_AND, child0_state, child1_state))
+  return new_state
+@method(RegexSequence)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # The second child is built first so that its entry state can serve as
+  # the first child's continuation; its groups are numbered after the first
+  # child's n_groups. A -1 from the second child is returned as-is without
+  # building the first child.
+  tail_state = self[1].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index + self[0].n_groups,
+    next_state
+  )
+  if tail_state == -1:
+    return -1
+  head_state = self[0].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index,
+    tail_state
+  )
+  return head_state
+@method(RegexRepeat)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Builds the repetition of child 0 in two parts: first the optional part
+  # (a loop when count1 == -1, otherwise count1 - count0 optional copies),
+  # then count0 copies of the child chained in front of it.
+  # non_greedy only swaps the order of the two targets in each STATE_OR
+  # (presumably the first target is the preferred one -- confirm in nfa).
+  count0 = self.count0
+  count1 = self.count1
+  if count1 == -1:
+    # loop: reserve a slot, build the child so that it continues back into
+    # that slot, then fill the slot with the child/exit choice
+    new_state = len(_nfa.states)
+    _nfa.states.append(None)
+    child_state = self[0].to_nfa_state(
+      _nfa,
+      group_ref_data,
+      group_index,
+      new_state
+    )
+    if child_state != -1:
+      _nfa.states[new_state] = (
+        (nfa.NFA.STATE_OR, next_state, child_state)
+        if self.non_greedy else
+        (nfa.NFA.STATE_OR, child_state, next_state)
+      )
+      next_state = new_state
+    # NOTE(review): when child_state is -1 the reserved slot stays None and
+    # is not referenced by the state returned from here
+  else:
+    # bounded: each optional copy may instead go straight to done_state
+    done_state = next_state
+    for i in range(count1 - count0):
+      child_state = self[0].to_nfa_state(
+        _nfa,
+        group_ref_data,
+        group_index,
+        next_state
+      )
+      if child_state == -1:
+        break
+      new_state = len(_nfa.states)
+      _nfa.states.append(
+        (nfa.NFA.STATE_OR, done_state, child_state)
+        if self.non_greedy else
+        (nfa.NFA.STATE_OR, child_state, done_state)
+      )
+      next_state = new_state
+  # mandatory part: chain count0 copies; a -1 from the child ends the loop
+  # and is what gets returned
+  for i in range(count0):
+    next_state = self[0].to_nfa_state(
+      _nfa,
+      group_ref_data,
+      group_index,
+      next_state
+    )
+    if next_state == -1:
+      break
+  return next_state
+@method(RegexGroup)
+def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+  # Wraps the last child between two STATE_MARK states taken from
+  # group_ref_data[group_index]: entry [1] is placed after the child and
+  # entry [0] in front of it (presumably the end and start marks of the
+  # group -- confirm against the code that builds group_ref_data).
+  new_state = len(_nfa.states)
+  _nfa.states.append(
+    (nfa.NFA.STATE_MARK, group_ref_data[group_index][1], next_state)
+  )
+  next_state = new_state
+  # the child's own groups are numbered from group_index + 1
+  next_state = self[-1].to_nfa_state(
+    _nfa,
+    group_ref_data,
+    group_index + 1,
+    next_state
+  )
+  # a -1 from the child is passed on; the mark appended above stays in
+  # _nfa.states
+  if next_state == -1:
+    return -1
+  new_state = len(_nfa.states)
+  _nfa.states.append(
+    (nfa.NFA.STATE_MARK, group_ref_data[group_index][0], next_state)
+  )
+  return new_state
+del to_nfa_state
+
+@method(Regex)
+def add_to_nfa(self, _nfa, group_ref_data):
+  # Build this regex into _nfa with group numbering starting at 0 and
+  # state 0 as the continuation, then record the resulting entry state in
+  # _nfa.start_state.
+  start_states = _nfa.start_state
+  entry_state = self.to_nfa_state(_nfa, group_ref_data, 0, 0)
+  start_states.append(entry_state)
+del add_to_nfa
+
+@method(Text)
+def get_text(self):
+  # Returns element.get_text(self, 0), i.e. the text stored at index 0 of
+  # this element as defined by the element module.
+  return element.get_text(self, 0)
+del get_text
+
+if __name__ == '__main__':
+  # Self-test: builds a small tokenizer regex by hand, converts it to an
+  # NFA and then a DFA, and prints the groups matched in a sample text.
+  #
+  # NOTE(review): this block looks stale relative to the definitions in
+  # this file and probably does not run as-is -- please confirm:
+  # - add_to_nfa is defined with (self, _nfa, group_ref_data) but is called
+  #   below with _nfa only;
+  # - RegexGroup is constructed with index=/name= keywords, while the class
+  #   declarations in this file give it no fields besides n_groups;
+  # - post_process is never called, so n_groups keeps its declared default
+  #   of -1 while to_nfa_state adds n_groups to group_index.
+  import sys
+  # NOTE(review): xml.etree.ElementTree is not used by any statement in
+  # this block
+  import xml.etree.ElementTree
+  import wrap_repr
+
+  _regex = RegexAnd(
+    children = [
+      RegexRepeat(
+        children = [
+          RegexCharacterNot(
+            children = [
+              RegexCharacterLiteral()
+            ],
+            character_set = [0, 256]
+          )
+        ]
+      ),
+      RegexGroup(
+        children = [
+          RegexOr(
+            children = [
+              RegexOr(
+                children = [
+                  RegexOr(
+                    children = [
+                      RegexOr(
+                        children = [
+                          RegexNone(),
+                          RegexGroup(
+                            children = [
+                              RegexRepeat(
+                                children = [
+                                  RegexCharacterLiteral(
+                                    character_set = [9, 14, 32, 33]
+                                  )
+                                ],
+                                count0 = 1
+                              )
+                            ],
+                            index = 1,
+                            name = 'Whitespace'
+                          )
+                        ]
+                      ),
+                      RegexGroup(
+                        children = [
+                          RegexRepeat(
+                            children = [
+                              RegexCharacterLiteral(
+                                character_set = [48, 58]
+                              )
+                            ],
+                            count0 = 1
+                          )
+                        ],
+                        index = 2,
+                        name = 'Number'
+                      )
+                    ]
+                  ),
+                  RegexGroup(
+                    children = [
+                      RegexSequence(
+                        children = [
+                          RegexSequence(
+                            children = [
+                              RegexSequence(
+                                children = [
+                                  RegexSequence(
+                                    children = [
+                                      RegexEmpty(),
+                                      RegexCharacterLiteral(
+                                        character_set = [102, 103]
+                                      )
+                                    ]
+                                  ),
+                                  RegexCharacterLiteral(
+                                    character_set = [111, 112]
+                                  )
+                                ]
+                              ),
+                              RegexCharacterLiteral(
+                                character_set = [114, 115]
+                              )
+                            ]
+                          ),
+                          RegexRepeat(
+                            children = [
+                              RegexCharacterLiteral(
+                                character_set = [101, 102]
+                              )
+                            ],
+                            count0 = 0,
+                            count1 = 1
+                          )
+                        ]
+                      )
+                    ],
+                    index = 3,
+                    name = 'For'
+                  )
+                ]
+              ),
+              RegexGroup(
+                children = [
+                  RegexSequence(
+                    children = [
+                      RegexCharacterLiteral(
+                        character_set = [65, 91, 95, 96, 97, 123]
+                      ),
+                      RegexRepeat(
+                        children = [
+                          RegexCharacterLiteral(
+                            character_set = [48, 58, 65, 91, 95, 96, 97, 123]
+                          )
+                        ]
+                      )
+                    ]
+                  )
+                ],
+                index = 4,
+                name = 'Identifier'
+              )
+            ]
+          )
+        ],
+        index = 0
+      )
+    ]
+  )
+  # dump the tree as wrapped repr text, with the 'regex.' prefix removed
+  sys.stdout.write(
+    wrap_repr.wrap_repr(
+      ' _regex = {0:s}'.format(repr(_regex).replace('regex.', '')),
+      79
+    )
+  )
+
+  _nfa = nfa.NFA()
+  _regex.add_to_nfa(_nfa)
+  sys.stdout.write(
+    wrap_repr.wrap_repr(
+      ' _nfa = {0:s}'.format(repr(_nfa).replace('nfa.', '')),
+      79
+    )
+  )
+
+  # tokenize the sample text with the NFA, one match per iteration
+  text = ' id 99id id99 for fore foree forex '
+  i = 0
+  while i < len(text):
+    print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
+    thread = _nfa.match_text(text, i)
+    if thread is None:
+      print('no match')
+      break
+    i = thread[0] # end position of overall match
+    group_start = [-1 for j in range(len(_nfa.groups))]
+    group_end = [-1 for j in range(len(_nfa.groups))]
+    # thread is unpacked as nested (pos, mark, rest) tuples; mark >> 1 is
+    # the group number and the low bit tells start (0) from end (1)
+    while thread is not None:
+      pos, mark, thread = thread
+      group = mark >> 1
+      if (mark & 1) == 0:
+        group_start[group] = pos
+        print(
+          'group {0:d} name "{1:s}" text "{2:s}"'.format(
+            group,
+            _nfa.groups[group][0],
+            text[group_start[group]:group_end[group]].replace('\n', '$')
+          )
+        )
+      else:
+        group_end[group] = pos
+
+  _dfa = _nfa.to_dfa()
+  sys.stdout.write(
+    wrap_repr.wrap_repr(
+      ' _dfa = {0:s}'.format(repr(_dfa).replace('dfa.', '')),
+      79
+    )
+  )
+
+  # same tokenizing loop as above, this time driven by the DFA
+  text = ' id 99id id99 for fore foree forex '
+  i = 0
+  while i < len(text):
+    print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
+    thread = _dfa.match_text(text, i)
+    if thread is None:
+      print('no match')
+      break
+    i = thread[0] # end position of overall match
+    group_start = [-1 for j in range(len(_dfa.groups))]
+    group_end = [-1 for j in range(len(_dfa.groups))]
+    while thread is not None:
+      pos, mark, thread = thread
+      group = mark >> 1
+      if (mark & 1) == 0:
+        group_start[group] = pos
+        print(
+          'group {0:d} name "{1:s}" text "{2:s}"'.format(
+            group,
+            _dfa.groups[group][0],
+            text[group_start[group]:group_end[group]].replace('\n', '$')
+          )
+        )
+      else:
+        group_end[group] = pos
+
+# move this into grammar.py:
+# grammar = Grammar(children = [Grammar.Production(children = [RegexSequence(
+#children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
+#= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
+#259, 262], rule_name = 'expr0')])], nonterminal = 0), Grammar.Production(
+#children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
+#= [262, 265], rule_name = 'expr1')])], nonterminal = 1), Grammar.Production(
+#children = [RegexSequence(children = [RegexEmpty(), RegexGroup(children = [
+#RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
+#RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262
+#], rule_name = 'expr0')]), RegexCharacter(character_set = [43, 44])]),
+#RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
+#RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')])], group_index
+#= 0, group_name = 'Add')])], nonterminal = 2), Grammar.Production(children = [
+#RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
+#children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
+#children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262], rule_name =
+#'expr0')]), RegexCharacter(character_set = [45, 46])]), RegexCharacterRule(character_set
+#= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
+#262, 265], rule_name = 'expr1')])], group_index = 0, group_name = 'Subtract')])
+#], nonterminal = 3), Grammar.Production(children = [RegexSequence(children = [
+#RegexEmpty(), RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])
+#], nonterminal = 4), Grammar.Production(children = [RegexSequence(children = [
+#RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
+#children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
+#RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
+#RegexCharacter(character_set = [42, 43])]), RegexCharacterRule(character_set = [288, 295
+#], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
+#rule_name = 'expr2')])], group_index = 0, group_name = 'Multiply')])],
+#nonterminal = 5), Grammar.Production(children = [RegexSequence(children = [
+#RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
+#children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
+#RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
+#RegexCharacter(character_set = [47, 48])]), RegexCharacterRule(character_set = [288, 295
+#], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
+#rule_name = 'expr2')])], group_index = 0, group_name = 'Divide')])],
+#nonterminal = 6), Grammar.Production(children = [RegexSequence(children = [
+#RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
+#children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
+#'number')])], group_index = 0, group_name = 'Number')]), RegexCharacterRule(
+#character_set = [288, 295], rule_name = 'whitespace_opt')])], nonterminal = 7),
+#Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
+#RegexGroup(children = [RegexSequence(children = [RegexSequence(children = [
+#RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [45, 46])]),
+#RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
+#RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])], group_index
+#= 0, group_name = 'Negate')])], nonterminal = 8), Grammar.Production(children =
+#[RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
+#RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
+#RegexCharacter(character_set = [40, 41])]), RegexCharacterRule(character_set = [288, 295
+#], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [259, 262],
+#rule_name = 'expr0')]), RegexCharacter(character_set = [41, 42])]),
+#RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')])],
+#nonterminal = 9), Grammar.Production(children = [RegexSequence(children = [
+#RegexEmpty(), RegexCharacter(character_set = [48, 49])])], nonterminal = 10),
+#Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
+#RegexCharacter(character_set = [49, 50])])], nonterminal = 11), Grammar.Production(
+#children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
+#50, 51])])], nonterminal = 12), Grammar.Production(children = [RegexSequence(
+#children = [RegexEmpty(), RegexCharacter(character_set = [51, 52])])], nonterminal =
+#13), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
+#RegexCharacter(character_set = [52, 53])])], nonterminal = 14), Grammar.Production(
+#children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
+#53, 54])])], nonterminal = 15), Grammar.Production(children = [RegexSequence(
+#children = [RegexEmpty(), RegexCharacter(character_set = [54, 55])])], nonterminal =
+#16), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
+#RegexCharacter(character_set = [55, 56])])], nonterminal = 17), Grammar.Production(
+#children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
+#56, 57])])], nonterminal = 18), Grammar.Production(children = [RegexSequence(
+#children = [RegexEmpty(), RegexCharacter(character_set = [57, 58])])], nonterminal =
+#19), Grammar.Production(children = [RegexSequence(children = [RegexSequence(
+#children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
+#'number')]), RegexCharacter(character_set = [48, 49])])], nonterminal = 20),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [49, 50])])], nonterminal = 21),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [50, 51])])], nonterminal = 22),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [51, 52])])], nonterminal = 23),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [52, 53])])], nonterminal = 24),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [53, 54])])], nonterminal = 25),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [54, 55])])], nonterminal = 26),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [55, 56])])], nonterminal = 27),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [56, 57])])], nonterminal = 28),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+#)]), RegexCharacter(character_set = [57, 58])])], nonterminal = 29),
+#Grammar.Production(children = [RegexEmpty()], nonterminal = 30),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+#'whitespace_opt')]), RegexCharacter(character_set = [9, 10])])], nonterminal = 31),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+#'whitespace_opt')]), RegexCharacter(character_set = [10, 11])])], nonterminal = 32),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+#'whitespace_opt')]), RegexCharacter(character_set = [11, 12])])], nonterminal = 33),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+#'whitespace_opt')]), RegexCharacter(character_set = [12, 13])])], nonterminal = 34),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+#'whitespace_opt')]), RegexCharacter(character_set = [13, 14])])], nonterminal = 35),
+#Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
+#= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+#'whitespace_opt')]), RegexCharacter(character_set = [32, 33])])], nonterminal = 36)
+#], n_terminals = 258)
+# #sys.stdout.write(
+# # wrap_repr.wrap_repr(
+# # ' grammar = {0:s}'.format(repr(grammar).replace('regex.', '')),
+# # 79
+# # )
+# #)
+#
+# lr1 = grammar.to_lr1()
+# #sys.stdout.write(
+# # wrap_repr.wrap_repr(
+# # ' lr1 = {0:s}'.format(repr(lr1).replace('regex.', '')),
+# # 79
+# # )
+# #)
+#
+# lr1.parse_text('(13 + 5 * 6) * 2', 0)
+# root = element.Element('root', text = '(13 + 5 * 6) * 2')
+# lr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
+# xml.etree.ElementTree.dump(root)
+#
+# clr1 = lr1.to_clr1()
+# #sys.stdout.write(
+# # wrap_repr.wrap_repr(
+# # ' clr1 = {0:s}'.format(repr(clr1).replace('regex.', '')),
+# # 79
+# # )
+# #)
+#
+# clr1.parse_text('(13 + 5 * 6) * 2', 0)
+# root = element.Element('root', text = '(13 + 5 * 6) * 2')
+# clr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
+# xml.etree.ElementTree.dump(root)
+#
+# lalr1 = lr1.to_lalr1()
+# #sys.stdout.write(
+# # wrap_repr.wrap_repr(
+# # ' lalr1 = {0:s}'.format(repr(lalr1).replace('regex.', '')),
+# # 79
+# # )
+# #)
+#
+# lalr1.parse_text('(13 + 5 * 6) * 2', 0)
+# root = element.Element('root', text = '(13 + 5 * 6) * 2')
+# lalr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
+# xml.etree.ElementTree.dump(root)
+all: lex_yy.py t_def.py
+
lex_yy.py: cal_py.l
- ../../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
- ../../pilex.git/pilex.py --element --python $<.xml
+ ../pilex.py --element --python $<
+
+t_def.py: cal_py.t
+ ../../pitree.git/pitree.py --python $<
clean:
- rm -f lex_yy.py *.xml
+ rm -f lex_yy.py t_def.py
+++ /dev/null
-import element
-
-class Text(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'Text',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Text if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.Text({0:s})'.format(', '.join(params))
- # GENERATE END
- def get_text(self):
- return element.get_text(self, 0)
-
-class AST(element.Element):
- class Expr(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Expr',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Expr if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Expr({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- raise NotImplementedException()
-
- class Num(Expr):
- class Mantissa(Text):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Num_Mantissa',
- attrib = {},
- text = '',
- children = []
- ):
- Text.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Text.copy(
- self,
- Mantissa if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Num.Mantissa({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class Fraction(Text):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Num_Fraction',
- attrib = {},
- text = '',
- children = []
- ):
- Text.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Text.copy(
- self,
- Fraction if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Num.Fraction({0:s})'.format(', '.join(params))
- # GENERATE END
-
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Num',
- attrib = {},
- text = '',
- children = []
- ):
- AST.Expr.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = AST.Expr.copy(
- self,
- Num if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Num({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- return float(element.get_text(self, 0))
-
- class Add(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Add',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Add if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Add({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- return self[0].eval() + self[1].eval()
-
- class Sub(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Sub',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Sub if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Sub({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- return self[0].eval() - self[1].eval()
-
- class Mul(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Mul',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Mul if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Mul({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- return self[0].eval() * self[1].eval()
-
- class Div(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Div',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Div if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Div({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- return self[0].eval() / self[1].eval()
-
- class Neg(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST_Neg',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Neg if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST.Neg({0:s})'.format(', '.join(params))
- # GENERATE END
- def eval(self):
- return -self[0].eval()
-
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'AST',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- AST if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.AST({0:s})'.format(', '.join(params))
- # GENERATE END
-
-# GENERATE FACTORY(element.Element) BEGIN
-tag_to_class = {
- 'Text': Text,
- 'AST': AST,
- 'AST_Expr': AST.Expr,
- 'AST_Num': AST.Num,
- 'AST_Num_Mantissa': AST.Num.Mantissa,
- 'AST_Num_Fraction': AST.Num.Fraction,
- 'AST_Add': AST.Add,
- 'AST_Sub': AST.Sub,
- 'AST_Mul': AST.Mul,
- 'AST_Div': AST.Div,
- 'AST_Neg': AST.Neg
-}
-def factory(tag, attrib = {}, *args, **kwargs):
- return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
-# GENERATE END
+++ /dev/null
-#!/bin/sh
-if ../generate_ast.py ast <ast.py >ast.py.new && ! diff -q ast.py ast.py.new
-then
- mv ast.py.new ast.py
-else
- rm -f ast.py.new
-fi
%{
-import ast
+import t_def
import xml.etree.ElementTree
NUM = 0x100
yylval = None
%}
-DIGIT (?E{ast.AST.Num.Mantissa}[0-9]+)\.?|(?E{ast.AST.Num.Mantissa}[0-9]*)\.(?E{ast.AST.Num.Fraction}[0-9]+)
+DIGIT (?E{t_def.AST.Num.Mantissa}[0-9]+)\.?|(?E{t_def.AST.Num.Mantissa}[0-9]*)\.(?E{t_def.AST.Num.Fraction}[0-9]+)
%option noecs nometa-ecs noyywrap reject yymore
%%
[ ]
-(?E{ast.AST.Num}{DIGIT}) {
+(?E{t_def.AST.Num}{DIGIT}) {
global yylval
yylval = float(yytext)
return NUM
--- /dev/null
+/*
+ * Copyright (C) 2019 Nick Downing <nick@ndcode.org>
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+%{
+ import element
+%}
+
+%%
+
+class Text;
+class AST {
+ class Expr;
+ class Num: Expr {
+ class Mantissa: Text;
+ class Fraction: Text;
+ };
+ class Add;
+ class Sub;
+ class Mul;
+ class Div;
+ class Neg;
+};
+
+%%
+
+# Construct the element for `tag`: use the class registered under that tag in
+# tag_to_class, or plain element.Element when the tag is not registered. All
+# arguments (tag, attrib and any extras) are passed through to the constructor.
+# NOTE(review): tag_to_class is not defined in this file -- presumably pitree
+# generates it from the class declarations above; confirm.
+def factory(tag, attrib = {}, *args, **kwargs):
+ return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
+
+# get_text for Text: returns element.get_text(self, 0).
+# NOTE(review): @method is presumably the pitree helper that installs the
+# function on the named class -- confirm against the pitree documentation.
+@method(Text)
+def get_text(self):
+ return element.get_text(self, 0)
+# remove the module-level binding `get_text` created by the def above
+del get_text
+
+# Base implementation for AST.Expr: there is nothing to evaluate here, so it
+# raises the builtin NotImplementedError; AST.Num (declared as an Expr
+# subclass above) supplies its own eval().
+@method(AST.Expr)
+def eval(self):
+ raise NotImplementedError()
+# AST.Num: the float parsed from element.get_text(self, 0).
+@method(AST.Num)
+def eval(self):
+ return float(element.get_text(self, 0))
+# Binary operators: evaluate self[0] and self[1], then combine the results.
+@method(AST.Add)
+def eval(self):
+ return self[0].eval() + self[1].eval()
+@method(AST.Sub)
+def eval(self):
+ return self[0].eval() - self[1].eval()
+@method(AST.Mul)
+def eval(self):
+ return self[0].eval() * self[1].eval()
+# true division of the two evaluated operands
+@method(AST.Div)
+def eval(self):
+ return self[0].eval() / self[1].eval()
+# Unary minus: negate the evaluated self[0].
+@method(AST.Neg)
+def eval(self):
+ return -self[0].eval()
+# Remove the module-level binding `eval` left by the defs above, which would
+# otherwise shadow the builtin eval in this module.
+del eval
-# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# Copyright (C) 2019 Nick Downing <nick@ndcode.org>
# SPDX-License-Identifier: GPL-2.0-only
#
# This program is free software; you can redistribute it and/or modify it under
str_to_bool = {'false': False, 'true': True}
def deserialize_bool(text):
+ assert text is not None
return str_to_bool[text]
def serialize_int(value):
return str(value)
def deserialize_int(text):
+ assert text is not None
return int(text)
def serialize_ref(value, ref_list):
if value is None:
ref = -1
else:
return str(ref)
def deserialize_ref(text, ref_list):
+ assert text is not None
ref = int(text)
return None if ref < 0 else ref_list[ref]
return value
def deserialize_str(text):
+ assert text is not None
return text
def serialize(value, fout, encoding = 'unicode'):