Fix location tracking bugs (i) cursor was not initialized with input file name, ...

author Nick Downing <nick@ndcode.org>

Sun, 24 Dec 2023 23:39:25 +0000 (10:39 +1100)

committer Nick Downing <nick@ndcode.org>

Mon, 25 Dec 2023 05:38:15 +0000 (16:38 +1100)
author Nick Downing <nick@ndcode.org>
Sun, 24 Dec 2023 23:39:25 +0000 (10:39 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 25 Dec 2023 05:38:15 +0000 (16:38 +1100)
diff --git a/ndcode/piyacc/cli.py b/ndcode/piyacc/cli.py

index 3047cef..42bc2c9 100755 (executable)
--- a/ndcode/piyacc/cli.py
+++ b/ndcode/piyacc/cli.py
@@ -94,6 +94,7 @@ Runs the following steps:
        from ndcode.piyacc import y_tab
        state.infilename = in_file
        lex_yy.yyin = fin
+      lex_yy.scanner_cursor = state.Boundary(in_file, 1, 1)
        _ast = y_tab.yyparse(t_def.AST)
    #element.serialize(_ast, 'a.xml', 'utf-8')
    #_ast = element.deserialize('a.xml', t_def.factory, 'utf-8')
diff --git a/ndcode/piyacc/parse-gram.y b/ndcode/piyacc/parse-gram.y

index ec9a945..87f1c97 100644 (file)
--- a/ndcode/piyacc/parse-gram.y
+++ b/ndcode/piyacc/parse-gram.y
@@ -24,7 +24,7 @@
    from ndcode.piyacc import state
    from ndcode.piyacc import t_def
  
-  YYLTYPE = state.location
+  YYLTYPE = state.Location
  }
  
  %code
diff --git a/ndcode/piyacc/scan-gram.l b/ndcode/piyacc/scan-gram.l

index 591b24c..f864923 100644 (file)
--- a/ndcode/piyacc/scan-gram.l
+++ b/ndcode/piyacc/scan-gram.l
@@ -24,14 +24,14 @@
    from ndcode.piyacc import t_def
    from ndcode.piyacc import y_tab
  
-  scanner_cursor = state.boundary()
+  scanner_cursor = state.Boundary()
    def YY_USER_ACTION():
      state.location_compute(y_tab.yylloc, scanner_cursor, yytext)
  
    gram_last_string = ''
    bracketed_id_str = None
-  bracketed_id_loc = 0
-  bracketed_id_start = 0
+  bracketed_id_loc = None
+  bracketed_id_start = None
    bracketed_id_context_state = -1
  
    obstack_for_string = []
@@ -45,10 +45,10 @@
    # these should be yylex()-local, but moved to here, see further down:
    nesting = 0
    context_state = -1
-  id_loc = state.location()
+  id_loc = None
    code_start = None
    token_start = None
-  scanner_cursor = state.boundary('<stdin>', 0, 0)
+  scanner_cursor = state.Boundary('<stdin>', 1, 1)
  %}
  
  %x SC_YACC_COMMENT
@@ -83,7 +83,7 @@ eqopt    ([[:space:]]*=)?
    # from an action since the action functions are not nested to yylex():
    #nesting = 0
    #context_state = 0
-  #id_loc = state.location()
+  #id_loc = None
    #code_start = scanner_cursor.copy()
    #token_start = scanner_cursor.copy()
    #first = True
@@ -100,7 +100,7 @@ eqopt    ([[:space:]]*=)?
    "//".*                       #continue
    "/*" {
      global token_start, context_state
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      context_state = YY_START()
      BEGIN(SC_YACC_COMMENT)
    }
@@ -253,7 +253,7 @@ eqopt    ([[:space:]]*=)?
    (?E{t_def.AST.ID}{id}) {
      global id_loc, bracketed_id_str
      y_tab.yylval = yytext
-    id_loc = y_tab.yylloc
+    id_loc = y_tab.yylloc.copy()
      bracketed_id_str = None
      markup_push(element.Element)
      BEGIN(SC_AFTER_IDENTIFIER)
@@ -275,7 +275,7 @@ eqopt    ([[:space:]]*=)?
    /* Characters.  */
    "'" {
      global token_start
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      BEGIN(SC_ESCAPED_CHARACTER)
      markup_push(element.Element)
      markup_push(t_def.AST.Char)
@@ -286,7 +286,7 @@ eqopt    ([[:space:]]*=)?
    /* Strings. */
    "\"" {
      global token_start
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      BEGIN(SC_ESCAPED_STRING)
      markup_push(element.Element)
      markup_push(t_def.AST.String)
@@ -297,7 +297,7 @@ eqopt    ([[:space:]]*=)?
    /* Prologue. */
    "%{" {
      global code_start
-    code_start = y_tab.yylloc.start
+    code_start = y_tab.yylloc.start.copy()
      BEGIN(SC_PROLOGUE)
      markup_push(element.Element)
      markup_push(t_def.AST.Section1.Prologue)
@@ -310,7 +310,7 @@ eqopt    ([[:space:]]*=)?
      global nesting, code_start
      obstack_for_string.append(yytext)
      nesting = 0
-    code_start = y_tab.yylloc.start
+    code_start = y_tab.yylloc.start.copy()
      BEGIN(SC_BRACED_CODE)
      markup_push(element.Element)
      # new way, includes braces, wrapped by <AST_Production_Action> later
@@ -327,7 +327,7 @@ eqopt    ([[:space:]]*=)?
    "%?"[ \f\n\t\v]*"{" {
      global nesting, code_start
      nesting = 0
-    code_start = y_tab.yylloc.start
+    code_start = y_tab.yylloc.start.copy()
      BEGIN(SC_PREDICATE)
      markup_push(element.Element)
      markup_push(t_def.AST.BracedPredicate)
@@ -340,7 +340,7 @@ eqopt    ([[:space:]]*=)?
      global nesting, code_start
      obstack_for_string.append(yytext)
      nesting = 0
-    code_start = y_tab.yylloc.start
+    code_start = y_tab.yylloc.start.copy()
      BEGIN(SC_ELEMENT_GROUP)
      markup_push(element.Element)
      markup_flush(len(yytext))
@@ -358,7 +358,7 @@ eqopt    ([[:space:]]*=)?
    "<" {
      global nesting, token_start
      nesting = 0
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      BEGIN(SC_TAG)
      markup_push(element.Element)
      markup_push(t_def.AST.TagRef)
@@ -377,7 +377,7 @@ eqopt    ([[:space:]]*=)?
    "[" {
      global bracketed_id_str, bracketed_id_start, bracketed_id_context_state
      bracketed_id_str = None
-    bracketed_id_start = y_tab.yylloc.start
+    bracketed_id_start = y_tab.yylloc.start.copy()
      bracketed_id_context_state = YY_START()
      BEGIN(SC_BRACKETED_ID)
    }
@@ -411,45 +411,70 @@ eqopt    ([[:space:]]*=)?
  {
    "[" {
      global bracketed_id_start, bracketed_id_context_state
-    if bracketed_id_str is not None:
-      scanner_cursor.column -= len(yytext)
-      markup_yyless(0)
-      markup_pop_token() # element.Element
-      BEGIN(SC_RETURN_BRACKETED_ID)
-      y_tab.yylloc = id_loc
-      return y_tab.ID
-    else:
-      markup_pop_token() # element.Element
-      bracketed_id_start = y_tab.yylloc.start
-      bracketed_id_context_state = YY_START()
-      BEGIN(SC_BRACKETED_ID)
+    # at this point bracketed_id_str has to be None
+    #if bracketed_id_str is not None:
+    #  scanner_cursor.column -= len(yytext)
+    #  markup_yyless(0)
+    #  markup_pop_token() # element.Element
+    #  BEGIN(SC_RETURN_BRACKETED_ID)
+    #  y_tab.yylloc = id_loc.copy()
+    #  return y_tab.ID
+    #else:
+    #  markup_pop_token() # element.Element
+    #  bracketed_id_start = y_tab.yylloc.start.copy()
+    #  bracketed_id_context_state = YY_START()
+    #  BEGIN(SC_BRACKETED_ID)
+    assert bracketed_id_str is None
+    markup_pop_token() # element.Element
+    bracketed_id_start = y_tab.yylloc.start.copy()
+    bracketed_id_context_state = YY_START()
+    BEGIN(SC_BRACKETED_ID)
    }
    ":" {
      markup_pop_token() # element.Element
-    BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
-    y_tab.yylloc = id_loc
+    # at this point bracketed_id_str has to be None
+    #BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str is not None else INITIAL)
+    assert bracketed_id_str is None
+    BEGIN(INITIAL)
+    y_tab.yylloc = id_loc.copy()
      markup_flush(len(yytext))
      return y_tab.ID_COLON
    }
    . {
+    global scanner_cursor
+
      scanner_cursor.column -= len(yytext)
      markup_yyless(0)
  
-    # total kludge: put back all whitespace/comments after the ID, and rescan
-    # (this will mess up the position tracking, need to revisit and fix later)
+    # put back all whitespace/comments after the ID, it will be rescanned
      assert len(yy_element_space.text) == len(yy_element_space.children) + 1
      unput(yy_element_space.text[-1])
      yy_element_space.text[-1] = ''
+    scanner_cursor = id_loc.end.copy()
  
      markup_pop_token() # element.Element
-    BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
-    y_tab.yylloc = id_loc
+    # at this point bracketed_id_str has to be None
+    #BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str is not None else INITIAL)
+    assert bracketed_id_str is None
+    BEGIN(INITIAL)
+    y_tab.yylloc = id_loc.copy()
      return y_tab.ID
    }
    <<EOF>> {
+    global scanner_cursor
+
+    # put back all whitespace/comments after the ID, it will be rescanned
+    assert len(yy_element_space.text) == len(yy_element_space.children) + 1
+    unput(yy_element_space.text[-1])
+    yy_element_space.text[-1] = ''
+    scanner_cursor = id_loc.end.copy()
+
      markup_pop_token() # element.Element
-    BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
-    y_tab.yylloc = id_loc
+    # at this point bracketed_id_str has to be None
+    #BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str is not None else INITIAL)
+    assert bracketed_id_str is None
+    BEGIN(INITIAL)
+    y_tab.yylloc = id_loc.copy()
      return y_tab.ID
    }
  }
@@ -466,7 +491,7 @@ eqopt    ([[:space:]]*=)?
        state.complain(y_tab.yylloc, state.complaint, 'unexpected identifier in bracketed name: {0:s}'.format(state.quote(yytext)))
      else:
        bracketed_id_str = yytext
-      bracketed_id_loc = y_tab.yylloc
+      bracketed_id_loc = y_tab.yylloc.copy()
    }
    "]" {
      global bracketed_id_str
@@ -475,7 +500,7 @@ eqopt    ([[:space:]]*=)?
        if INITIAL == bracketed_id_context_state:
          y_tab.yylval = bracketed_id_str
          bracketed_id_str = None
-        y_tab.yylloc = bracketed_id_loc
+        y_tab.yylloc = bracketed_id_loc.copy()
          return y_tab.BRACKETED_ID
      else:
        state.complain(y_tab.yylloc, state.complaint, 'an identifier expected')
@@ -497,7 +522,7 @@ eqopt    ([[:space:]]*=)?
      markup_yyless(0)
      y_tab.yylval = bracketed_id_str
      bracketed_id_str = None
-    y_tab.yylloc = bracketed_id_loc
+    y_tab.yylloc = bracketed_id_loc.copy()
      BEGIN(INITIAL)
      return y_tab.BRACKETED_ID
    }
@@ -562,7 +587,7 @@ eqopt    ([[:space:]]*=)?
      global gram_last_string
      gram_last_string = ''.join(obstack_for_string)
      del obstack_for_string[:] # not strictly correct
-    y_tab.yylloc.start = token_start
+    y_tab.yylloc.start = token_start.copy()
      y_tab.yylval = gram_last_string
      BEGIN(INITIAL)
      markup_pop() # t_def.AST.Text
@@ -586,7 +611,7 @@ eqopt    ([[:space:]]*=)?
      global gram_last_string
      gram_last_string = ''.join(obstack_for_string)
      del obstack_for_string[:] # not strictly correct
-    y_tab.yylloc.start = token_start
+    y_tab.yylloc.start = token_start.copy()
      if len(gram_last_string) == 0:
        state.complain(y_tab.yylloc, state.Wother, 'empty character literal')
        y_tab.yylval = ord('\'')
@@ -620,7 +645,7 @@ eqopt    ([[:space:]]*=)?
      if nesting < 0:
        gram_last_string = ''.join(obstack_for_string)
        del obstack_for_string[:] # not strictly correct
-      y_tab.yylloc.start = token_start
+      y_tab.yylloc.start = token_start.copy()
        y_tab.yylval = gram_last_string
        #del obstack_for_string[:]
        BEGIN(INITIAL)
@@ -749,21 +774,21 @@ eqopt    ([[:space:]]*=)?
      global context_state, token_start
      obstack_for_string.append(yytext)
      context_state = YY_START()
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      BEGIN(SC_CHARACTER)
    }
    "\"" {
      global context_state, token_start
      obstack_for_string.append(yytext)
      context_state = YY_START()
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      BEGIN(SC_STRING)
    }
    "/"{splice}"*" {
      global context_state, token_start
      obstack_for_string.append(yytext)
      context_state = YY_START()
-    token_start = y_tab.yylloc.start
+    token_start = y_tab.yylloc.start.copy()
      BEGIN(SC_COMMENT)
    }
    "/"{splice}"/" {
@@ -811,7 +836,7 @@ eqopt    ([[:space:]]*=)?
      if nesting < 0:
        gram_last_string = ''.join(obstack_for_string)
        del obstack_for_string[:] # not strictly correct
-      y_tab.yylloc.start = code_start
+      y_tab.yylloc.start = code_start.copy()
        y_tab.yylval = gram_last_string
        BEGIN(INITIAL)
        # new way, includes braces, wrapped by <AST_Production_Action> later
@@ -835,7 +860,7 @@ eqopt    ([[:space:]]*=)?
      if nesting < 0:
        gram_last_string = ''.join(obstack_for_string)
        del obstack_for_string[:] # not strictly correct
-      y_tab.yylloc.start = code_start
+      y_tab.yylloc.start = code_start.copy()
        y_tab.yylval = gram_last_string
        BEGIN(INITIAL)
        markup_pop() # t_def.AST.Text
@@ -858,7 +883,7 @@ eqopt    ([[:space:]]*=)?
      if nesting < 0:
        gram_last_string = ''.join(obstack_for_string)
        del obstack_for_string[:] # not strictly correct
-      y_tab.yylloc.start = code_start
+      y_tab.yylloc.start = code_start.copy()
        y_tab.yylval = gram_last_string
        #del obstack_for_string[:]
        BEGIN(INITIAL)
@@ -879,7 +904,7 @@ eqopt    ([[:space:]]*=)?
      global gram_last_string
      gram_last_string = ''.join(obstack_for_string)
      del obstack_for_string[:] # not strictly correct
-    y_tab.yylloc.start = code_start
+    y_tab.yylloc.start = code_start.copy()
      y_tab.yylval = gram_last_string
      BEGIN(INITIAL)
      markup_pop() # t_def.AST.Text
@@ -904,7 +929,7 @@ eqopt    ([[:space:]]*=)?
      global gram_last_string
      gram_last_string = ''.join(obstack_for_string)
      del obstack_for_string[:] # not strictly correct
-    y_tab.yylloc.start = code_start
+    y_tab.yylloc.start = code_start.copy()
      y_tab.yylval = gram_last_string
      BEGIN(INITIAL)
      return y_tab.EPILOGUE
@@ -976,7 +1001,7 @@ def convert_ucn_to_byte(ucn):
  #  boundary_set(&scanner_cursor, current_file, lineno, 1)
  
  def unexpected_end(start, msg, token_end):
-  loc = state.location(start.copy(), scanner_cursor.copy())
+  loc = state.Location(start.copy(), scanner_cursor.copy())
    scanner_cursor.column -= len(token_end)
    unput(token_end)
    token_end = state.quote(token_end)
@@ -1002,18 +1027,24 @@ def unexpected_newline(start, token_end):
  # recognized by several iterations of yylex(), it would be better to
  # try to use more complex regular expressions to match all in one go:
  
+# longer string to return to user is built up in yy_element_space, by
+# moving each piece matched from yy_element_token to yy_element_space
+
+# move first n characters matched in yy_element_token to yy_element_space
  def markup_flush(n):
    assert len(yy_element_space.text) == len(yy_element_space.children) + 1
    assert len(yy_element_token.text) == len(yy_element_token.children) + 1
    yy_element_space.text[-1] += yy_element_token.text[0][:n]
    yy_element_token.text[0] = yy_element_token.text[0][n:]
  
+# keep first n characters matched in yy_element_token, put the rest back onto the input
  def markup_yyless(n):
    assert len(yy_element_space.text) == len(yy_element_space.children) + 1
    assert len(yy_element_token.text) == len(yy_element_token.children) + 1
    yyless(n)
    yy_element_token.text[0] = yy_element_token.text[0][:n]
  
+# mark the current position in yy_element_space for insertion of markup
  def markup_push(factory, *args, **kwargs):
    global yy_element_space
    assert len(yy_element_space.text) == len(yy_element_space.children) + 1
@@ -1021,6 +1052,7 @@ def markup_push(factory, *args, **kwargs):
    markup_stack.append(yy_element_space)
    yy_element_space = factory(*args, **kwargs)
  
+# insert markup in yy_element_space between marked and current position
  def markup_pop():
    global yy_element_space
    assert len(yy_element_space.text) == len(yy_element_space.children) + 1
@@ -1030,6 +1062,9 @@ def markup_pop():
    yy_element_space.children.append(_element)
    yy_element_space.text.append('')
  
+# similar to markup_flush() followed by markup_pop(), but leaves the
+# marked-up item in yy_element_token to be returned to yylex() caller
+# note: requires previous push call to be markup_push(element.Element)
  def markup_pop_token():
    global yy_element_space, yy_element_token
  
diff --git a/ndcode/piyacc/skel/skel_py.py b/ndcode/piyacc/skel/skel_py.py

index 9883bcf..3a263ab 100644 (file)
--- a/ndcode/piyacc/skel/skel_py.py
+++ b/ndcode/piyacc/skel/skel_py.py
@@ -30,10 +30,10 @@ import lex_yy
  class YYLTYPE:
    def __init__(
      self,
-    first_line = 0,
-    first_column = 0,
-    last_line = 0,
-    last_column = 0
+    first_line = 1,
+    first_column = 1,
+    last_line = 1,
+    last_column = 1
    ):
      self.first_line = first_line
      self.first_column = first_column
diff --git a/ndcode/piyacc/skel/skel_py_element.py b/ndcode/piyacc/skel/skel_py_element.py

index 4750ca9..40726b2 100644 (file)
--- a/ndcode/piyacc/skel/skel_py_element.py
+++ b/ndcode/piyacc/skel/skel_py_element.py
@@ -32,10 +32,10 @@ import lex_yy
  class YYLTYPE:
    def __init__(
      self,
-    first_line = 0,
-    first_column = 0,
-    last_line = 0,
-    last_column = 0
+    first_line = 1,
+    first_column = 1,
+    last_line = 1,
+    last_column = 1
    ):
      self.first_line = first_line
      self.first_column = first_column
diff --git a/ndcode/piyacc/state.py b/ndcode/piyacc/state.py

index e395abb..0286ba4 100644 (file)
--- a/ndcode/piyacc/state.py
+++ b/ndcode/piyacc/state.py
@@ -17,23 +17,23 @@
  import sys
  
  # miscellaneous state accessed by scan-gram.l and parse-gram.y
-class boundary:
+class Boundary:
    def __init__(self, file = '<stdin>', line = 1, column = 1):
      self.file = file
      self.line = line
      self.column = column
    def copy(self):
-    return boundary(self.file, self.line, self.column)
+    return Boundary(self.file, self.line, self.column)
  
  def equal_boundaries(a, b):
    return a.column == b.column and a.line == b.line and a.file == b.file
  
-class location:
+class Location:
    def __init__(self, start = None, end = None):
-    self.start = boundary() if start is None else start
-    self.end = boundary() if end is None else end
+    self.start = Boundary() if start is None else start
+    self.end = Boundary() if end is None else end
    def copy(self):
-    return location(self.start.copy(), self.end.copy())
+    return Location(self.start.copy(), self.end.copy())
  
  warning_midrule_values = 0
  warning_yacc = 1
diff --git a/skel_y_tab.py b/skel_y_tab.py

index 4f8b0bb..4f7d953 100644 (file)
--- a/skel_y_tab.py
+++ b/skel_y_tab.py
@@ -32,10 +32,10 @@ from ndcode.piyacc import lex_yy
  class YYLTYPE:
    def __init__(
      self,
-    first_line = 0,
-    first_column = 0,
-    last_line = 0,
-    last_column = 0
+    first_line = 1,
+    first_column = 1,
+    last_line = 1,
+    last_column = 1
    ):
      self.first_line = first_line
      self.first_column = first_column
author	Nick Downing <nick@ndcode.org>
	Sun, 24 Dec 2023 23:39:25 +0000 (10:39 +1100)
committer	Nick Downing <nick@ndcode.org>
	Mon, 25 Dec 2023 05:38:15 +0000 (16:38 +1100)
ndcode/piyacc/cli.py		patch \| blob \| history
ndcode/piyacc/parse-gram.y		patch \| blob \| history
ndcode/piyacc/scan-gram.l		patch \| blob \| history
ndcode/piyacc/skel/skel_py.py		patch \| blob \| history
ndcode/piyacc/skel/skel_py_element.py		patch \| blob \| history
ndcode/piyacc/state.py		patch \| blob \| history
skel_y_tab.py		patch \| blob \| history