Improve error handling in Python parser

author Nick Downing <nick@ndcode.org>

Sun, 20 Jan 2019 04:03:10 +0000 (15:03 +1100)

committer Nick Downing <nick@ndcode.org>

Sun, 20 Jan 2019 04:03:10 +0000 (15:03 +1100)
author Nick Downing <nick@ndcode.org>
Sun, 20 Jan 2019 04:03:10 +0000 (15:03 +1100)
committer Nick Downing <nick@ndcode.org>
Sun, 20 Jan 2019 04:03:10 +0000 (15:03 +1100)
diff --git a/bootstrap/scan-gram.l b/bootstrap/scan-gram.l

index 9419da7..84aee8a 100644 (file)
--- a/bootstrap/scan-gram.l
+++ b/bootstrap/scan-gram.l
@@ -248,7 +248,7 @@ eqopt    ([[:space:]]*=)?
      unput('%token-table')
    }
  
-  "%"{id}                      state.complain(y_tab.yylloc, state.complaint, 'invalid directive: {0:s}'.format(quote(yytext)))
+  "%"{id}                      state.complain(y_tab.yylloc, state.complaint, 'invalid directive: {0:s}'.format(state.quote(yytext)))
  
    "="                          return y_tab.EQUAL
    "|"                          return y_tab.PIPE
@@ -278,7 +278,7 @@ eqopt    ([[:space:]]*=)?
  
    /* Identifiers may not start with a digit.  Yet, don't silently
       accept "1FOO" as "1 FOO".  */
-  {int}{id}                    state.complain(y_tab.yylloc, state.complaint, 'invalid identifier: %s', quote(yytext))
+  {int}{id}                    state.complain(y_tab.yylloc, state.complaint, 'invalid identifier: {0:s}'.format(state.quote(yytext)))
  
    /* Characters.  */
    "'" {
@@ -394,7 +394,7 @@ eqopt    ([[:space:]]*=)?
      BEGIN(SC_BRACKETED_ID)
    }
  
-  [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '%s: %s', ngettext('invalid character', 'invalid characters', gram_leng), quote_mem(yytext, gram_leng))
+  [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character' if len(yytext) == 1 else 'invalid characters', state.quote(yytext)))
  
    <<EOF>> {
      y_tab.yylloc.start = scanner_cursor.copy()
@@ -463,7 +463,7 @@ eqopt    ([[:space:]]*=)?
    {id} {
      global bracketed_id_str, bracketed_id_loc
      if bracketed_id_str is not None:
-      state.complain(y_tab.yylloc, state.complaint, 'unexpected identifier in bracketed name: %s', quote(yytext))
+      state.complain(y_tab.yylloc, state.complaint, 'unexpected identifier in bracketed name: {0:s}'.format(state.quote(yytext)))
      else:
        bracketed_id_str = yytext
        bracketed_id_loc = y_tab.yylloc
@@ -481,7 +481,7 @@ eqopt    ([[:space:]]*=)?
        state.complain(y_tab.yylloc, state.complaint, 'an identifier expected')
    }
  
-  [^\].A-Za-z0-9_/ \f\n\t\v]+|.        state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character in bracketed name' if gram_leng == 1 else 'invalid characters in bracketed name', quote_mem(yytext, gram_leng)))
+  [^\].A-Za-z0-9_/ \f\n\t\v]+|.        state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character in bracketed name' if len(yytext) == 1 else 'invalid characters in bracketed name', state.quote(yytext)))
  
    <<EOF>> {
      BEGIN(bracketed_id_context_state)
@@ -636,7 +636,7 @@ eqopt    ([[:space:]]*=)?
    "<"+ {
      global nesting
      obstack_for_string.append(yytext)
-    nesting += gram_leng
+    nesting += len(yytext)
    }
  
    <<EOF>>                      unexpected_eof(token_start, '>')
@@ -651,7 +651,7 @@ eqopt    ([[:space:]]*=)?
    \\[0-7]{1,3} {
      c = strtoul(yytext + 1, None, 8)
      if not c or 0x7f * 2 + 1 < c:
-      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: %s', yytext + 1)
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
        rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
@@ -663,7 +663,7 @@ eqopt    ([[:space:]]*=)?
    \\x[0-9abcdefABCDEF]+ {
      c = strtoul(yytext + 2, None, 16)
      if not c or 0x7f * 2 + 1 < c:
-      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: %s', yytext + 1)
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
        rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
@@ -727,7 +727,7 @@ eqopt    ([[:space:]]*=)?
    \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
      c = convert_ucn_to_byte(yytext)
      if c <= 0:
-      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: %s', yytext + 1)
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
        rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', c)
@@ -736,12 +736,12 @@ eqopt    ([[:space:]]*=)?
        gram_piece_append('</AST_Text_Escape>')
    }
    \\(.|\n) {
-    p = yytext + 1
-    if c_isspace(int(*p)) and c_isprint(int(*p)):
-      p = quote(p)
-    else:
-      p = quotearg_style_mem(escape_quoting_style, p, 1)
-    state.complain(y_tab.yylloc, state.complaint, 'invalid character after \\-escape: %s', p)
+    p = yytext[1:]
+    if True: #c_isspace(int(*p)) and c_isprint(int(*p)):
+      p = state.quote(p)
+    #else:
+    #  p = quotearg_style_mem(escape_quoting_style, p, 1)
+    state.complain(y_tab.yylloc, state.complaint, 'invalid character after \\-escape: {0:s}'.format(p))
    }
  }
  
@@ -985,7 +985,7 @@ eqopt    ([[:space:]]*=)?
  def scan_integer(number, base, loc):
    num = int(number, base)
    if 0x7fffffff < num:
-    state.complain(y_tab.yylloc, state.complaint, 'integer out of range: %s', quote(number))
+    state.complain(y_tab.yylloc, state.complaint, 'integer out of range: {0:s}'.format(state.quote(number)))
      num = 0x7fffffff
    return num
  
@@ -1009,18 +1009,18 @@ def scan_integer(number, base, loc):
  
  def unexpected_end(start, msg, token_end):
    loc = state.location(start.copy(), scanner_cursor.copy())
-  scanner_cursor.column -= i
+  scanner_cursor.column -= len(token_end)
    unput(token_end)
-  token_end = quote(token_end)
+  token_end = state.quote(token_end)
    if token_end == '\'\\\'\'':
      token_end = '"\'"'
-  state.complain(y_tab.yylloc, state.complaint, msg, token_end)
+  state.complain(y_tab.yylloc, state.complaint, msg.format(token_end))
  
  def unexpected_eof(start, token_end):
-  unexpected_end(start, 'missing {0:s} at end of file'.format(token_end))
+  unexpected_end(start, 'missing {0:s} at end of file', token_end)
  
  def unexpected_newline(start, token_end):
-  unexpected_end(start, 'missing {0:s} at end of line'.format(token_end))
+  unexpected_end(start, 'missing {0:s} at end of line', token_end)
  
  #def gram_scanner_initialize():
  #  global obstack_for_string
diff --git a/bootstrap/state.py b/bootstrap/state.py

index f4eb74c..a87299a 100644 (file)
--- a/bootstrap/state.py
+++ b/bootstrap/state.py
@@ -72,3 +72,6 @@ precedence_assoc = 4
  
  destructor = 0
  printer = 1
+
+def quote(str):
+  return '"{0:s}"'.format(str.replace('\\', '\\\\').replace('"', '\\"'))
author	Nick Downing <nick@ndcode.org>
	Sun, 20 Jan 2019 04:03:10 +0000 (15:03 +1100)
committer	Nick Downing <nick@ndcode.org>
	Sun, 20 Jan 2019 04:03:10 +0000 (15:03 +1100)
bootstrap/scan-gram.l		patch | blob | history
bootstrap/state.py		patch | blob | history