Add location tracking, syntax error and invalid character reporting
authorNick Downing <nick@ndcode.org>
Mon, 25 Dec 2023 08:18:18 +0000 (19:18 +1100)
committerNick Downing <nick@ndcode.org>
Mon, 25 Dec 2023 08:18:18 +0000 (19:18 +1100)
ansi_c.l
ansi_c.py
ansi_c.y

index 6d9ab61..e057770 100644 (file)
--- a/ansi_c.l
+++ b/ansi_c.l
@@ -34,6 +34,27 @@ S_CHAR (?E{t_def.Text}[^"\\\n]+)|{ESCAPE_SEQUENCE}
 %{
   import t_def
   import y_tab
+
+  def YY_USER_ACTION():  # advance y_tab.yylloc across the text in yytext; presumably run by the scanner before each rule action -- confirm
+    line = y_tab.yylloc.last_line  # resume from where the previous span ended
+    column = y_tab.yylloc.last_column
+    y_tab.yylloc.first_line = line  # new span begins at the previous span's end
+    y_tab.yylloc.first_column = column
+    p0 = 0  # index just past the last newline/tab already accounted for
+    p = 0  # scan index into yytext
+    while p < len(yytext):
+      if yytext[p] == '\n':
+        line += 1
+        column = 1  # columns are 1-based, so a new line restarts at 1
+        p0 = p + 1
+      elif yytext[p] == '\t':
+        column += p - p0  # first count the ordinary characters since p0
+        column += 8 - ((column - 1) & 7)  # then jump to the next 8-column tab stop (1, 9, 17, ...)
+        p0 = p + 1
+      p += 1
+    column += p - p0  # ordinary characters after the last newline/tab
+    y_tab.yylloc.last_line = line  # span end; becomes the start of the next span
+    y_tab.yylloc.last_column = column
 %}
 
 %%
@@ -162,7 +183,10 @@ __func__                           return y_tab.STRING_LITERAL # revisit
 "?"                                    return ord('?')
 
 [ \t\v\n\f]+                           # whitespace separates tokens
-<INITIAL,COMMENT>.                     # discard bad characters
+<COMMENT>.|\n                          # discard bad characters
+<INITIAL>.|\n {
+  y_tab.yyerror(y_tab.yylloc, 'invalid character')
+}
 <<EOF>>                                        return 0
 
 %%
index d80261f..f67656a 100755 (executable)
--- a/ansi_c.py
+++ b/ansi_c.py
@@ -40,10 +40,11 @@ EXIT_FAILURE = 1
 if len(sys.argv) < 2:
   print(f'usage: {sys.argv[0]:s} program.i')
   sys.exit(EXIT_FAILURE)
-program_i = sys.argv[1]
+in_file = sys.argv[1]
 
-with open(program_i) as fin:
+with open(in_file) as fin:
   lex_yy.yyin = fin
+  y_tab.in_file = in_file
   y_tab.last_token = -1
   y_tab.doing_typedef = False
   y_tab.typedef_stack = [set()]
index 2245e84..9ba4734 100644 (file)
--- a/ansi_c.y
+++ b/ansi_c.y
 
 %{
   import base_type
+  import sys
   import t_def
 
+  # set this before calling yyparse(), for error messages
+  in_file = None
+
   # used to suppress TYPEDEF_NAME after '.', PTR_OP, STRUCT, UNION, ENUM token
   last_token = -1
 
@@ -1391,6 +1395,10 @@ typedef_name
 
 %%
 
+def yyerror(loc, msg):  # report msg for the source span loc, then terminate the program
+  print(f'{in_file:s}({loc.first_line:d},{loc.first_column:d}..{loc.last_line:d},{loc.last_column:d}): {msg:s}')  # goes to stdout as file(line,col..line,col): msg; in_file must be a str by now, since the :s spec raises TypeError on None
+  sys.exit(1)  # every reported error is fatal; no recovery is attempted
+
 # helper factory to construct Expression object and analyze type and value
 def build(factory, *args, **kwargs):
   expression = factory(*args, **kwargs)