From a97690fc724a7beba77d7fde449ea56676804933 Mon Sep 17 00:00:00 2001 From: Anthony Van de Gejuchte Date: Mon, 13 Jun 2016 12:36:47 +0200 Subject: [PATCH] Various LineTerminator changes * Escaped newlines should also produce SyntaxError * Fix multiline comment parsing and add tests * Adapt makePredicate to handle \u2028 and \u2029 * Move up nlb check in regex so it's checked before any escape handling * Change error messages to conform to the ECMA standard * Find_eol not recognizing \u2028 and \u2029 as line terminators * Remove \u180e as it is removed in Unicode 6.3.0 from the category Zs --- lib/parse.js | 46 +++++++++++++++------------------ lib/utils.js | 13 ++++++++-- test/mocha/comment.js | 50 ++++++++++++++++++++++++++++++++++++ test/mocha/line-endings.js | 6 ++++- test/mocha/string-literal.js | 2 +- 5 files changed, 87 insertions(+), 30 deletions(-) create mode 100644 test/mocha/comment.js diff --git a/lib/parse.js b/lib/parse.js index c7089b2d..bfbd14d5 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -107,7 +107,9 @@ var OPERATORS = makePredicate([ "||" ]); -var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\uFEFF")); +var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\uFEFF")); + +var NEWLINE_CHARS = makePredicate(characters("\n\r\u2028\u2029")); var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:")); @@ -234,7 +236,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { var ch = S.text.charAt(S.pos++); if (signal_eof && !ch) throw EX_EOF; - if ("\r\n\u2028\u2029".indexOf(ch) >= 0) { + if (NEWLINE_CHARS(ch)) { S.newline_before = S.newline_before || !in_string; ++S.line; S.col = 0; @@ -261,7 +263,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { var text = S.text; for (var i = S.pos, n = 
S.text.length; i < n; ++i) { var ch = text[i]; - if (ch == '\n' || ch == '\r') + if (NEWLINE_CHARS(ch)) return i; } return -1; @@ -313,8 +315,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { }; function skip_whitespace() { - var ch; - while (WHITESPACE_CHARS(ch = peek()) || ch == "\u2028" || ch == "\u2029") + while (WHITESPACE_CHARS(peek())) next(); }; @@ -352,7 +353,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { if (!isNaN(valid)) { return token("num", valid); } else { - parse_error("Invalid syntax: " + num); + parse_error("SyntaxError: Invalid syntax: " + num); } }; @@ -400,18 +401,18 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { for (; n > 0; --n) { var digit = parseInt(next(true), 16); if (isNaN(digit)) - parse_error("Invalid hex-character pattern in string"); + parse_error("SyntaxError: Invalid hex-character pattern in string"); num = (num << 4) | digit; } return num; }; - var read_string = with_eof_error("Unterminated string constant", function(quote_char){ + var read_string = with_eof_error("SyntaxError: Unterminated string constant", function(quote_char){ var quote = next(), ret = ""; for (;;) { var ch = next(true, true); if (ch == "\\") ch = read_escaped_char(true); - else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) parse_error("Unterminated string constant"); + else if (NEWLINE_CHARS(ch)) parse_error("SyntaxError: Unterminated string constant"); else if (ch == quote) break; ret += ch; } @@ -436,21 +437,14 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { return next_token; }; - var skip_multiline_comment = with_eof_error("Unterminated multiline comment", function(){ + var skip_multiline_comment = with_eof_error("SyntaxError: Unterminated multiline comment", function(){ var regex_allowed = S.regex_allowed; var i = find("*/", true); - var text = S.text.substring(S.pos, i).replace(/\r\n|\r/g, '\n'); - var a = text.split("\n"), n = a.length; + var text = S.text.substring(S.pos, 
i).replace(/\r\n|\r|\u2028|\u2029/g, '\n'); // update stream position - S.pos = i + 2; - S.line += n - 1; - if (n > 1) S.col = a[n - 1].length; - else S.col += a[n - 1].length; - S.col += 2; - var nlb = S.newline_before = S.newline_before || text.indexOf("\n") >= 0; + forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2); S.comments_before.push(token("comment2", text, true)); S.regex_allowed = regex_allowed; - S.newline_before = nlb; return next_token; }); @@ -463,9 +457,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { else break; } else { - if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX"); + if (ch != "u") parse_error("SyntaxError: Expecting UnicodeEscapeSequence -- uXXXX"); ch = read_escaped_char(); - if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier"); + if (!is_identifier_char(ch)) parse_error("SyntaxError: Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier"); name += ch; backslash = false; } @@ -477,9 +471,11 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { return name; }; - var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){ + var read_regexp = with_eof_error("SyntaxError: Unterminated regular expression", function(regexp){ var prev_backslash = false, ch, in_class = false; - while ((ch = next(true))) if (prev_backslash) { + while ((ch = next(true))) if (NEWLINE_CHARS(ch)) { + parse_error("SyntaxError: Unexpected line terminator"); + } else if (prev_backslash) { regexp += "\\" + ch; prev_backslash = false; } else if (ch == "[") { @@ -492,8 +488,6 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { break; } else if (ch == "\\") { prev_backslash = true; - } else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) { - parse_error("Unexpected line terminator"); } else { regexp += ch; } @@ -602,7 +596,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { } 
break; } - parse_error("Unexpected character '" + ch + "'"); + parse_error("SyntaxError: Unexpected character '" + ch + "'"); }; next_token.context = function(nc) { diff --git a/lib/utils.js b/lib/utils.js index 78c6dbf7..8ef61936 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -227,10 +227,19 @@ function makePredicate(words) { } cats.push([words[i]]); } + function quote(word) { + return JSON.stringify(word).replace(/[\u2028\u2029]/g, function(s) { + switch (s) { + case "\u2028": return "\\u2028"; + case "\u2029": return "\\u2029"; + } + return s; + }); + } function compareTo(arr) { - if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";"; + if (arr.length == 1) return f += "return str === " + quote(arr[0]) + ";"; f += "switch(str){"; - for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":"; + for (var i = 0; i < arr.length; ++i) f += "case " + quote(arr[i]) + ":"; f += "return true}return false;"; } // When there are more than three length categories, an outer diff --git a/test/mocha/comment.js b/test/mocha/comment.js new file mode 100644 index 00000000..69cdb3d5 --- /dev/null +++ b/test/mocha/comment.js @@ -0,0 +1,50 @@ +var assert = require("assert"); +var uglify = require("../../"); + +describe("Comment", function() { + it("Should recognize eol of single line comments", function() { + var tests = [ + "//Some comment 1\n>", + "//Some comment 2\r>", + "//Some comment 3\r\n>", + "//Some comment 4\u2028>", + "//Some comment 5\u2029>" + ]; + + var fail = function(e) { + return e instanceof uglify.JS_Parse_Error && + e.message === "SyntaxError: Unexpected token: operator (>)" && + e.line === 2 && + e.col === 0; + } + + for (var i = 0; i < tests.length; i++) { + assert.throws(function() { + uglify.parse(tests[i], {fromString: true}) + }, fail, tests[i]); + } + }); + + it("Should update the position of a multiline comment correctly", function() { + var tests = [ + "/*Some comment 1\n\n\n*/\n>\n\n\n\n\n\n", + "/*Some 
comment 2\r\n\r\n\r\n*/\r\n>\n\n\n\n\n\n", + "/*Some comment 3\r\r\r*/\r>\n\n\n\n\n\n", + "/*Some comment 4\u2028\u2028\u2028*/\u2028>\n\n\n\n\n\n", + "/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n" + ]; + + var fail = function(e) { + return e instanceof uglify.JS_Parse_Error && + e.message === "SyntaxError: Unexpected token: operator (>)" && + e.line === 5 && + e.col === 0; + } + + for (var i = 0; i < tests.length; i++) { + assert.throws(function() { + uglify.parse(tests[i], {fromString: true}) + }, fail, tests[i]); + } + }); +}); diff --git a/test/mocha/line-endings.js b/test/mocha/line-endings.js index 3457dd70..ef46bccd 100644 --- a/test/mocha/line-endings.js +++ b/test/mocha/line-endings.js @@ -37,6 +37,10 @@ describe("line-endings", function() { "/\r/", "/\u2028/", "/\u2029/", + "/\\\n/", + "/\\\r/", + "/\\\u2028/", + "/\\\u2029/", "/someRandomTextLike[]()*AndThen\n/" ] var test = function(input) { @@ -46,7 +50,7 @@ describe("line-endings", function() { } var fail = function(e) { return e instanceof Uglify.JS_Parse_Error && - e.message === "Unexpected line terminator"; + e.message === "SyntaxError: Unexpected line terminator"; } for (var i = 0; i < inputs.length; i++) { assert.throws(test(inputs[i]), fail); diff --git a/test/mocha/string-literal.js b/test/mocha/string-literal.js index d427472f..fc4c4277 100644 --- a/test/mocha/string-literal.js +++ b/test/mocha/string-literal.js @@ -19,7 +19,7 @@ describe("String literals", function() { var error = function(e) { return e instanceof UglifyJS.JS_Parse_Error && - e.message === "Unterminated string constant"; + e.message === "SyntaxError: Unterminated string constant"; }; for (var input in inputs) { -- 2.34.1