From a97690fc724a7beba77d7fde449ea56676804933 Mon Sep 17 00:00:00 2001
From: Anthony Van de Gejuchte <anthonyvdgent@gmail.com>
Date: Mon, 13 Jun 2016 12:36:47 +0200
Subject: [PATCH] Various LineTerminator changes

* Escaped newlines should also produce SyntaxError
* Fix multiline comment parsing and add tests
* Adapt makePredicate to handle \u2028 and \u2029
* Move up nlb check in regex so it's checked before any escape handling
* Change error messages to conform to the ECMAScript standard
* Fix find_eol not recognizing \u2028 and \u2029 as line terminators
* Remove \u180e from the whitespace set, as Unicode 6.3.0 removed it from category Zs
---
 lib/parse.js                 | 46 +++++++++++++++------------------
 lib/utils.js                 | 13 ++++++++--
 test/mocha/comment.js        | 50 ++++++++++++++++++++++++++++++++++++
 test/mocha/line-endings.js   |  6 ++++-
 test/mocha/string-literal.js |  2 +-
 5 files changed, 87 insertions(+), 30 deletions(-)
 create mode 100644 test/mocha/comment.js

diff --git a/lib/parse.js b/lib/parse.js
index c7089b2d..bfbd14d5 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -107,7 +107,9 @@ var OPERATORS = makePredicate([
     "||"
 ]);
 
-var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\uFEFF"));
+var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\uFEFF"));
+
+var NEWLINE_CHARS = makePredicate(characters("\n\r\u2028\u2029"));
 
 var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
 
@@ -234,7 +236,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         var ch = S.text.charAt(S.pos++);
         if (signal_eof && !ch)
             throw EX_EOF;
-        if ("\r\n\u2028\u2029".indexOf(ch) >= 0) {
+        if (NEWLINE_CHARS(ch)) {
             S.newline_before = S.newline_before || !in_string;
             ++S.line;
             S.col = 0;
@@ -261,7 +263,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         var text = S.text;
         for (var i = S.pos, n = S.text.length; i < n; ++i) {
             var ch = text[i];
-            if (ch == '\n' || ch == '\r')
+            if (NEWLINE_CHARS(ch))
                 return i;
         }
         return -1;
@@ -313,8 +315,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
     };
 
     function skip_whitespace() {
-        var ch;
-        while (WHITESPACE_CHARS(ch = peek()) || ch == "\u2028" || ch == "\u2029")
+        while (WHITESPACE_CHARS(peek()))
             next();
     };
 
@@ -352,7 +353,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         if (!isNaN(valid)) {
             return token("num", valid);
         } else {
-            parse_error("Invalid syntax: " + num);
+            parse_error("SyntaxError: Invalid syntax: " + num);
         }
     };
 
@@ -400,18 +401,18 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         for (; n > 0; --n) {
             var digit = parseInt(next(true), 16);
             if (isNaN(digit))
-                parse_error("Invalid hex-character pattern in string");
+                parse_error("SyntaxError: Invalid hex-character pattern in string");
             num = (num << 4) | digit;
         }
         return num;
     };
 
-    var read_string = with_eof_error("Unterminated string constant", function(quote_char){
+    var read_string = with_eof_error("SyntaxError: Unterminated string constant", function(quote_char){
         var quote = next(), ret = "";
         for (;;) {
             var ch = next(true, true);
             if (ch == "\\") ch = read_escaped_char(true);
-            else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) parse_error("Unterminated string constant");
+            else if (NEWLINE_CHARS(ch)) parse_error("SyntaxError: Unterminated string constant");
             else if (ch == quote) break;
             ret += ch;
         }
@@ -436,21 +437,14 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         return next_token;
     };
 
-    var skip_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
+    var skip_multiline_comment = with_eof_error("SyntaxError: Unterminated multiline comment", function(){
         var regex_allowed = S.regex_allowed;
         var i = find("*/", true);
-        var text = S.text.substring(S.pos, i).replace(/\r\n|\r/g, '\n');
-        var a = text.split("\n"), n = a.length;
+        var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n');
         // update stream position
-        S.pos = i + 2;
-        S.line += n - 1;
-        if (n > 1) S.col = a[n - 1].length;
-        else S.col += a[n - 1].length;
-        S.col += 2;
-        var nlb = S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
+        forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2);
         S.comments_before.push(token("comment2", text, true));
         S.regex_allowed = regex_allowed;
-        S.newline_before = nlb;
         return next_token;
     });
 
@@ -463,9 +457,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
                 else break;
             }
             else {
-                if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
+                if (ch != "u") parse_error("SyntaxError: Expecting UnicodeEscapeSequence -- uXXXX");
                 ch = read_escaped_char();
-                if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
+                if (!is_identifier_char(ch)) parse_error("SyntaxError: Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
                 name += ch;
                 backslash = false;
             }
@@ -477,9 +471,11 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         return name;
     };
 
-    var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
+    var read_regexp = with_eof_error("SyntaxError: Unterminated regular expression", function(regexp){
         var prev_backslash = false, ch, in_class = false;
-        while ((ch = next(true))) if (prev_backslash) {
+        while ((ch = next(true))) if (NEWLINE_CHARS(ch)) {
+            parse_error("SyntaxError: Unexpected line terminator");
+        } else if (prev_backslash) {
             regexp += "\\" + ch;
             prev_backslash = false;
         } else if (ch == "[") {
@@ -492,8 +488,6 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
             break;
         } else if (ch == "\\") {
             prev_backslash = true;
-        } else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) {
-            parse_error("Unexpected line terminator");
         } else {
             regexp += ch;
         }
@@ -602,7 +596,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
             }
             break;
         }
-        parse_error("Unexpected character '" + ch + "'");
+        parse_error("SyntaxError: Unexpected character '" + ch + "'");
     };
 
     next_token.context = function(nc) {
diff --git a/lib/utils.js b/lib/utils.js
index 78c6dbf7..8ef61936 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -227,10 +227,19 @@ function makePredicate(words) {
             }
         cats.push([words[i]]);
     }
+    function quote(word) { // JSON-quote `word`, spelling U+2028/U+2029 as \u2028/\u2029 escape text; compareTo appends the result to the source string `f`
+        return JSON.stringify(word).replace(/[\u2028\u2029]/g, function(s) {
+            switch (s) {
+                case "\u2028": return "\\u2028";
+                case "\u2029": return "\\u2029";
+            }
+            return s; // fallback; the regex above only matches the two characters handled by the switch
+        });
+    }
     function compareTo(arr) {
-        if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";";
+        if (arr.length == 1) return f += "return str === " + quote(arr[0]) + ";";
         f += "switch(str){";
-        for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
+        for (var i = 0; i < arr.length; ++i) f += "case " + quote(arr[i]) + ":";
         f += "return true}return false;";
     }
     // When there are more than three length categories, an outer
diff --git a/test/mocha/comment.js b/test/mocha/comment.js
new file mode 100644
index 00000000..69cdb3d5
--- /dev/null
+++ b/test/mocha/comment.js
@@ -0,0 +1,50 @@
+var assert = require("assert");
+var uglify = require("../../");
+
+describe("Comment", function() { // checks the line/col reported for a parse error that follows a comment
+    it("Should recognize eol of single line comments", function() {
+        var tests = [ // one input per line terminator: \n, \r, \r\n, \u2028, \u2029, each followed by a stray ">"
+            "//Some comment 1\n>",
+            "//Some comment 2\r>",
+            "//Some comment 3\r\n>",
+            "//Some comment 4\u2028>",
+            "//Some comment 5\u2029>"
+        ];
+
+        var fail = function(e) { // accepts only the "Unexpected token" parse error for ">" at line 2, col 0
+            return e instanceof uglify.JS_Parse_Error &&
+                e.message === "SyntaxError: Unexpected token: operator (>)" &&
+                e.line === 2 &&
+                e.col === 0;
+        }
+
+        for (var i = 0; i < tests.length; i++) {
+            assert.throws(function() {
+                uglify.parse(tests[i], {fromString: true})
+            }, fail, tests[i]); // tests[i] is passed as the assertion message
+        }
+    });
+
+    it("Should update the position of a multiline comment correctly", function() {
+        var tests = [ // each comment contains three line terminators and is followed by one more, then a stray ">"
+            "/*Some comment 1\n\n\n*/\n>\n\n\n\n\n\n",
+            "/*Some comment 2\r\n\r\n\r\n*/\r\n>\n\n\n\n\n\n",
+            "/*Some comment 3\r\r\r*/\r>\n\n\n\n\n\n",
+            "/*Some comment 4\u2028\u2028\u2028*/\u2028>\n\n\n\n\n\n",
+            "/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n"
+        ];
+
+        var fail = function(e) { // accepts only the "Unexpected token" parse error for ">" at line 5, col 0
+            return e instanceof uglify.JS_Parse_Error &&
+                e.message === "SyntaxError: Unexpected token: operator (>)" &&
+                e.line === 5 &&
+                e.col === 0;
+        }
+
+        for (var i = 0; i < tests.length; i++) {
+            assert.throws(function() {
+                uglify.parse(tests[i], {fromString: true})
+            }, fail, tests[i]); // tests[i] is passed as the assertion message
+        }
+    });
+});
diff --git a/test/mocha/line-endings.js b/test/mocha/line-endings.js
index 3457dd70..ef46bccd 100644
--- a/test/mocha/line-endings.js
+++ b/test/mocha/line-endings.js
@@ -37,6 +37,10 @@ describe("line-endings", function() {
             "/\r/",
             "/\u2028/",
             "/\u2029/",
+            "/\\\n/",
+            "/\\\r/",
+            "/\\\u2028/",
+            "/\\\u2029/",
             "/someRandomTextLike[]()*AndThen\n/"
         ]
         var test = function(input) {
@@ -46,7 +50,7 @@ describe("line-endings", function() {
         }
         var fail = function(e) {
             return e instanceof Uglify.JS_Parse_Error &&
-                e.message === "Unexpected line terminator";
+                e.message === "SyntaxError: Unexpected line terminator";
         }
         for (var i = 0; i < inputs.length; i++) {
             assert.throws(test(inputs[i]), fail);
diff --git a/test/mocha/string-literal.js b/test/mocha/string-literal.js
index d427472f..fc4c4277 100644
--- a/test/mocha/string-literal.js
+++ b/test/mocha/string-literal.js
@@ -19,7 +19,7 @@ describe("String literals", function() {
 
         var error = function(e) {
             return e instanceof UglifyJS.JS_Parse_Error &&
-                e.message === "Unterminated string constant";
+                e.message === "SyntaxError: Unterminated string constant";
         };
 
         for (var input in inputs) {
-- 
2.34.1