From 4d7746baf31405427209de0d8c44d9c8263a2563 Mon Sep 17 00:00:00 2001
From: Anthony Van de Gejuchte <anthonyvdgent@gmail.com>
Date: Fri, 20 May 2016 10:25:35 +0200
Subject: [PATCH] Throw errors in strict mode for octal strings

Adds a directive tracker to the tokenizer, fed by the parser, so that
tokenizing can depend on the directives in effect (e.g. "use strict").
---
 lib/parse.js                 | 58 +++++++++++++++++++++++++++++-----
 test/mocha/directives.js     | 61 ++++++++++++++++++++++++++++++++++++
 test/mocha/string-literal.js | 47 +++++++++++++++++++++++++++
 3 files changed, 159 insertions(+), 7 deletions(-)
 create mode 100644 test/mocha/directives.js

diff --git a/lib/parse.js b/lib/parse.js
index 467fc60b..4530c2d9 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -223,7 +223,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         tokcol          : 0,
         newline_before  : false,
         regex_allowed   : false,
-        comments_before : []
+        comments_before : [],
+        directives      : {},
+        directive_stack : []
     };
 
     function peek() { return S.text.charAt(S.pos); };
@@ -392,8 +394,6 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         for (;;) {
             var ch = next(true, true);
             if (ch == "\\") {
-                // read OctalEscapeSequence (XXX: deprecated if "strict mode")
-                // https://github.com/mishoo/UglifyJS/issues/178
                 var octal_len = 0, first = null;
                 ch = read_while(function(ch){
                     if (ch >= "0" && ch <= "7") {
@@ -406,8 +406,13 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
                     }
                     return false;
                 });
-                if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
-                else ch = read_escaped_char(true);
+                if (octal_len > 0) {
+                    if (ch !== "0" && next_token.has_directive("use strict"))
+                        parse_error("Octal literals are not allowed in strict mode");
+                    ch = String.fromCharCode(parseInt(ch, 8));
+                } else {
+                    ch = read_escaped_char(true);
+                }
             }
             else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) parse_error("Unterminated string constant");
             else if (ch == quote) break;
@@ -608,6 +613,35 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
         return S;
     };
 
+    // Records `directive` in the innermost open scope and bumps its active count.
+    // Expects a prior push_directives_stack(); with no open scope the push below throws.
+    next_token.add_directive = function(directive) {
+        S.directive_stack[S.directive_stack.length - 1].push(directive);
+        // Own-property test: S.directives is a plain object, so an inherited key such
+        // as "constructor" must not pass for a counter (incrementing it yields NaN).
+        var seen = Object.prototype.hasOwnProperty.call(S.directives, directive);
+        S.directives[directive] = seen ? S.directives[directive] + 1 : 1;
+    };
+
+    next_token.push_directives_stack = function() {
+        S.directive_stack[S.directive_stack.length] = []; // open a new, empty directive scope
+    };
+
+    // Closes the innermost directive scope. Each directive recorded in that scope
+    // gives back one count in S.directives, so has_directive() only reports
+    // directives still held by a scope that remains open.
+    next_token.pop_directives_stack = function() {
+        var closing = S.directive_stack.pop();
+        for (var idx = 0, total = closing.length; idx < total; idx++) {
+            S.directives[closing[idx]]--;
+        }
+    };
+
+    // True while at least one open scope has recorded `directive`.
+    next_token.has_directive = function(directive) {
+        return S.directives[directive] > 0; // a missing (undefined) entry compares false
+    };
+
     return next_token;
 
 };
@@ -781,9 +815,15 @@ function parse($TEXT, options) {
         handle_regexp();
         switch (S.token.type) {
           case "string":
+            if (S.in_directives) {
+                if (is_token(peek(), "punc", ";") || peek().nlb) {
+                    S.input.add_directive(S.token.raw.substr(1, S.token.raw.length - 2));
+                } else {
+                    S.in_directives = false;
+                }
+            }
             var dir = S.in_directives, stat = simple_statement();
-            // XXXv2: decide how to fix directives
-            if (dir && stat.body instanceof AST_String && !is("punc", ",")) {
+            if (dir) {
                 return new AST_Directive({
                     start : stat.body.start,
                     end   : stat.body.end,
@@ -1012,9 +1052,11 @@ function parse($TEXT, options) {
             body: (function(loop, labels){
                 ++S.in_function;
                 S.in_directives = true;
+                S.input.push_directives_stack();
                 S.in_loop = 0;
                 S.labels = [];
                 var a = block_();
+                S.input.pop_directives_stack();
                 --S.in_function;
                 S.in_loop = loop;
                 S.labels = labels;
@@ -1514,8 +1556,10 @@ function parse($TEXT, options) {
     return (function(){
         var start = S.token;
         var body = [];
+        S.input.push_directives_stack();
         while (!is("eof"))
             body.push(statement());
+        S.input.pop_directives_stack();
         var end = prev();
         var toplevel = options.toplevel;
         if (toplevel) {
diff --git a/test/mocha/directives.js b/test/mocha/directives.js
new file mode 100644
index 00000000..4433e429
--- /dev/null
+++ b/test/mocha/directives.js
@@ -0,0 +1,61 @@
+var assert = require("assert");
+var uglify = require("../../");
+// Drives the tokenizer's directive bookkeeping calls directly; the source text is empty.
+describe("Directives", function() {
+    it ("Should allow tokenizer to store directives state", function() {
+        var tokenizer = uglify.tokenizer("", "foo.js");
+
+        // Stack level 0: no scope pushed yet, so no directive is reported
+        assert.strictEqual(tokenizer.has_directive("use strict"), false);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+
+        // Stack level 2: "use strict" is added to the second of two pushed scopes
+        tokenizer.push_directives_stack();
+        tokenizer.push_directives_stack();
+        tokenizer.add_directive("use strict");
+        assert.strictEqual(tokenizer.has_directive("use strict"), true);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+
+        // Stack level 3: the same directive may be added again in a nested scope
+        tokenizer.push_directives_stack();
+        tokenizer.add_directive("use strict");
+        tokenizer.add_directive("use asm");
+        assert.strictEqual(tokenizer.has_directive("use strict"), true);
+        assert.strictEqual(tokenizer.has_directive("use asm"), true);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+
+        // Stack level 2: popping drops "use asm" but the outer "use strict" remains
+        tokenizer.pop_directives_stack();
+        assert.strictEqual(tokenizer.has_directive("use strict"), true);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+
+        // Stack level 3: a fresh scope; the escaped spelling below is stored verbatim
+        tokenizer.push_directives_stack();
+        tokenizer.add_directive("use thing");
+        tokenizer.add_directive("use\\\nasm");
+        assert.strictEqual(tokenizer.has_directive("use strict"), true);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false); // Exact match only: the escaped spelling is not "use asm"
+        assert.strictEqual(tokenizer.has_directive("use thing"), true);
+
+        // Stack level 2: "use thing" went away with its scope
+        tokenizer.pop_directives_stack();
+        assert.strictEqual(tokenizer.has_directive("use strict"), true);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+
+        // Stack level 1: the scope that held "use strict" is gone
+        tokenizer.pop_directives_stack();
+        assert.strictEqual(tokenizer.has_directive("use strict"), false);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+
+        // Stack level 0: every pushed scope has been popped
+        tokenizer.pop_directives_stack();
+        assert.strictEqual(tokenizer.has_directive("use strict"), false);
+        assert.strictEqual(tokenizer.has_directive("use asm"), false);
+        assert.strictEqual(tokenizer.has_directive("use thing"), false);
+    });
+});
\ No newline at end of file
diff --git a/test/mocha/string-literal.js b/test/mocha/string-literal.js
index 84aaad7e..c54c161c 100644
--- a/test/mocha/string-literal.js
+++ b/test/mocha/string-literal.js
@@ -31,4 +31,51 @@ describe("String literals", function() {
         var output = UglifyJS.parse('var a = "a\\\nb";').print_to_string();
         assert.equal(output, 'var a="ab";');
     });
+
+    it("Should throw error in strict mode if string contains escaped octalIntegerLiteral", function() {
+        var inputs = [
+            '"use strict";\n"\\76";',
+            '"use strict";\nvar foo = "\\76";',
+            '"use strict";\n"\\1";',
+            '"use strict";\n"\\07";',
+            '"use strict";\n"\\011"'
+        ];
+        // Wrap the parse call so assert.throws() can invoke it itself.
+        var test = function(input) {
+            return function() {
+                UglifyJS.parse(input);
+            };
+        };
+        // Accept only the strict-mode octal parse error, not any other failure.
+        var error = function(e) {
+            return e instanceof UglifyJS.JS_Parse_Error &&
+                e.message === "Octal literals are not allowed in strict mode";
+        };
+        // Index loop: for...in on an array also visits inherited enumerable properties.
+        for (var i = 0; i < inputs.length; i++) {
+            assert.throws(test(inputs[i]), error);
+        }
+    });
+
+    it("Should not throw error outside strict mode if string contains escaped octalIntegerLiteral", function() {
+        var tests = [
+            ['"\\76";', '">";'],
+            ['"\\0"', '"\\x00";'],
+            ['"\\08"', '"\\x008";'],
+            ['"\\008"', '"\\x008";'],
+            ['"\\0008"', '"\\x008";'],
+            ['"use strict" === "use strict";\n"\\76";', '"use strict"==="use strict";">";'],
+            // ['"use\\\n strict";\n"\\07";', '"use\\\n strict";\n"\\u0007";'] // TODO No way to store this content literally yet as directive
+        ];
+        // Each entry is [source, expected print_to_string() output]; walk by index, not for...in.
+        for (var i = 0; i < tests.length; i++) {
+            var output = UglifyJS.parse(tests[i][0]).print_to_string();
+            assert.equal(output, tests[i][1]);
+        }
+    });
+
+    it("Should not throw error when digit is 8 or 9", function() { // expected output is "\x00" followed by the digit, even under "use strict"
+        assert.equal(UglifyJS.parse('"use strict";"\\08"').print_to_string(), '"use strict";"\\x008";');
+        assert.equal(UglifyJS.parse('"use strict";"\\09"').print_to_string(), '"use strict";"\\x009";');
+    });
 });
\ No newline at end of file
-- 
2.34.1