From 2149bfb7071dedbcd42d2e245bb9c2f6b41a43bc Mon Sep 17 00:00:00 2001
From: Anthony Van de Gejuchte <anthonyvdgent@gmail.com>
Date: Mon, 13 Jun 2016 21:11:08 +0200
Subject: [PATCH] Don't mix strings with directives in output

 * Don't interpret strings with escaped content as directive
 * Don't interpret strings after empty statement as directive
 * Adapt output to prevent strings being represent as directive
 * Introduce UGLIFY_DEBUG to allow internal testing like EXPECT_DIRECTIVE
---
 README.md                    |   2 +-
 lib/output.js                |  42 ++++-
 lib/parse.js                 |   8 +-
 test/mocha/directives.js     | 309 +++++++++++++++++++++++++++++++++++
 test/mocha/string-literal.js |   4 +-
 test/run-tests.js            |   2 +
 tools/exports.js             |   4 +
 tools/node.js                |   5 +-
 8 files changed, 361 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index b0ac7121..51d0b629 100644
--- a/README.md
+++ b/README.md
@@ -452,7 +452,7 @@ can pass additional arguments that control the code output:
   objects
 - `space-colon` (default `true`) -- insert a space after the colon signs
 - `ascii-only` (default `false`) -- escape Unicode characters in strings and
-  regexps
+  regexps (affects directives with non-ascii characters becoming invalid)
 - `inline-script` (default `false`) -- escape the slash in occurrences of
   `</script` in strings
 - `width` (default 80) -- only takes effect when beautification is on, this
diff --git a/lib/output.js b/lib/output.js
index 6eae2f1f..1fa9899f 100644
--- a/lib/output.js
+++ b/lib/output.js
@@ -43,6 +43,8 @@
 
 "use strict";
 
+var EXPECT_DIRECTIVE = /^$|[;{][\s\n]*$/;
+
 function OutputStream(options) {
 
     options = defaults(options, {
@@ -354,7 +356,18 @@ function OutputStream(options) {
         force_semicolon : force_semicolon,
         to_ascii        : to_ascii,
         print_name      : function(name) { print(make_name(name)) },
-        print_string    : function(str, quote) { print(encode_string(str, quote)) },
+        print_string    : function(str, quote, escape_directive) {
+            var encoded = encode_string(str, quote);
+            if (escape_directive === true && encoded.indexOf("\\") === -1) {
+                // Insert semicolons to break directive prologue
+                if (!EXPECT_DIRECTIVE.test(OUTPUT)) {
+                    force_semicolon();
+                }
+                force_semicolon();
+            }
+            print(encoded);
+        },
+        encode_string   : encode_string,
         next_indent     : next_indent,
         with_indent     : with_indent,
         with_block      : with_block,
@@ -386,6 +399,7 @@ function OutputStream(options) {
     };
 
     var use_asm = false;
+    var in_directive = false;
 
     AST_Node.DEFMETHOD("print", function(stream, force_parens){
         var self = this, generator = self._codegen, prev_use_asm = use_asm;
@@ -642,9 +656,16 @@ function OutputStream(options) {
 
     /* -----[ statements ]----- */
 
-    function display_body(body, is_toplevel, output) {
+    function display_body(body, is_toplevel, output, allow_directives) {
         var last = body.length - 1;
+        in_directive = allow_directives;
         body.forEach(function(stmt, i){
+            if (in_directive === true && !(stmt instanceof AST_Directive ||
+                stmt instanceof AST_EmptyStatement ||
+                (stmt instanceof AST_SimpleStatement && stmt.body instanceof AST_String)
+            )) {
+                in_directive = false;
+            }
             if (!(stmt instanceof AST_EmptyStatement)) {
                 output.indent();
                 stmt.print(output);
@@ -653,7 +674,14 @@ function OutputStream(options) {
                     if (is_toplevel) output.newline();
                 }
             }
+            if (in_directive === true &&
+                stmt instanceof AST_SimpleStatement &&
+                stmt.body instanceof AST_String
+            ) {
+                in_directive = false;
+            }
         });
+        in_directive = false;
     };
 
     AST_StatementWithBody.DEFMETHOD("_do_print_body", function(output){
@@ -665,7 +693,7 @@ function OutputStream(options) {
         output.semicolon();
     });
     DEFPRINT(AST_Toplevel, function(self, output){
-        display_body(self.body, true, output);
+        display_body(self.body, true, output, true);
         output.print("");
     });
     DEFPRINT(AST_LabeledStatement, function(self, output){
@@ -677,9 +705,9 @@ function OutputStream(options) {
         self.body.print(output);
         output.semicolon();
     });
-    function print_bracketed(body, output) {
+    function print_bracketed(body, output, allow_directives) {
         if (body.length > 0) output.with_block(function(){
-            display_body(body, false, output);
+            display_body(body, false, output, allow_directives);
         });
         else output.print("{}");
     };
@@ -779,7 +807,7 @@ function OutputStream(options) {
             });
         });
         output.space();
-        print_bracketed(self.body, output);
+        print_bracketed(self.body, output, true);
     });
     DEFPRINT(AST_Lambda, function(self, output){
         self._do_print(output);
@@ -1185,7 +1213,7 @@ function OutputStream(options) {
         output.print(self.getValue());
     });
     DEFPRINT(AST_String, function(self, output){
-        output.print_string(self.getValue(), self.quote);
+        output.print_string(self.getValue(), self.quote, in_directive);
     });
     DEFPRINT(AST_Number, function(self, output){
         if (use_asm && self.start.raw != null) {
diff --git a/lib/parse.js b/lib/parse.js
index 2c007965..9b5a142b 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -815,9 +815,10 @@ function parse($TEXT, options) {
         handle_regexp();
         switch (S.token.type) {
           case "string":
-            if (S.in_directives) {
-                if (is_token(peek(), "punc", ";") || peek().nlb) {
-                    S.input.add_directive(S.token.raw.substr(1, S.token.raw.length - 2));
+            var dir = false;
+            if (S.in_directives === true) {
+                if ((is_token(peek(), "punc", ";") || peek().nlb) && S.token.raw.indexOf("\\") === -1) {
+                    S.input.add_directive(S.token.value);
                 } else {
                     S.in_directives = false;
                 }
@@ -855,6 +856,7 @@ function parse($TEXT, options) {
               case "(":
                 return simple_statement();
               case ";":
+                S.in_directives = false;
                 next();
                 return new AST_EmptyStatement();
               default:
diff --git a/test/mocha/directives.js b/test/mocha/directives.js
index 4433e429..604b0329 100644
--- a/test/mocha/directives.js
+++ b/test/mocha/directives.js
@@ -58,4 +58,313 @@ describe("Directives", function() {
         assert.strictEqual(tokenizer.has_directive("use asm"), false);
         assert.strictEqual(tokenizer.has_directive("use thing"), false);
     });
+
+    it("Should know which strings are directive and which ones are not", function() {
+        var test_directive = function(tokenizer, test) {
+            test.directives.map(function(directive) {
+                assert.strictEqual(tokenizer.has_directive(directive), true, directive + " in " + test.input);
+            });
+            test.non_directives.map(function(fake_directive) {
+                assert.strictEqual(tokenizer.has_directive(fake_directive), false, fake_directive + " in " + test.input);
+            });
+        }
+
+        var tests = [
+            {
+                input: '"use strict"\n',
+                directives: ["use strict"],
+                non_directives: ["use asm"]
+            },
+            {
+                input: '"use\\\nstrict";',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: '"use strict"\n"use asm"\n"use bar"\n',
+                directives: ["use strict", "use asm", "use bar"],
+                non_directives: ["use foo", "use\\x20strict"]
+            },
+            {
+                input: '"use \\\nstrict";"use strict";',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: '"\\76";',
+                directives: [],
+                non_directives: [">", "\\76"]
+            },
+            {
+                input: '"use strict"', // no ; or newline
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: ';"use strict"',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            // Duplicate above code but put it in a function
+            {
+                input: 'function foo() {"use strict"\n',
+                directives: ["use strict"],
+                non_directives: ["use asm"]
+            },
+            {
+                input: 'function foo() {"use\\\nstrict";',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: 'function foo() {"use strict"\n"use asm"\n"use bar"\n',
+                directives: ["use strict", "use asm", "use bar"],
+                non_directives: ["use foo", "use\\x20strict"]
+            },
+            {
+                input: 'function foo() {"use \\\nstrict";"use strict";',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: 'var foo = function() {"\\76";',
+                directives: [],
+                non_directives: [">", "\\76"]
+            },
+            {
+                input: 'var foo = function() {"use strict"', // no ; or newline
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: 'var foo = function() {;"use strict"',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            // Special cases
+            {
+                input: '"1";"2";"3";"4";;"5"',
+                directives: ["1", "2", "3", "4"],
+                non_directives: ["5", "6", "use strict", "use asm"]
+            },
+            {
+                input: 'if(1){"use strict";',
+                directives: [],
+                non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
+            },
+            {
+                input: '"use strict";try{"use asm";',
+                directives: ["use strict"],
+                non_directives: ["use\nstrict", "use \nstrict", "use asm"]
+            }
+        ];
+
+        for (var i = 0; i < tests.length; i++) {
+            // Fail parser deliberately to get state at failure
+            var tokenizer = uglify.tokenizer(tests[i].input + "]", "foo.js");
+
+            try {
+                var parser = uglify.parse(tokenizer);
+                throw new Error("Expected parser to fail");
+            } catch (e) {
+                assert.strictEqual(e instanceof uglify.JS_Parse_Error, true);
+                assert.strictEqual(e.message, "Unexpected token: punc (])");
+            }
+
+            test_directive(tokenizer, tests[i]);
+        }
+    });
+
+    it("Should test EXPECT_DIRECTIVE RegExp", function() {
+        var tests = [
+            ["", true],
+            ["'test';", true],
+            ["'test';;", true],
+            ["'tests';\n", true],
+            ["'tests'", false],
+            ["'tests';   \n\t", true],
+            ["'tests';\n\n", true],
+            ["\n\n\"use strict\";\n\n", true]
+        ];
+
+        for (var i = 0; i < tests.length; i++) {
+            assert.strictEqual(uglify.EXPECT_DIRECTIVE.test(tests[i][0]), tests[i][1], tests[i][0]);
+        }
+    });
+
+    it("Should only print 2 semicolons spread over 2 lines in beautify mode", function() {
+        assert.strictEqual(
+            uglify.minify(
+                '"use strict";\'use strict\';"use strict";"use strict";;\'use strict\';console.log(\'use strict\');',
+                {fromString: true, output: {beautify: true, quote_style: 3}, compress: false}
+            ).code,
+            '"use strict";\n\n\'use strict\';\n\n"use strict";\n\n"use strict";\n\n;\'use strict\';\n\nconsole.log(\'use strict\');'
+        );
+    });
+
+    it("Should not add double semicolons in non-scoped block statements to avoid strings becoming directives", function() {
+        var tests = [
+            [
+                '{"use\x20strict"}',
+                '{"use strict"}'
+            ],
+            [
+                'function foo(){"use\x20strict";}', // Valid place for directives
+                'function foo(){"use strict"}'
+            ],
+            [
+                'try{"use\x20strict"}catch(e){}finally{"use\x20strict"}',
+                'try{"use strict"}catch(e){}finally{"use strict"}'
+            ],
+            [
+                'if(1){"use\x20strict"} else {"use strict"}',
+                'if(1){"use strict"}else{"use strict"}'
+            ]
+        ];
+
+        for (var i = 0; i < tests.length; i++) {
+            assert.strictEqual(
+                uglify.minify(tests[i][0], {fromString: true, quote_style: 3, compress: false}).code,
+                tests[i][1],
+                tests[i][0]
+            );
+        }
+    });
+
+    it("Should add double semicolon when relying on automatic semicolon insertion", function() {
+        var code = uglify.minify('"use strict";"use\\x20strict";',
+            {fromString: true, output: {semicolons: false}, compress: false}
+        ).code;
+        assert.strictEqual(code, '"use strict";;"use strict"\n');
+    });
+
+    it("Should check quote style of directives", function() {
+        var tests = [
+            // 0. Prefer double quotes, unless string contains more double quotes than single quotes
+            [
+                '"testing something";',
+                0,
+                '"testing something";'
+            ],
+            [
+                "'use strict';",
+                0,
+                '"use strict";'
+            ],
+            [
+                '"\\\'use strict\\\'";', // Not a directive as it contains quotes
+                0,
+                ';"\'use strict\'";',
+            ],
+            [
+                "'\"use strict\"';",
+                0,
+                "'\"use strict\"';",
+            ],
+            // 1. Always use single quote
+            [
+                '"testing something";',
+                1,
+                "'testing something';"
+            ],
+            [
+                "'use strict';",
+                1,
+                "'use strict';"
+            ],
+            [
+                '"\'use strict\'";',
+                1,
+                // Intentionally causes directive breakage at cost of less logic, usage should be rare anyway
+                "'\\'use strict\\'';",
+            ],
+            [
+                "'\\'use strict\\'';", // Not a valid directive
+                1,
+                "'\\'use strict\\'';" // But no ; necessary as directive stays invalid
+            ],
+            [
+                "'\"use strict\"';",
+                1,
+                "'\"use strict\"';",
+            ],
+            // 2. Always use double quote
+            [
+                '"testing something";',
+                2,
+                '"testing something";'
+            ],
+            [
+                "'use strict';",
+                2,
+                '"use strict";'
+            ],
+            [
+                '"\'use strict\'";',
+                2,
+                "\"'use strict'\";",
+            ],
+            [
+                "'\"use strict\"';",
+                2,
+                // Intentionally causes directive breakage at cost of less logic, usage should be rare anyway
+                '"\\\"use strict\\\"";',
+            ],
+            [
+                '"\\"use strict\\"";', // Not a valid directive
+                2,
+                '"\\"use strict\\"";' // But no ; necessary as directive stays invalid
+            ],
+            // 3. Always use original
+            [
+                '"testing something";',
+                3,
+                '"testing something";'
+            ],
+            [
+                "'use strict';",
+                3,
+                "'use strict';",
+            ],
+            [
+                '"\'use strict\'";',
+                3,
+                '"\'use strict\'";',
+            ],
+            [
+                "'\"use strict\"';",
+                3,
+                "'\"use strict\"';",
+            ],
+        ];
+        for (var i = 0; i < tests.length; i++) {
+            assert.strictEqual(
+                uglify.minify(tests[i][0], {fromString: true, output:{quote_style: tests[i][1]}, compress: false}).code,
+                tests[i][2],
+                tests[i][0] + " using mode " + tests[i][1]
+            );
+        }
+    });
+    it("Should be able to compress without side effects", function() {
+        // NOTE: the "use asm" directive disables any optimisation after being defined
+        var tests = [
+            [
+                '"use strict";"use strict";"use strict";"use foo";"use strict";;"use sloppy";doSomething("foo");',
+                '"use strict";"use foo";doSomething("foo");'
+            ],
+            [
+                 // Nothing gets optimised in the compressor because "use asm" is the first statement
+                '"use asm";"use\\x20strict";1+1;',
+                '"use asm";;"use strict";1+1;' // Yet, the parser noticed that "use strict" wasn't a directive
+            ]
+        ];
+
+        for (var i = 0; i < tests.length; i++) {
+            assert.strictEqual(
+                uglify.minify(tests[i][0], {fromString: true, compress: {collapse_vars: true, side_effects: true}}).code,
+                tests[i][1],
+                tests[i][0]
+            );
+        }
+    });
 });
\ No newline at end of file
diff --git a/test/mocha/string-literal.js b/test/mocha/string-literal.js
index b363a07f..ea984213 100644
--- a/test/mocha/string-literal.js
+++ b/test/mocha/string-literal.js
@@ -59,13 +59,13 @@ describe("String literals", function() {
 
     it("Should not throw error outside strict mode if string contains escaped octalIntegerLiteral", function() {
         var tests = [
-            ['"\\76";', '">";'],
+            ['"\\76";', ';">";'],
             ['"\\0"', '"\\0";'],
             ['"\\08"', '"\\08";'],
             ['"\\008"', '"\\08";'],
             ['"\\0008"', '"\\08";'],
             ['"use strict" === "use strict";\n"\\76";', '"use strict"==="use strict";">";'],
-            // ['"use\\\n strict";\n"\\07";', '"use\\\n strict";\n"\\u0007";'] // TODO No way to store this content literally yet as directive
+            ['"use\\\n strict";\n"\\07";', ';"use strict";"\07";']
         ];
 
         for (var test in tests) {
diff --git a/test/run-tests.js b/test/run-tests.js
index 6614b2a5..5fc69c69 100755
--- a/test/run-tests.js
+++ b/test/run-tests.js
@@ -1,5 +1,7 @@
 #! /usr/bin/env node
 
+global.UGLIFY_DEBUG = true;
+
 var U = require("../tools/node");
 var path = require("path");
 var fs = require("fs");
diff --git a/tools/exports.js b/tools/exports.js
index 09acc13e..a481143c 100644
--- a/tools/exports.js
+++ b/tools/exports.js
@@ -17,3 +17,7 @@ exports["string_template"] = string_template;
 exports["tokenizer"] = tokenizer;
 exports["is_identifier"] = is_identifier;
 exports["SymbolDef"] = SymbolDef;
+
+if (DEBUG) {
+    exports["EXPECT_DIRECTIVE"] = EXPECT_DIRECTIVE;
+}
diff --git a/tools/node.js b/tools/node.js
index 8cd3e4be..39976371 100644
--- a/tools/node.js
+++ b/tools/node.js
@@ -25,11 +25,12 @@ var FILES = exports.FILES = [
 
 var UglifyJS = exports;
 
-new Function("MOZ_SourceMap", "exports", FILES.map(function(file){
+new Function("MOZ_SourceMap", "exports", "DEBUG", FILES.map(function(file){
     return fs.readFileSync(file, "utf8");
 }).join("\n\n"))(
     require("source-map"),
-    UglifyJS
+    UglifyJS,
+    !!global.UGLIFY_DEBUG
 );
 
 UglifyJS.AST_Node.warn_function = function(txt) {
-- 
2.34.1