From 2149bfb7071dedbcd42d2e245bb9c2f6b41a43bc Mon Sep 17 00:00:00 2001 From: Anthony Van de Gejuchte Date: Mon, 13 Jun 2016 21:11:08 +0200 Subject: [PATCH] Don't mix strings with directives in output * Don't interpret strings with escaped content as directive * Don't interpret strings after empty statement as directive * Adapt output to prevent strings being represent as directive * Introduce UGLIFY_DEBUG to allow internal testing like EXPECT_DIRECTIVE --- README.md | 2 +- lib/output.js | 42 ++++- lib/parse.js | 8 +- test/mocha/directives.js | 309 +++++++++++++++++++++++++++++++++++ test/mocha/string-literal.js | 4 +- test/run-tests.js | 2 + tools/exports.js | 4 + tools/node.js | 5 +- 8 files changed, 361 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index b0ac7121..51d0b629 100644 --- a/README.md +++ b/README.md @@ -452,7 +452,7 @@ can pass additional arguments that control the code output: objects - `space-colon` (default `true`) -- insert a space after the colon signs - `ascii-only` (default `false`) -- escape Unicode characters in strings and - regexps + regexps (affects directives with non-ascii characters becoming invalid) - `inline-script` (default `false`) -- escape the slash in occurrences of ` 0) output.with_block(function(){ - display_body(body, false, output); + display_body(body, false, output, allow_directives); }); else output.print("{}"); }; @@ -779,7 +807,7 @@ function OutputStream(options) { }); }); output.space(); - print_bracketed(self.body, output); + print_bracketed(self.body, output, true); }); DEFPRINT(AST_Lambda, function(self, output){ self._do_print(output); @@ -1185,7 +1213,7 @@ function OutputStream(options) { output.print(self.getValue()); }); DEFPRINT(AST_String, function(self, output){ - output.print_string(self.getValue(), self.quote); + output.print_string(self.getValue(), self.quote, in_directive); }); DEFPRINT(AST_Number, function(self, output){ if (use_asm && self.start.raw != null) { diff --git a/lib/parse.js b/lib/parse.js index 2c007965..9b5a142b 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -815,9 +815,10 @@ function parse($TEXT, options) { handle_regexp(); switch (S.token.type) { case "string": - if (S.in_directives) { - if (is_token(peek(), "punc", ";") || peek().nlb) { - S.input.add_directive(S.token.raw.substr(1, S.token.raw.length - 2)); + var dir = false; + if (S.in_directives === true) { + if ((is_token(peek(), "punc", ";") || peek().nlb) && S.token.raw.indexOf("\\") === -1) { + S.input.add_directive(S.token.value); } else { S.in_directives = false; } @@ -855,6 +856,7 @@ function parse($TEXT, options) { case "(": return simple_statement(); case ";": + S.in_directives = false; next(); return new AST_EmptyStatement(); default: diff --git a/test/mocha/directives.js b/test/mocha/directives.js index 4433e429..604b0329 100644 --- a/test/mocha/directives.js +++ b/test/mocha/directives.js @@ -58,4 +58,313 @@ describe("Directives", function() { assert.strictEqual(tokenizer.has_directive("use asm"), false); assert.strictEqual(tokenizer.has_directive("use thing"), false); }); + + it("Should know which strings are directive and which ones are not", function() { + var test_directive = function(tokenizer, test) { + test.directives.map(function(directive) { + assert.strictEqual(tokenizer.has_directive(directive), true, directive + " in " + test.input); + }); + test.non_directives.map(function(fake_directive) { + assert.strictEqual(tokenizer.has_directive(fake_directive), false, fake_directive + " in " + test.input); + }); + } + + var tests = [ + { + input: '"use strict"\n', + directives: ["use strict"], + non_directives: ["use asm"] + }, + { + input: '"use\\\nstrict";', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: '"use strict"\n"use asm"\n"use bar"\n', + directives: ["use strict", "use asm", "use bar"], + non_directives: ["use foo", "use\\x20strict"] + }, + { + input: '"use \\\nstrict";"use strict";', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: '"\\76";', + directives: [], + non_directives: [">", "\\76"] + }, + { + input: '"use strict"', // no ; or newline + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: ';"use strict"', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + // Duplicate above code but put it in a function + { + input: 'function foo() {"use strict"\n', + directives: ["use strict"], + non_directives: ["use asm"] + }, + { + input: 'function foo() {"use\\\nstrict";', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: 'function foo() {"use strict"\n"use asm"\n"use bar"\n', + directives: ["use strict", "use asm", "use bar"], + non_directives: ["use foo", "use\\x20strict"] + }, + { + input: 'function foo() {"use \\\nstrict";"use strict";', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: 'var foo = function() {"\\76";', + directives: [], + non_directives: [">", "\\76"] + }, + { + input: 'var foo = function() {"use strict"', // no ; or newline + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: 'var foo = function() {;"use strict"', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + // Special cases + { + input: '"1";"2";"3";"4";;"5"', + directives: ["1", "2", "3", "4"], + non_directives: ["5", "6", "use strict", "use asm"] + }, + { + input: 'if(1){"use strict";', + directives: [], + non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"] + }, + { + input: '"use strict";try{"use asm";', + directives: ["use strict"], + non_directives: ["use\nstrict", "use \nstrict", "use asm"] + } + ]; + + for (var i = 0; i < tests.length; i++) { + // Fail parser deliberately to get state at failure + var tokenizer = uglify.tokenizer(tests[i].input + "]", "foo.js"); + + try { + var parser = uglify.parse(tokenizer); + throw new Error("Expected parser to fail"); + } catch (e) { + assert.strictEqual(e instanceof uglify.JS_Parse_Error, true); + assert.strictEqual(e.message, "Unexpected token: punc (])"); + } + + test_directive(tokenizer, tests[i]); + } + }); + + it("Should test EXPECT_DIRECTIVE RegExp", function() { + var tests = [ + ["", true], + ["'test';", true], + ["'test';;", true], + ["'tests';\n", true], + ["'tests'", false], + ["'tests'; \n\t", true], + ["'tests';\n\n", true], + ["\n\n\"use strict\";\n\n", true] + ]; + + for (var i = 0; i < tests.length; i++) { + assert.strictEqual(uglify.EXPECT_DIRECTIVE.test(tests[i][0]), tests[i][1], tests[i][0]); + } + }); + + it("Should only print 2 semicolons spread over 2 lines in beautify mode", function() { + assert.strictEqual( + uglify.minify( + '"use strict";\'use strict\';"use strict";"use strict";;\'use strict\';console.log(\'use strict\');', + {fromString: true, output: {beautify: true, quote_style: 3}, compress: false} + ).code, + '"use strict";\n\n\'use strict\';\n\n"use strict";\n\n"use strict";\n\n;\'use strict\';\n\nconsole.log(\'use strict\');' + ); + }); + + it("Should not add double semicolons in non-scoped block statements to avoid strings becoming directives", function() { + var tests = [ + [ + '{"use\x20strict"}', + '{"use strict"}' + ], + [ + 'function foo(){"use\x20strict";}', // Valid place for directives + 'function foo(){"use strict"}' + ], + [ + 'try{"use\x20strict"}catch(e){}finally{"use\x20strict"}', + 'try{"use strict"}catch(e){}finally{"use strict"}' + ], + [ + 'if(1){"use\x20strict"} else {"use strict"}', + 'if(1){"use strict"}else{"use strict"}' + ] + ]; + + for (var i = 0; i < tests.length; i++) { + assert.strictEqual( + uglify.minify(tests[i][0], {fromString: true, quote_style: 3, compress: false}).code, + tests[i][1], + tests[i][0] + ); + } + }); + + it("Should add double semicolon when relying on automatic semicolon insertion", function() { + var code = uglify.minify('"use strict";"use\\x20strict";', + {fromString: true, output: {semicolons: false}, compress: false} + ).code; + assert.strictEqual(code, '"use strict";;"use strict"\n'); + }); + + it("Should check quote style of directives", function() { + var tests = [ + // 0. Prefer double quotes, unless string contains more double quotes than single quotes + [ + '"testing something";', + 0, + '"testing something";' + ], + [ + "'use strict';", + 0, + '"use strict";' + ], + [ + '"\\\'use strict\\\'";', // Not a directive as it contains quotes + 0, + ';"\'use strict\'";', + ], + [ + "'\"use strict\"';", + 0, + "'\"use strict\"';", + ], + // 1. Always use single quote + [ + '"testing something";', + 1, + "'testing something';" + ], + [ + "'use strict';", + 1, + "'use strict';" + ], + [ + '"\'use strict\'";', + 1, + // Intentionally causes directive breakage at cost of less logic, usage should be rare anyway + "'\\'use strict\\'';", + ], + [ + "'\\'use strict\\'';", // Not a valid directive + 1, + "'\\'use strict\\'';" // But no ; necessary as directive stays invalid + ], + [ + "'\"use strict\"';", + 1, + "'\"use strict\"';", + ], + // 2. Always use double quote + [ + '"testing something";', + 2, + '"testing something";' + ], + [ + "'use strict';", + 2, + '"use strict";' + ], + [ + '"\'use strict\'";', + 2, + "\"'use strict'\";", + ], + [ + "'\"use strict\"';", + 2, + // Intentionally causes directive breakage at cost of less logic, usage should be rare anyway + '"\\\"use strict\\\"";', + ], + [ + '"\\"use strict\\"";', // Not a valid directive + 2, + '"\\"use strict\\"";' // But no ; necessary as directive stays invalid + ], + // 3. Always use original + [ + '"testing something";', + 3, + '"testing something";' + ], + [ + "'use strict';", + 3, + "'use strict';", + ], + [ + '"\'use strict\'";', + 3, + '"\'use strict\'";', + ], + [ + "'\"use strict\"';", + 3, + "'\"use strict\"';", + ], + ]; + for (var i = 0; i < tests.length; i++) { + assert.strictEqual( + uglify.minify(tests[i][0], {fromString: true, output:{quote_style: tests[i][1]}, compress: false}).code, + tests[i][2], + tests[i][0] + " using mode " + tests[i][1] + ); + } + }); + it("Should be able to compress without side effects", function() { + // NOTE: the "use asm" directive disables any optimisation after being defined + var tests = [ + [ + '"use strict";"use strict";"use strict";"use foo";"use strict";;"use sloppy";doSomething("foo");', + '"use strict";"use foo";doSomething("foo");' + ], + [ + // Nothing gets optimised in the compressor because "use asm" is the first statement + '"use asm";"use\\x20strict";1+1;', + '"use asm";;"use strict";1+1;' // Yet, the parser noticed that "use strict" wasn't a directive + ] + ]; + + for (var i = 0; i < tests.length; i++) { + assert.strictEqual( + uglify.minify(tests[i][0], {fromString: true, compress: {collapse_vars: true, side_effects: true}}).code, + tests[i][1], + tests[i][0] + ); + } + }); }); \ No newline at end of file diff --git a/test/mocha/string-literal.js b/test/mocha/string-literal.js index b363a07f..ea984213 100644 --- a/test/mocha/string-literal.js +++ b/test/mocha/string-literal.js @@ -59,13 +59,13 @@ describe("String literals", function() { it("Should not throw error outside strict mode if string contains escaped octalIntegerLiteral", function() { var tests = [ - ['"\\76";', '">";'], + ['"\\76";', ';">";'], ['"\\0"', '"\\0";'], ['"\\08"', '"\\08";'], ['"\\008"', '"\\08";'], ['"\\0008"', '"\\08";'], ['"use strict" === "use strict";\n"\\76";', '"use strict"==="use strict";">";'], - // ['"use\\\n strict";\n"\\07";', '"use\\\n strict";\n"\\u0007";'] // TODO No way to store this content literally yet as directive + ['"use\\\n strict";\n"\\07";', ';"use strict";"\07";'] ]; for (var test in tests) { diff --git a/test/run-tests.js b/test/run-tests.js index 6614b2a5..5fc69c69 100755 --- a/test/run-tests.js +++ b/test/run-tests.js @@ -1,5 +1,7 @@ #! /usr/bin/env node +global.UGLIFY_DEBUG = true; + var U = require("../tools/node"); var path = require("path"); var fs = require("fs"); diff --git a/tools/exports.js b/tools/exports.js index 09acc13e..a481143c 100644 --- a/tools/exports.js +++ b/tools/exports.js @@ -17,3 +17,7 @@ exports["string_template"] = string_template; exports["tokenizer"] = tokenizer; exports["is_identifier"] = is_identifier; exports["SymbolDef"] = SymbolDef; + +if (DEBUG) { + exports["EXPECT_DIRECTIVE"] = EXPECT_DIRECTIVE; +} diff --git a/tools/node.js b/tools/node.js index 8cd3e4be..39976371 100644 --- a/tools/node.js +++ b/tools/node.js @@ -25,11 +25,12 @@ var FILES = exports.FILES = [ var UglifyJS = exports; -new Function("MOZ_SourceMap", "exports", FILES.map(function(file){ +new Function("MOZ_SourceMap", "exports", "DEBUG", FILES.map(function(file){ return fs.readFileSync(file, "utf8"); }).join("\n\n"))( require("source-map"), - UglifyJS + UglifyJS, + !!global.UGLIFY_DEBUG ); UglifyJS.AST_Node.warn_function = function(txt) { -- 2.34.1