From: Alex Lam S.L
Date: Sun, 23 Jul 2017 04:38:21 +0000 (+0800)
Subject: unescape surrogate pairs only (#2246)
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=6a5e74b44e65811b2152f72aeec8df3f75457663;p=UglifyJS.git

unescape surrogate pairs only (#2246)

fixes #2242
---
Reviewer notes (free-form text ahead of the first "diff --git" line, not part of any hunk):
 - lib/output.js: the non-ascii_only to_utf8() now escapes unpaired surrogates in a
   single pass. The earlier two-pass form left the second of two adjacent unpaired low
   surrogates (e.g. "\udc00\udc01") unescaped, because its `(^|[^\ud800-\udbff])` prefix
   could not match once the preceding code unit had already been consumed.
 - test/mocha/string-literal.js: the second minify() call now passes ascii_only: true,
   so the `ascii` result actually exercises the ascii_only code path.
 - Hunk line counts are unchanged; the blob ids on the "index" lines predate these edits.
 - Line breaks and indentation were restored from a whitespace-collapsed copy; verify
   context whitespace against the target tree before applying.

diff --git a/lib/output.js b/lib/output.js
index edb8d182..4c873f10 100644
--- a/lib/output.js
+++ b/lib/output.js
@@ -109,7 +109,7 @@ function OutputStream(options) {
     var current_pos = 0;
     var OUTPUT = "";
 
-    function to_ascii(str, identifier) {
+    var to_utf8 = options.ascii_only ? function(str, identifier) {
         return str.replace(/[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
             var code = ch.charCodeAt(0).toString(16);
             if (code.length <= 2 && !identifier) {
@@ -120,6 +120,12 @@ function OutputStream(options) {
                 return "\\u" + code;
             }
         });
+    } : function(str) {
+        // Keep well-formed surrogate pairs verbatim; escape every unpaired surrogate.
+        return str.replace(/[\ud800-\udbff][\udc00-\udfff]|([\ud800-\udfff])/g, function(match, lone) {
+            if (lone) return "\\u" + lone.charCodeAt(0).toString(16);
+            return match;
+        });
     };
 
     function make_string(str, quote) {
@@ -150,7 +156,7 @@ function OutputStream(options) {
         function quote_double() {
             return '"' + str.replace(/\x22/g, '\\"') + '"';
         }
-        if (options.ascii_only) str = to_ascii(str);
+        str = to_utf8(str);
         switch (options.quote_style) {
           case 1:
             return quote_single();
@@ -175,8 +181,7 @@ function OutputStream(options) {
 
     function make_name(name) {
         name = name.toString();
-        if (options.ascii_only)
-            name = to_ascii(name, true);
+        name = to_utf8(name, true);
         return name;
     };
 
@@ -433,7 +438,7 @@ function OutputStream(options) {
         last            : function() { return last },
         semicolon       : semicolon,
         force_semicolon : force_semicolon,
-        to_ascii        : to_ascii,
+        to_utf8         : to_utf8,
         print_name      : function(name) { print(make_name(name)) },
         print_string    : function(str, quote, escape_directive) {
             var encoded = encode_string(str, quote);
@@ -1318,9 +1323,7 @@ function OutputStream(options) {
         if (regexp.raw_source) {
             str = "/" + regexp.raw_source + str.slice(str.lastIndexOf("/"));
         }
-        if (output.option("ascii_only")) {
-            str = output.to_ascii(str);
-        }
+        str = output.to_utf8(str);
         output.print(str);
         var p = output.parent();
         if (p instanceof AST_Binary && /^in/.test(p.operator) && p.left === self)
diff --git a/test/compress/unicode.js b/test/compress/unicode.js
index 9fb9ab8c..4dbc197c 100644
--- a/test/compress/unicode.js
+++ b/test/compress/unicode.js
@@ -15,3 +15,43 @@ unicode_parse_variables: {
         var l০ = 3;
     }
 }
+
+issue_2242_1: {
+    beautify = {
+        ascii_only: false,
+    }
+    input: {
+        console.log("\ud83d", "\ude00", "\ud83d\ude00", "\ud83d@\ude00");
+    }
+    expect_exact: 'console.log("\\ud83d","\\ude00","\ud83d\ude00","\\ud83d@\\ude00");'
+}
+
+issue_2242_2: {
+    beautify = {
+        ascii_only: true,
+    }
+    input: {
+        console.log("\ud83d", "\ude00", "\ud83d\ude00", "\ud83d@\ude00");
+    }
+    expect_exact: 'console.log("\\ud83d","\\ude00","\\ud83d\\ude00","\\ud83d@\\ude00");'
+}
+
+issue_2242_3: {
+    options = {
+        evaluate: false,
+    }
+    input: {
+        console.log("\ud83d" + "\ude00", "\ud83d" + "@" + "\ude00");
+    }
+    expect_exact: 'console.log("\\ud83d"+"\\ude00","\\ud83d"+"@"+"\\ude00");'
+}
+
+issue_2242_4: {
+    options = {
+        evaluate: true,
+    }
+    input: {
+        console.log("\ud83d" + "\ude00", "\ud83d" + "@" + "\ude00");
+    }
+    expect_exact: 'console.log("\ud83d\ude00","\\ud83d@\\ude00");'
+}
diff --git a/test/mocha/string-literal.js b/test/mocha/string-literal.js
index fde6db59..d2eb6a80 100644
--- a/test/mocha/string-literal.js
+++ b/test/mocha/string-literal.js
@@ -78,4 +78,41 @@ describe("String literals", function() {
         assert.equal(UglifyJS.parse('"use strict";"\\08"').print_to_string(), '"use strict";"\\08";');
         assert.equal(UglifyJS.parse('"use strict";"\\09"').print_to_string(), '"use strict";"\\09";');
     });
+
+    it("Should not unescape unpaired surrogates", function() {
+        var code = [];
+        for (var i = 0; i <= 0xF; i++) {
+            code.push("\\u000" + i.toString(16));
+        }
+        for (;i <= 0xFF; i++) {
+            code.push("\\u00" + i.toString(16));
+        }
+        for (;i <= 0xFFF; i++) {
+            code.push("\\u0" + i.toString(16));
+        }
+        for (; i <= 0xFFFF; i++) {
+            code.push("\\u" + i.toString(16));
+        }
+        code = '"' + code.join() + '"';
+        var normal = UglifyJS.minify(code, {
+            compress: false,
+            mangle: false,
+            output: {
+                ascii_only: false
+            }
+        });
+        if (normal.error) throw normal.error;
+        assert.ok(code.length > normal.code.length);
+        assert.strictEqual(eval(code), eval(normal.code));
+        var ascii = UglifyJS.minify(code, {
+            compress: false,
+            mangle: false,
+            output: {
+                ascii_only: true
+            }
+        });
+        if (ascii.error) throw ascii.error;
+        assert.ok(code.length > ascii.code.length);
+        assert.strictEqual(eval(code), eval(ascii.code));
+    });
 });