unescape surrogate pairs only (#2246)
author: Alex Lam S.L <alexlamsl@gmail.com>
Sun, 23 Jul 2017 04:38:21 +0000 (12:38 +0800)
committer: GitHub <noreply@github.com>
Sun, 23 Jul 2017 04:38:21 +0000 (12:38 +0800)
fixes #2242

lib/output.js
test/compress/unicode.js
test/mocha/string-literal.js

index edb8d18..4c873f1 100644 (file)
@@ -109,7 +109,7 @@ function OutputStream(options) {
     var current_pos = 0;
     var OUTPUT = "";
 
-    function to_ascii(str, identifier) {
+    var to_utf8 = options.ascii_only ? function(str, identifier) {
         return str.replace(/[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
             var code = ch.charCodeAt(0).toString(16);
             if (code.length <= 2 && !identifier) {
@@ -120,6 +120,12 @@ function OutputStream(options) {
                 return "\\u" + code;
             }
         });
+    } : function(str) {
+        return str.replace(/[\ud800-\udbff](?![\udc00-\udfff])/g, function(ch) {
+            return "\\u" + ch.charCodeAt(0).toString(16);
+        }).replace(/(^|[^\ud800-\udbff])([\udc00-\udfff])/g, function(match, prefix, ch) {
+            return prefix + "\\u" + ch.charCodeAt(0).toString(16);
+        });
     };
 
     function make_string(str, quote) {
@@ -150,7 +156,7 @@ function OutputStream(options) {
         function quote_double() {
             return '"' + str.replace(/\x22/g, '\\"') + '"';
         }
-        if (options.ascii_only) str = to_ascii(str);
+        str = to_utf8(str);
         switch (options.quote_style) {
           case 1:
             return quote_single();
@@ -175,8 +181,7 @@ function OutputStream(options) {
 
     function make_name(name) {
         name = name.toString();
-        if (options.ascii_only)
-            name = to_ascii(name, true);
+        name = to_utf8(name, true);
         return name;
     };
 
@@ -433,7 +438,7 @@ function OutputStream(options) {
         last            : function() { return last },
         semicolon       : semicolon,
         force_semicolon : force_semicolon,
-        to_ascii        : to_ascii,
+        to_utf8         : to_utf8,
         print_name      : function(name) { print(make_name(name)) },
         print_string    : function(str, quote, escape_directive) {
             var encoded = encode_string(str, quote);
@@ -1318,9 +1323,7 @@ function OutputStream(options) {
         if (regexp.raw_source) {
             str = "/" + regexp.raw_source + str.slice(str.lastIndexOf("/"));
         }
-        if (output.option("ascii_only")) {
-            str = output.to_ascii(str);
-        }
+        str = output.to_utf8(str);
         output.print(str);
         var p = output.parent();
         if (p instanceof AST_Binary && /^in/.test(p.operator) && p.left === self)
index 9fb9ab8..4dbc197 100644 (file)
@@ -15,3 +15,43 @@ unicode_parse_variables: {
         var l০ = 3;
     }
 }
+
+issue_2242_1: {
+    beautify = {
+        ascii_only: false,
+    }
+    input: {
+        console.log("\ud83d", "\ude00", "\ud83d\ude00", "\ud83d@\ude00");
+    }
+    expect_exact: 'console.log("\\ud83d","\\ude00","\ud83d\ude00","\\ud83d@\\ude00");'
+}
+
+issue_2242_2: {
+    beautify = {
+        ascii_only: true,
+    }
+    input: {
+        console.log("\ud83d", "\ude00", "\ud83d\ude00", "\ud83d@\ude00");
+    }
+    expect_exact: 'console.log("\\ud83d","\\ude00","\\ud83d\\ude00","\\ud83d@\\ude00");'
+}
+
+issue_2242_3: {
+    options = {
+        evaluate: false,
+    }
+    input: {
+        console.log("\ud83d" + "\ude00", "\ud83d" + "@" + "\ude00");
+    }
+    expect_exact: 'console.log("\\ud83d"+"\\ude00","\\ud83d"+"@"+"\\ude00");'
+}
+
+issue_2242_4: {
+    options = {
+        evaluate: true,
+    }
+    input: {
+        console.log("\ud83d" + "\ude00", "\ud83d" + "@" + "\ude00");
+    }
+    expect_exact: 'console.log("\ud83d\ude00","\\ud83d@\\ude00");'
+}
index fde6db5..d2eb6a8 100644 (file)
@@ -78,4 +78,41 @@ describe("String literals", function() {
         assert.equal(UglifyJS.parse('"use strict";"\\08"').print_to_string(), '"use strict";"\\08";');
         assert.equal(UglifyJS.parse('"use strict";"\\09"').print_to_string(), '"use strict";"\\09";');
     });
+
+    it("Should not unescape unpaired surrogates", function() {
+        var code = [];
+        for (var i = 0; i <= 0xF; i++) {
+            code.push("\\u000" + i.toString(16));
+        }
+        for (;i <= 0xFF; i++) {
+            code.push("\\u00" + i.toString(16));
+        }
+        for (;i <= 0xFFF; i++) {
+            code.push("\\u0" + i.toString(16));
+        }
+        for (; i <= 0xFFFF; i++) {
+            code.push("\\u" + i.toString(16));
+        }
+        code = '"' + code.join() + '"';
+        var normal = UglifyJS.minify(code, {
+            compress: false,
+            mangle: false,
+            output: {
+                ascii_only: false
+            }
+        });
+        if (normal.error) throw normal.error;
+        assert.ok(code.length > normal.code.length);
+        assert.strictEqual(eval(code), eval(normal.code));
+        var ascii = UglifyJS.minify(code, {
+            compress: false,
+            mangle: false,
+            output: {
+                ascii_only: false
+            }
+        });
+        if (ascii.error) throw ascii.error;
+        assert.ok(code.length > ascii.code.length);
+        assert.strictEqual(eval(code), eval(ascii.code));
+    });
 });