From 93f3b2b114877af17db219e501ae4551df61738d Mon Sep 17 00:00:00 2001
From: "Alex Lam S.L"
Date: Mon, 11 Dec 2017 01:15:44 +0800
Subject: [PATCH] escape consecutive unpaired surrogates (#2576)

fixes #2569
---
Reviewer notes (commentary only; not part of the commit message):

  lib/output.js
    The `: function(str)` alternative shown in the hunk no longer chains two
    regex replaces. It now walks the string one UTF-16 code unit at a time
    and emits a "\uXXXX" escape for a unit that is either
      * a high surrogate whose next unit is not a low surrogate, or
      * a low surrogate whose previous unit is not a high surrogate.
    Every other unit is copied through unchanged. At the string boundaries
    str[i + 1] / str[i - 1] are undefined; the helpers leave a non-string
    argument unconverted and the range comparison is then false, so a
    boundary counts as "no partner".
    The removed second regex required a non-high-surrogate `prefix` character
    in front of each low surrogate and consumed it as part of the match;
    presumably that is why the second of two adjacent lone low surrogates
    was left unescaped (TODO confirm against issue #2569).

  lib/parse.js
    Adds is_surrogate_pair_head (0xd800-0xdbff) and is_surrogate_pair_tail
    (0xdc00-0xdfff). Each accepts either a numeric code or a string; for a
    string only charCodeAt(0) is examined.

  test/compress/unicode.js
    Adds case issue_2569: a RegExp character class built only from low
    surrogates, some directly adjacent (e.g. \udcaa\udd74), with every one
    of them expected as an escape in the output.

 lib/output.js            | 15 ++++++++++-----
 lib/parse.js             | 12 ++++++++++++
 test/compress/unicode.js |  7 +++++++
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/lib/output.js b/lib/output.js
index 1aa63450..a4c41f11 100644
--- a/lib/output.js
+++ b/lib/output.js
@@ -121,11 +121,16 @@ function OutputStream(options) {
             }
         });
     } : function(str) {
-        return str.replace(/[\ud800-\udbff](?![\udc00-\udfff])/g, function(ch) {
-            return "\\u" + ch.charCodeAt(0).toString(16);
-        }).replace(/(^|[^\ud800-\udbff])([\udc00-\udfff])/g, function(match, prefix, ch) {
-            return prefix + "\\u" + ch.charCodeAt(0).toString(16);
-        });
+        var s = "";
+        for (var i = 0, len = str.length; i < len; i++) {
+            if (is_surrogate_pair_head(str[i]) && !is_surrogate_pair_tail(str[i + 1])
+                || is_surrogate_pair_tail(str[i]) && !is_surrogate_pair_head(str[i - 1])) {
+                s += "\\u" + str.charCodeAt(i).toString(16);
+            } else {
+                s += str[i];
+            }
+        }
+        return s;
     };
 
     function make_string(str, quote) {
diff --git a/lib/parse.js b/lib/parse.js
index 099fc49a..f0098c75 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -132,6 +132,18 @@ function is_letter(code) {
         || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
 };
 
+function is_surrogate_pair_head(code) {
+    if (typeof code == "string")
+        code = code.charCodeAt(0);
+    return code >= 0xd800 && code <= 0xdbff;
+}
+
+function is_surrogate_pair_tail(code) {
+    if (typeof code == "string")
+        code = code.charCodeAt(0);
+    return code >= 0xdc00 && code <= 0xdfff;
+}
+
 function is_digit(code) {
     return code >= 48 && code <= 57;
 };
diff --git a/test/compress/unicode.js b/test/compress/unicode.js
index 4dbc197c..c4bd5de8 100644
--- a/test/compress/unicode.js
+++ b/test/compress/unicode.js
@@ -55,3 +55,10 @@ issue_2242_4: {
     }
     expect_exact: 'console.log("\ud83d\ude00","\\ud83d@\\ude00");'
 }
+
+issue_2569: {
+    input: {
+        new RegExp("[\udc42-\udcaa\udd74-\udd96\ude45-\ude4f\udea3-\udecc]");
+    }
+    expect_exact: 'new RegExp("[\\udc42-\\udcaa\\udd74-\\udd96\\ude45-\\ude4f\\udea3-\\udecc]");'
+}
-- 
2.34.1