From: Mihai Bazon Date: Thu, 11 Oct 2012 10:00:58 +0000 (+0300) Subject: stealing more hacks from acorn in the name of speed X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=fb5c01c073d06034815d5f3b782fd11cbdf6d6f5;p=UglifyJS.git stealing more hacks from acorn in the name of speed --- diff --git a/lib/parse.js b/lib/parse.js index 82fc2fd5..074e118a 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -126,17 +126,18 @@ var UNICODE = { connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]") }; -function is_letter(ch) { - return UNICODE.letter.test(ch); +function is_letter(code) { + return (code >= 97 && code <= 122) + || (code >= 65 && code <= 90) + || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code))); }; -function is_digit(ch) { - ch = ch.charCodeAt(0); - return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9 +function is_digit(code) { + return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9 }; -function is_alphanumeric_char(ch) { - return is_digit(ch) || is_letter(ch); +function is_alphanumeric_char(code) { + return is_digit(code) || is_letter(code); }; function is_unicode_combining_mark(ch) { @@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) { }; function is_identifier(name) { - return /^[a-z_$][a-z0-9_$]*$/i.test(name) - && !RESERVED_WORDS(name) + return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name); }; -function is_identifier_start(ch) { - return ch == "$" || ch == "_" || is_letter(ch); +function is_identifier_start(code) { + return code == 36 || code == 95 || is_letter(code); }; function is_identifier_char(ch) { - return is_identifier_start(ch) + var code = ch.charCodeAt(0); + return is_identifier_start(code) + || is_digit(code) + || code == 8204 // \u200c: zero-width non-joiner + || code == 8205 // \u200d: zero-width joiner (in my ECMA-262 PDF, this is also 200c) || is_unicode_combining_mark(ch) - || is_digit(ch) || is_unicode_connector_punctuation(ch) - || ch == "\u200c" // zero-width non-joiner - || ch == "\u200d" // zero-width joiner (in my ECMA-262 PDF, this is also 200c) ; }; @@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) { }; function read_while(pred) { - var ret = "", ch = peek(), i = 0; - while (ch && pred(ch, i++)) { + var ret = "", ch, i = 0; + while ((ch = peek()) && pred(ch, i++)) ret += next(); - ch = peek(); - } return ret; }; @@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) { function read_num(prefix) { var has_e = false, after_e = false, has_x = false, has_dot = prefix == "."; var num = read_while(function(ch, i){ - if (ch == "x" || ch == "X") { - if (has_x) return false; - return has_x = true; - } - if (!has_x && (ch == "E" || ch == "e")) { - if (has_e) return false; - return has_e = after_e = true; - } - if (ch == "-") { - if (after_e || (i == 0 && !prefix)) return true; - return false; + var code = ch.charCodeAt(0); + switch (code) { + case 120: case 88: // xX + return has_x ? false : (has_x = true); + case 101: case 69: // eE + return has_x ? true : has_e ? false : (has_e = after_e = true); + case 45: // - + return after_e || (i == 0 && !prefix); + case 43: // + + return after_e; + case (after_e = false, 46): // . + return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false; } - if (ch == "+") return after_e; - after_e = false; - if (ch == ".") { - if (!has_dot && !has_x && !has_e) - return has_dot = true; - return false; - } - return is_alphanumeric_char(ch); + return is_alphanumeric_char(code); }); - if (prefix) - num = prefix + num; + if (prefix) num = prefix + num; var valid = parse_js_number(num); if (!isNaN(valid)) { return token("num", valid); @@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) { function read_escaped_char(in_string) { var ch = next(true, in_string); - switch (ch) { - case "n" : return "\n"; - case "r" : return "\r"; - case "t" : return "\t"; - case "b" : return "\b"; - case "v" : return "\u000b"; - case "f" : return "\f"; - case "0" : return "\0"; - case "x" : return String.fromCharCode(hex_bytes(2)); - case "u" : return String.fromCharCode(hex_bytes(4)); - case "\n": return ""; + switch (ch.charCodeAt(0)) { + case 110 : return "\n"; + case 114 : return "\r"; + case 116 : return "\t"; + case 98 : return "\b"; + case 118 : return "\u000b"; // \v + case 102 : return "\f"; + case 48 : return "\0"; + case 120 : return String.fromCharCode(hex_bytes(2)); // \x + case 117 : return String.fromCharCode(hex_bytes(4)); // \u + case 10 : return ""; // newline default : return ch; } }; @@ -354,35 +346,33 @@ function tokenizer($TEXT, filename) { return num; }; - function read_string() { - return with_eof_error("Unterminated string constant", function(){ - var quote = next(), ret = ""; - for (;;) { - var ch = next(true); - if (ch == "\\") { - // read OctalEscapeSequence (XXX: deprecated if "strict mode") - // https://github.com/mishoo/UglifyJS/issues/178 - var octal_len = 0, first = null; - ch = read_while(function(ch){ - if (ch >= "0" && ch <= "7") { - if (!first) { - first = ch; - return ++octal_len; - } - else if (first <= "3" && octal_len <= 2) return ++octal_len; - else if (first >= "4" && octal_len <= 1) return ++octal_len; + var read_string = with_eof_error("Unterminated string constant", function(){ + var quote = next(), ret = ""; + for (;;) { + var ch = next(true); + if (ch == "\\") { + // read OctalEscapeSequence (XXX: deprecated if "strict mode") + // https://github.com/mishoo/UglifyJS/issues/178 + var octal_len = 0, first = null; + ch = read_while(function(ch){ + if (ch >= "0" && ch <= "7") { + if (!first) { + first = ch; + return ++octal_len; } - return false; - }); - if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8)); - else ch = read_escaped_char(true); - } - else if (ch == quote) break; - ret += ch; + else if (first <= "3" && octal_len <= 2) return ++octal_len; + else if (first >= "4" && octal_len <= 1) return ++octal_len; + } + return false; + }); + if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8)); + else ch = read_escaped_char(true); } - return token("string", ret); - }); - }; + else if (ch == quote) break; + ret += ch; + } + return token("string", ret); + }); function read_line_comment() { next(); @@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) { return token("comment1", ret, true); }; - function read_multiline_comment() { + var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){ next(); - return with_eof_error("Unterminated multiline comment", function(){ - var i = find("*/", true), - text = S.text.substring(S.pos, i); - S.pos = i + 2; - S.line += text.split("\n").length - 1; - S.newline_before = S.newline_before || text.indexOf("\n") >= 0; - return token("comment2", text, true); - }); - }; + var i = find("*/", true); + var text = S.text.substring(S.pos, i); + var a = text.split("\n"), n = a.length; + // update stream position + S.pos = i + 2; + S.line += n - 1; + if (n > 1) S.col = a[n - 1].length; + else S.col += a[n - 1].length; + S.col += 2; + S.newline_before = S.newline_before || text.indexOf("\n") >= 0; + return token("comment2", text, true); + }); function read_name() { var backslash = false, name = "", ch, escaped = false, hex; @@ -432,29 +425,27 @@ function tokenizer($TEXT, filename) { return name; }; - function read_regexp(regexp) { - return with_eof_error("Unterminated regular expression", function(){ - var prev_backslash = false, ch, in_class = false; - while ((ch = next(true))) if (prev_backslash) { - regexp += "\\" + ch; - prev_backslash = false; - } else if (ch == "[") { - in_class = true; - regexp += ch; - } else if (ch == "]" && in_class) { - in_class = false; - regexp += ch; - } else if (ch == "/" && !in_class) { - break; - } else if (ch == "\\") { - prev_backslash = true; - } else { - regexp += ch; - } - var mods = read_name(); - return token("regexp", new RegExp(regexp, mods)); - }); - }; + var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){ + var prev_backslash = false, ch, in_class = false; + while ((ch = next(true))) if (prev_backslash) { + regexp += "\\" + ch; + prev_backslash = false; + } else if (ch == "[") { + in_class = true; + regexp += ch; + } else if (ch == "]" && in_class) { + in_class = false; + regexp += ch; + } else if (ch == "/" && !in_class) { + break; + } else if (ch == "\\") { + prev_backslash = true; + } else { + regexp += ch; + } + var mods = read_name(); + return token("regexp", new RegExp(regexp, mods)); + }); function read_operator(prefix) { function grow(op) { @@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) { function handle_dot() { next(); - return is_digit(peek()) + return is_digit(peek().charCodeAt(0)) ? read_num(".") : token("punc", "."); }; @@ -502,12 +493,14 @@ function tokenizer($TEXT, filename) { }; function with_eof_error(eof_error, cont) { - try { - return cont(); - } catch(ex) { - if (ex === EX_EOF) parse_error(eof_error); - else throw ex; - } + return function(x) { + try { + return cont(x); + } catch(ex) { + if (ex === EX_EOF) parse_error(eof_error); + else throw ex; + } + }; }; function next_token(force_regexp) { @@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) { start_token(); var ch = peek(); if (!ch) return token("eof"); - if (is_digit(ch)) return read_num(); - if (ch == '"' || ch == "'") return read_string(); + var code = ch.charCodeAt(0); + switch (code) { + case 34: case 39: return read_string(); + case 46: return handle_dot(); + case 47: return handle_slash(); + } + if (is_digit(code)) return read_num(); if (PUNC_CHARS(ch)) return token("punc", next()); - if (ch == ".") return handle_dot(); - if (ch == "/") return handle_slash(); if (OPERATOR_CHARS(ch)) return read_operator(); - if (ch == "\\" || is_identifier_start(ch)) return read_word(); + if (code == 92 || is_identifier_start(code)) return read_word(); parse_error("Unexpected character '" + ch + "'"); }; @@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) { /* -----[ Parser (constants) ]----- */ -var UNARY_PREFIX = array_to_hash([ +var UNARY_PREFIX = makePredicate([ "typeof", "void", "delete", @@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([ "+" ]); -var UNARY_POSTFIX = array_to_hash([ "--", "++" ]); +var UNARY_POSTFIX = makePredicate([ "--", "++" ]); -var ASSIGNMENT = (function(a, ret, i){ - while (i < a.length) { - ret[a[i]] = a[i]; - i++; - } - return ret; -})( - [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ], - {}, - 0 -); +var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]); var PRECEDENCE = (function(a, ret){ for (var i = 0, n = 1; i < a.length; ++i, ++n) { @@ -680,7 +666,7 @@ function parse($TEXT, options) { function parenthesised() { expect("("); - var exp = expression(); + var exp = expression(true); expect(")"); return exp; }; @@ -688,7 +674,7 @@ function parse($TEXT, options) { function embed_tokens(parser) { return function() { var start = S.token; - var expr = parser.apply(this, arguments); + var expr = parser(); var end = prev(); expr.start = start; expr.end = end; @@ -697,6 +683,7 @@ function parse($TEXT, options) { }; var statement = embed_tokens(function() { + var tmp; if (is("operator", "/") || is("operator", "/=")) { S.peeked = null; S.token = S.input(S.token.value.substr(1)); // force regexp @@ -738,7 +725,7 @@ function parse($TEXT, options) { } case "keyword": - switch (prog1(S.token.value, next)) { + switch (tmp = S.token.value, next(), tmp) { case "break": return break_cont(AST_Break); @@ -752,7 +739,7 @@ function parse($TEXT, options) { case "do": return new AST_Do({ body : in_loop(statement), - condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon)) + condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp) }); case "while": @@ -778,30 +765,30 @@ function parse($TEXT, options) { ? (next(), null) : can_insert_semicolon() ? null - : prog1(expression, semicolon) ) + : (tmp = expression(true), semicolon(), tmp) ) }); case "switch": return new AST_Switch({ expression : parenthesised(), - body : switch_body_() + body : in_loop(switch_body_) }); case "throw": if (S.token.nlb) croak("Illegal newline after 'throw'"); return new AST_Throw({ - value: prog1(expression, semicolon) + value: (tmp = expression(true), semicolon(), tmp) }); case "try": return try_(); case "var": - return prog1(var_, semicolon); + return tmp = var_(), semicolon(), tmp; case "const": - return prog1(const_, semicolon); + return tmp = const_(), semicolon(), tmp; case "with": return new AST_With({ @@ -831,8 +818,8 @@ function parse($TEXT, options) { return new AST_LabeledStatement({ body: stat, label: label }); }; - function simple_statement() { - return new AST_SimpleStatement({ body: prog1(expression, semicolon) }); + function simple_statement(tmp) { + return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) }); }; function break_cont(type) { @@ -869,9 +856,9 @@ function parse($TEXT, options) { function regular_for(init) { expect(";"); - var test = is("punc", ";") ? null : expression(); + var test = is("punc", ";") ? null : expression(true); expect(";"); - var step = is("punc", ")") ? null : expression(); + var step = is("punc", ")") ? null : expression(true); expect(")"); return new AST_For({ init : init, @@ -883,7 +870,7 @@ function parse($TEXT, options) { function for_in(init) { var lhs = init instanceof AST_Var ? init.definitions[0].name : null; - var obj = expression(); + var obj = expression(true); expect(")"); return new AST_ForIn({ init : init, @@ -911,10 +898,8 @@ function parse($TEXT, options) { next(); return a; })(true, []), - body: embed_tokens(function(){ + body: (function(loop, labels){ ++S.in_function; - var loop = S.in_loop; - var labels = S.labels; S.in_directives = true; S.in_loop = 0; S.labels = []; @@ -923,7 +908,7 @@ function parse($TEXT, options) { S.in_loop = loop; S.labels = labels; return a; - })() + })(S.in_loop, S.labels) }); }; @@ -951,17 +936,17 @@ function parse($TEXT, options) { return a; }; - var switch_body_ = curry(in_loop, function(){ + function switch_body_() { expect("{"); - var a = [], cur = null, branch = null; + var a = [], cur = null, branch = null, tmp; while (!is("punc", "}")) { if (is("eof")) unexpected(); if (is("keyword", "case")) { if (branch) branch.end = prev(); cur = []; branch = new AST_Case({ - start : prog1(S.token, next), - expression : expression(), + start : (tmp = S.token, next(), tmp), + expression : expression(true), body : cur }); a.push(branch); @@ -971,9 +956,9 @@ function parse($TEXT, options) { if (branch) branch.end = prev(); cur = []; branch = new AST_Default({ - start : prog1(S.token, next, curry(expect, ":")), + start : (tmp = S.token, next(), expect(":"), tmp), body : cur - }) + }); a.push(branch); } else { @@ -984,7 +969,7 @@ function parse($TEXT, options) { if (branch) branch.end = prev(); next(); return a; - }); + }; function try_() { var body = block_(), bcatch = null, bfinally = null; @@ -1110,7 +1095,7 @@ function parse($TEXT, options) { switch (start.value) { case "(": next(); - var ex = expression(); + var ex = expression(true); ex.start = start; ex.end = S.token; expect(")"); @@ -1201,6 +1186,7 @@ function parse($TEXT, options) { }); function as_property_name() { + var tmp; switch (S.token.type) { case "num": case "string": @@ -1208,19 +1194,20 @@ function parse($TEXT, options) { case "operator": case "keyword": case "atom": - return prog1(S.token.value, next); + return (tmp = S.token.value, next(), tmp); default: unexpected(); } }; function as_name() { + var tmp; switch (S.token.type) { case "name": case "operator": case "keyword": case "atom": - return prog1(S.token.value, next); + return (tmp = S.token.value, next(), tmp); default: unexpected(); } @@ -1254,7 +1241,7 @@ function parse($TEXT, options) { } if (is("punc", "[")) { next(); - var prop = expression(); + var prop = expression(true); expect("]"); return subscripts(new AST_Sub({ start : start, @@ -1276,17 +1263,17 @@ function parse($TEXT, options) { }; var maybe_unary = function(allow_calls) { - var start = S.token; - if (is("operator") && UNARY_PREFIX[S.token.value]) { + var start = S.token, tmp; + if (is("operator") && UNARY_PREFIX(S.token.value)) { var ex = make_unary(AST_UnaryPrefix, - prog1(S.token.value, next), + (tmp = S.token.value, next(), tmp), maybe_unary(allow_calls)); ex.start = start; ex.end = prev(); return ex; } var val = expr_atom(allow_calls); - while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) { + while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) { val = make_unary(AST_UnaryPostfix, S.token.value, val); val.start = start; val.end = S.token; @@ -1357,13 +1344,13 @@ function parse($TEXT, options) { var maybe_assign = function(no_in) { var start = S.token; var left = maybe_conditional(no_in), val = S.token.value; - if (is("operator") && ASSIGNMENT[val]) { + if (is("operator") && ASSIGNMENT(val)) { if (is_assignable(left)) { next(); return new AST_Assign({ start : start, left : left, - operator : ASSIGNMENT[val], + operator : val, right : maybe_assign(no_in), end : peek() }); @@ -1374,8 +1361,6 @@ function parse($TEXT, options) { }; var expression = function(commas, no_in) { - if (arguments.length == 0) - commas = true; var start = S.token; var expr = maybe_assign(no_in); if (commas && is("punc", ",")) { diff --git a/lib/utils.js b/lib/utils.js index d18e62ee..79039665 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -43,19 +43,6 @@ "use strict"; -function curry(f) { - var args = slice(arguments, 1); - return function() { return f.apply(this, args.concat(slice(arguments))); }; -}; - -function prog1(ret) { - if (ret instanceof Function) - ret = ret(); - for (var i = 1, n = arguments.length; --n > 0; ++i) - arguments[i](); - return ret; -}; - function array_to_hash(a) { var ret = Object.create(null); for (var i = 0; i < a.length; ++i)