stealing more hacks from acorn in the name of speed

author Mihai Bazon <mihai@bazon.net>

Thu, 11 Oct 2012 10:00:58 +0000 (13:00 +0300)

committer Mihai Bazon <mihai@bazon.net>

Thu, 11 Oct 2012 12:17:42 +0000 (15:17 +0300)
author Mihai Bazon <mihai@bazon.net>
Thu, 11 Oct 2012 10:00:58 +0000 (13:00 +0300)
committer Mihai Bazon <mihai@bazon.net>
Thu, 11 Oct 2012 12:17:42 +0000 (15:17 +0300)
diff --git a/lib/parse.js b/lib/parse.js

index 82fc2fd..074e118 100644 (file)
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -126,17 +126,18 @@ var UNICODE = {
      connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
  };
  
-function is_letter(ch) {
-    return UNICODE.letter.test(ch);
+function is_letter(code) {
+    return (code >= 97 && code <= 122)
+        || (code >= 65 && code <= 90)
+        || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
  };
  
-function is_digit(ch) {
-    ch = ch.charCodeAt(0);
-    return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
+function is_digit(code) {
+    return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
  };
  
-function is_alphanumeric_char(ch) {
-    return is_digit(ch) || is_letter(ch);
+function is_alphanumeric_char(code) {
+    return is_digit(code) || is_letter(code);
  };
  
  function is_unicode_combining_mark(ch) {
@@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) {
  };
  
  function is_identifier(name) {
-    return /^[a-z_$][a-z0-9_$]*$/i.test(name)
-        && !RESERVED_WORDS(name)
+    return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name);
  };
  
-function is_identifier_start(ch) {
-    return ch == "$" || ch == "_" || is_letter(ch);
+function is_identifier_start(code) {
+    return code == 36 || code == 95 || is_letter(code);
  };
  
  function is_identifier_char(ch) {
-    return is_identifier_start(ch)
+    var code = ch.charCodeAt(0);
+    return is_identifier_start(code)
+        || is_digit(code)
+        || code == 8204 // \u200c: zero-width non-joiner <ZWNJ>
+        || code == 8205 // \u200d: zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
          || is_unicode_combining_mark(ch)
-        || is_digit(ch)
          || is_unicode_connector_punctuation(ch)
-        || ch == "\u200c" // zero-width non-joiner <ZWNJ>
-        || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
      ;
  };
  
@@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) {
      };
  
      function read_while(pred) {
-        var ret = "", ch = peek(), i = 0;
-        while (ch && pred(ch, i++)) {
+        var ret = "", ch, i = 0;
+        while ((ch = peek()) && pred(ch, i++))
              ret += next();
-            ch = peek();
-        }
          return ret;
      };
  
@@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) {
      function read_num(prefix) {
          var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
          var num = read_while(function(ch, i){
-            if (ch == "x" || ch == "X") {
-                if (has_x) return false;
-                return has_x = true;
-            }
-            if (!has_x && (ch == "E" || ch == "e")) {
-                if (has_e) return false;
-                return has_e = after_e = true;
-            }
-            if (ch == "-") {
-                if (after_e || (i == 0 && !prefix)) return true;
-                return false;
+            var code = ch.charCodeAt(0);
+            switch (code) {
+              case 120: case 88: // xX
+                return has_x ? false : (has_x = true);
+              case 101: case 69: // eE
+                return has_x ? true : has_e ? false : (has_e = after_e = true);
+              case 45: // -
+                return after_e || (i == 0 && !prefix);
+              case 43: // +
+                return after_e;
+              case (after_e = false, 46): // .
+                return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false;
              }
-            if (ch == "+") return after_e;
-            after_e = false;
-            if (ch == ".") {
-                if (!has_dot && !has_x && !has_e)
-                    return has_dot = true;
-                return false;
-            }
-            return is_alphanumeric_char(ch);
+            return is_alphanumeric_char(code);
          });
-        if (prefix)
-            num = prefix + num;
+        if (prefix) num = prefix + num;
          var valid = parse_js_number(num);
          if (!isNaN(valid)) {
              return token("num", valid);
@@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) {
  
      function read_escaped_char(in_string) {
          var ch = next(true, in_string);
-        switch (ch) {
-          case "n" : return "\n";
-          case "r" : return "\r";
-          case "t" : return "\t";
-          case "b" : return "\b";
-          case "v" : return "\u000b";
-          case "f" : return "\f";
-          case "0" : return "\0";
-          case "x" : return String.fromCharCode(hex_bytes(2));
-          case "u" : return String.fromCharCode(hex_bytes(4));
-          case "\n": return "";
+        switch (ch.charCodeAt(0)) {
+          case 110 : return "\n";
+          case 114 : return "\r";
+          case 116 : return "\t";
+          case 98  : return "\b";
+          case 118 : return "\u000b"; // \v
+          case 102 : return "\f";
+          case 48  : return "\0";
+          case 120 : return String.fromCharCode(hex_bytes(2)); // \x
+          case 117 : return String.fromCharCode(hex_bytes(4)); // \u
+          case 10  : return ""; // newline
            default  : return ch;
          }
      };
@@ -354,35 +346,33 @@ function tokenizer($TEXT, filename) {
          return num;
      };
  
-    function read_string() {
-        return with_eof_error("Unterminated string constant", function(){
-            var quote = next(), ret = "";
-            for (;;) {
-                var ch = next(true);
-                if (ch == "\\") {
-                    // read OctalEscapeSequence (XXX: deprecated if "strict mode")
-                    // https://github.com/mishoo/UglifyJS/issues/178
-                    var octal_len = 0, first = null;
-                    ch = read_while(function(ch){
-                        if (ch >= "0" && ch <= "7") {
-                            if (!first) {
-                                first = ch;
-                                return ++octal_len;
-                            }
-                            else if (first <= "3" && octal_len <= 2) return ++octal_len;
-                            else if (first >= "4" && octal_len <= 1) return ++octal_len;
+    var read_string = with_eof_error("Unterminated string constant", function(){
+        var quote = next(), ret = "";
+        for (;;) {
+            var ch = next(true);
+            if (ch == "\\") {
+                // read OctalEscapeSequence (XXX: deprecated if "strict mode")
+                // https://github.com/mishoo/UglifyJS/issues/178
+                var octal_len = 0, first = null;
+                ch = read_while(function(ch){
+                    if (ch >= "0" && ch <= "7") {
+                        if (!first) {
+                            first = ch;
+                            return ++octal_len;
                          }
-                        return false;
-                    });
-                    if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
-                    else ch = read_escaped_char(true);
-                }
-                else if (ch == quote) break;
-                ret += ch;
+                        else if (first <= "3" && octal_len <= 2) return ++octal_len;
+                        else if (first >= "4" && octal_len <= 1) return ++octal_len;
+                    }
+                    return false;
+                });
+                if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
+                else ch = read_escaped_char(true);
              }
-            return token("string", ret);
-        });
-    };
+            else if (ch == quote) break;
+            ret += ch;
+        }
+        return token("string", ret);
+    });
  
      function read_line_comment() {
          next();
@@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) {
          return token("comment1", ret, true);
      };
  
-    function read_multiline_comment() {
+    var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
          next();
-        return with_eof_error("Unterminated multiline comment", function(){
-            var i = find("*/", true),
-            text = S.text.substring(S.pos, i);
-            S.pos = i + 2;
-            S.line += text.split("\n").length - 1;
-            S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
-            return token("comment2", text, true);
-        });
-    };
+        var i = find("*/", true);
+        var text = S.text.substring(S.pos, i);
+        var a = text.split("\n"), n = a.length;
+        // update stream position
+        S.pos = i + 2;
+        S.line += n - 1;
+        if (n > 1) S.col = a[n - 1].length;
+        else S.col += a[n - 1].length;
+        S.col += 2;
+        S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
+        return token("comment2", text, true);
+    });
  
      function read_name() {
          var backslash = false, name = "", ch, escaped = false, hex;
@@ -432,29 +425,27 @@ function tokenizer($TEXT, filename) {
          return name;
      };
  
-    function read_regexp(regexp) {
-        return with_eof_error("Unterminated regular expression", function(){
-            var prev_backslash = false, ch, in_class = false;
-            while ((ch = next(true))) if (prev_backslash) {
-                regexp += "\\" + ch;
-                prev_backslash = false;
-            } else if (ch == "[") {
-                in_class = true;
-                regexp += ch;
-            } else if (ch == "]" && in_class) {
-                in_class = false;
-                regexp += ch;
-            } else if (ch == "/" && !in_class) {
-                break;
-            } else if (ch == "\\") {
-                prev_backslash = true;
-            } else {
-                regexp += ch;
-            }
-            var mods = read_name();
-            return token("regexp", new RegExp(regexp, mods));
-        });
-    };
+    var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
+        var prev_backslash = false, ch, in_class = false;
+        while ((ch = next(true))) if (prev_backslash) {
+            regexp += "\\" + ch;
+            prev_backslash = false;
+        } else if (ch == "[") {
+            in_class = true;
+            regexp += ch;
+        } else if (ch == "]" && in_class) {
+            in_class = false;
+            regexp += ch;
+        } else if (ch == "/" && !in_class) {
+            break;
+        } else if (ch == "\\") {
+            prev_backslash = true;
+        } else {
+            regexp += ch;
+        }
+        var mods = read_name();
+        return token("regexp", new RegExp(regexp, mods));
+    });
  
      function read_operator(prefix) {
          function grow(op) {
@@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) {
  
      function handle_dot() {
          next();
-        return is_digit(peek())
+        return is_digit(peek().charCodeAt(0))
              ? read_num(".")
              : token("punc", ".");
      };
@@ -502,12 +493,14 @@ function tokenizer($TEXT, filename) {
      };
  
      function with_eof_error(eof_error, cont) {
-        try {
-            return cont();
-        } catch(ex) {
-            if (ex === EX_EOF) parse_error(eof_error);
-            else throw ex;
-        }
+        return function(x) {
+            try {
+                return cont(x);
+            } catch(ex) {
+                if (ex === EX_EOF) parse_error(eof_error);
+                else throw ex;
+            }
+        };
      };
  
      function next_token(force_regexp) {
@@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) {
          start_token();
          var ch = peek();
          if (!ch) return token("eof");
-        if (is_digit(ch)) return read_num();
-        if (ch == '"' || ch == "'") return read_string();
+        var code = ch.charCodeAt(0);
+        switch (code) {
+          case 34: case 39: return read_string();
+          case 46: return handle_dot();
+          case 47: return handle_slash();
+        }
+        if (is_digit(code)) return read_num();
          if (PUNC_CHARS(ch)) return token("punc", next());
-        if (ch == ".") return handle_dot();
-        if (ch == "/") return handle_slash();
          if (OPERATOR_CHARS(ch)) return read_operator();
-        if (ch == "\\" || is_identifier_start(ch)) return read_word();
+        if (code == 92 || is_identifier_start(code)) return read_word();
          parse_error("Unexpected character '" + ch + "'");
      };
  
@@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) {
  
  /* -----[ Parser (constants) ]----- */
  
-var UNARY_PREFIX = array_to_hash([
+var UNARY_PREFIX = makePredicate([
      "typeof",
      "void",
      "delete",
@@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([
      "+"
  ]);
  
-var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
+var UNARY_POSTFIX = makePredicate([ "--", "++" ]);
  
-var ASSIGNMENT = (function(a, ret, i){
-    while (i < a.length) {
-        ret[a[i]] = a[i];
-        i++;
-    }
-    return ret;
-})(
-    [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
-    {},
-    0
-);
+var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]);
  
  var PRECEDENCE = (function(a, ret){
      for (var i = 0, n = 1; i < a.length; ++i, ++n) {
@@ -680,7 +666,7 @@ function parse($TEXT, options) {
  
      function parenthesised() {
          expect("(");
-        var exp = expression();
+        var exp = expression(true);
          expect(")");
          return exp;
      };
@@ -688,7 +674,7 @@ function parse($TEXT, options) {
      function embed_tokens(parser) {
          return function() {
              var start = S.token;
-            var expr = parser.apply(this, arguments);
+            var expr = parser();
              var end = prev();
              expr.start = start;
              expr.end = end;
@@ -697,6 +683,7 @@ function parse($TEXT, options) {
      };
  
      var statement = embed_tokens(function() {
+        var tmp;
          if (is("operator", "/") || is("operator", "/=")) {
              S.peeked = null;
              S.token = S.input(S.token.value.substr(1)); // force regexp
@@ -738,7 +725,7 @@ function parse($TEXT, options) {
              }
  
            case "keyword":
-            switch (prog1(S.token.value, next)) {
+            switch (tmp = S.token.value, next(), tmp) {
                case "break":
                  return break_cont(AST_Break);
  
@@ -752,7 +739,7 @@ function parse($TEXT, options) {
                case "do":
                  return new AST_Do({
                      body      : in_loop(statement),
-                    condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon))
+                    condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp)
                  });
  
                case "while":
@@ -778,30 +765,30 @@ function parse($TEXT, options) {
                               ? (next(), null)
                               : can_insert_semicolon()
                               ? null
-                             : prog1(expression, semicolon) )
+                             : (tmp = expression(true), semicolon(), tmp) )
                  });
  
                case "switch":
                  return new AST_Switch({
                      expression : parenthesised(),
-                    body       : switch_body_()
+                    body       : in_loop(switch_body_)
                  });
  
                case "throw":
                  if (S.token.nlb)
                      croak("Illegal newline after 'throw'");
                  return new AST_Throw({
-                    value: prog1(expression, semicolon)
+                    value: (tmp = expression(true), semicolon(), tmp)
                  });
  
                case "try":
                  return try_();
  
                case "var":
-                return prog1(var_, semicolon);
+                return tmp = var_(), semicolon(), tmp;
  
                case "const":
-                return prog1(const_, semicolon);
+                return tmp = const_(), semicolon(), tmp;
  
                case "with":
                  return new AST_With({
@@ -831,8 +818,8 @@ function parse($TEXT, options) {
          return new AST_LabeledStatement({ body: stat, label: label });
      };
  
-    function simple_statement() {
-        return new AST_SimpleStatement({ body: prog1(expression, semicolon) });
+    function simple_statement(tmp) {
+        return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) });
      };
  
      function break_cont(type) {
@@ -869,9 +856,9 @@ function parse($TEXT, options) {
  
      function regular_for(init) {
          expect(";");
-        var test = is("punc", ";") ? null : expression();
+        var test = is("punc", ";") ? null : expression(true);
          expect(";");
-        var step = is("punc", ")") ? null : expression();
+        var step = is("punc", ")") ? null : expression(true);
          expect(")");
          return new AST_For({
              init      : init,
@@ -883,7 +870,7 @@ function parse($TEXT, options) {
  
      function for_in(init) {
          var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
-        var obj = expression();
+        var obj = expression(true);
          expect(")");
          return new AST_ForIn({
              init   : init,
@@ -911,10 +898,8 @@ function parse($TEXT, options) {
                  next();
                  return a;
              })(true, []),
-            body: embed_tokens(function(){
+            body: (function(loop, labels){
                  ++S.in_function;
-                var loop = S.in_loop;
-                var labels = S.labels;
                  S.in_directives = true;
                  S.in_loop = 0;
                  S.labels = [];
@@ -923,7 +908,7 @@ function parse($TEXT, options) {
                  S.in_loop = loop;
                  S.labels = labels;
                  return a;
-            })()
+            })(S.in_loop, S.labels)
          });
      };
  
@@ -951,17 +936,17 @@ function parse($TEXT, options) {
          return a;
      };
  
-    var switch_body_ = curry(in_loop, function(){
+    function switch_body_() {
          expect("{");
-        var a = [], cur = null, branch = null;
+        var a = [], cur = null, branch = null, tmp;
          while (!is("punc", "}")) {
              if (is("eof")) unexpected();
              if (is("keyword", "case")) {
                  if (branch) branch.end = prev();
                  cur = [];
                  branch = new AST_Case({
-                    start      : prog1(S.token, next),
-                    expression : expression(),
+                    start      : (tmp = S.token, next(), tmp),
+                    expression : expression(true),
                      body       : cur
                  });
                  a.push(branch);
@@ -971,9 +956,9 @@ function parse($TEXT, options) {
                  if (branch) branch.end = prev();
                  cur = [];
                  branch = new AST_Default({
-                    start : prog1(S.token, next, curry(expect, ":")),
+                    start : (tmp = S.token, next(), expect(":"), tmp),
                      body  : cur
-                })
+                });
                  a.push(branch);
              }
              else {
@@ -984,7 +969,7 @@ function parse($TEXT, options) {
          if (branch) branch.end = prev();
          next();
          return a;
-    });
+    };
  
      function try_() {
          var body = block_(), bcatch = null, bfinally = null;
@@ -1110,7 +1095,7 @@ function parse($TEXT, options) {
              switch (start.value) {
                case "(":
                  next();
-                var ex = expression();
+                var ex = expression(true);
                  ex.start = start;
                  ex.end = S.token;
                  expect(")");
@@ -1201,6 +1186,7 @@ function parse($TEXT, options) {
      });
  
      function as_property_name() {
+        var tmp;
          switch (S.token.type) {
            case "num":
            case "string":
@@ -1208,19 +1194,20 @@ function parse($TEXT, options) {
            case "operator":
            case "keyword":
            case "atom":
-            return prog1(S.token.value, next);
+            return (tmp = S.token.value, next(), tmp);
            default:
              unexpected();
          }
      };
  
      function as_name() {
+        var tmp;
          switch (S.token.type) {
            case "name":
            case "operator":
            case "keyword":
            case "atom":
-            return prog1(S.token.value, next);
+            return (tmp = S.token.value, next(), tmp);
            default:
              unexpected();
          }
@@ -1254,7 +1241,7 @@ function parse($TEXT, options) {
          }
          if (is("punc", "[")) {
              next();
-            var prop = expression();
+            var prop = expression(true);
              expect("]");
              return subscripts(new AST_Sub({
                  start      : start,
@@ -1276,17 +1263,17 @@ function parse($TEXT, options) {
      };
  
      var maybe_unary = function(allow_calls) {
-        var start = S.token;
-        if (is("operator") && UNARY_PREFIX[S.token.value]) {
+        var start = S.token, tmp;
+        if (is("operator") && UNARY_PREFIX(S.token.value)) {
              var ex = make_unary(AST_UnaryPrefix,
-                                prog1(S.token.value, next),
+                                (tmp = S.token.value, next(), tmp),
                                  maybe_unary(allow_calls));
              ex.start = start;
              ex.end = prev();
              return ex;
          }
          var val = expr_atom(allow_calls);
-        while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) {
+        while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) {
              val = make_unary(AST_UnaryPostfix, S.token.value, val);
              val.start = start;
              val.end = S.token;
@@ -1357,13 +1344,13 @@ function parse($TEXT, options) {
      var maybe_assign = function(no_in) {
          var start = S.token;
          var left = maybe_conditional(no_in), val = S.token.value;
-        if (is("operator") && ASSIGNMENT[val]) {
+        if (is("operator") && ASSIGNMENT(val)) {
              if (is_assignable(left)) {
                  next();
                  return new AST_Assign({
                      start    : start,
                      left     : left,
-                    operator : ASSIGNMENT[val],
+                    operator : val,
                      right    : maybe_assign(no_in),
                      end      : peek()
                  });
@@ -1374,8 +1361,6 @@ function parse($TEXT, options) {
      };
  
      var expression = function(commas, no_in) {
-        if (arguments.length == 0)
-            commas = true;
          var start = S.token;
          var expr = maybe_assign(no_in);
          if (commas && is("punc", ",")) {
diff --git a/lib/utils.js b/lib/utils.js

index d18e62e..7903966 100644 (file)
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -43,19 +43,6 @@
  
  "use strict";
  
-function curry(f) {
-    var args = slice(arguments, 1);
-    return function() { return f.apply(this, args.concat(slice(arguments))); };
-};
-
-function prog1(ret) {
-    if (ret instanceof Function)
-        ret = ret();
-    for (var i = 1, n = arguments.length; --n > 0; ++i)
-        arguments[i]();
-    return ret;
-};
-
  function array_to_hash(a) {
      var ret = Object.create(null);
      for (var i = 0; i < a.length; ++i)
author	Mihai Bazon <mihai@bazon.net>
	Thu, 11 Oct 2012 10:00:58 +0000 (13:00 +0300)
committer	Mihai Bazon <mihai@bazon.net>
	Thu, 11 Oct 2012 12:17:42 +0000 (15:17 +0300)
lib/parse.js		patch | blob | history
lib/utils.js		patch | blob | history