WIP
author Mihai Bazon <mihai@bazon.net>
Sun, 27 May 2012 14:25:31 +0000 (17:25 +0300)
committer Mihai Bazon <mihai@bazon.net>
Sun, 3 Jun 2012 20:10:31 +0000 (23:10 +0300)
lib/ast.js
lib/node.js
lib/output.js [new file with mode: 0644]
lib/parse.js
lib/test.js
lib/utils.js

index 9920e1b..683f602 100644 (file)
@@ -18,42 +18,75 @@ function DEFNODE(type, props, methods, base) {
     if (type) {
         ctor.prototype.TYPE = ctor.TYPE = type;
     }
-    if (methods) for (var i in methods) if (HOP(methods, i)) {
+    if (methods) for (i in methods) if (HOP(methods, i)) {
         ctor.prototype[i] = methods[i];
     }
     return ctor;
 };
 
-var AST_Token = DEFNODE("Token", "type value line col pos endpos nlb", {
+var AST_Token = DEFNODE("Token", "type value line col pos endpos nlb comments_before", {
 
 }, null);
 
 var AST_Node = DEFNODE("Node", "start end", {
-
+    renew: function(args) { // returns `new ctor(args)`, after filling keys absent from args with this node's values
+        var ctor = this.CTOR, props = ctor.props; // CTOR/props are presumably set up by DEFNODE (not visible in this hunk)
+        for (var i in props) if (!HOP(args, i)) args[i] = this[i]; // NOTE(review): writes into the caller's args; if props is an array, i is an index rather than a property name -- confirm
+        return new ctor(args);
+    },
+    walk: function(w) {
+        w._visit(this);
+    }
 }, null);
 
 var AST_Directive = DEFNODE("Directive", "value", {
-
+    print: function(output) {
+        output.string(this.value);
+    }
 });
 
 var AST_Debugger = DEFNODE("Debugger", null, {
-
+    print: function(output) {
+        output.print("debugger");
+    }
 });
 
 var AST_Parenthesized = DEFNODE("Parenthesized", "expression", {
-    documentation: "Represents an expression which is always parenthesized.  Used for the \
-conditions in IF/WHILE."
+    $documentation: "Represents an expression which is always parenthesized.  Used for the \
+conditions in IF/WHILE/DO and expression in SWITCH/WITH.",
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+        });
+    }
 });
 
 var AST_Bracketed = DEFNODE("Bracketed", "body", {
-    documentation: "Represents a block of statements that are always included in brackets. \
-Used for bodies of FUNCTION/TRY/CATCH/THROW/SWITCH."
+    $documentation: "Represents a block of statements that are always included in brackets. \
+Used for bodies of FUNCTION/TRY/CATCH/THROW/SWITCH.",
+    walk: function(w) {
+        w._visit(this, function(){
+            this.body.forEach(function(stat){
+                stat.walk(w);
+            });
+        });
+    }
 });
 
 /* -----[ loops ]----- */
 
 var AST_LabeledStatement = DEFNODE("LabeledStatement", "label body", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.label) this.label.walk(w);
+            if (this.body) {
+                if (this.body instanceof Array)
+                    AST_Bracketed.prototype.walk.call(this, w);
+                else
+                    this.body.walk(w);
+            }
+        });
+    }
 });
 
 var AST_Statement = DEFNODE("Statement", null, {
@@ -61,39 +94,64 @@ var AST_Statement = DEFNODE("Statement", null, {
 }, AST_LabeledStatement);
 
 var AST_Do = DEFNODE("Do", "condition", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.condition.walk(w);
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_LabeledStatement);
 
 var AST_While = DEFNODE("While", "condition", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.condition.walk(w);
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_LabeledStatement);
 
 var AST_For = DEFNODE("For", "init condition step", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.init) this.init.walk(w);
+            if (this.condition) this.condition.walk(w);
+            if (this.step) this.step.walk(w);
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_LabeledStatement);
 
 var AST_ForIn = DEFNODE("ForIn", "init name object", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.init) this.init.walk(w);
+            this.object.walk(w);
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_LabeledStatement);
 
 var AST_With = DEFNODE("With", "expression body", {
-
-});
-
-var AST_LoopControl = DEFNODE("LoopControl", "label", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 });
-var AST_Break = DEFNODE("Break", null, {
-
-}, AST_LoopControl);
-var AST_Continue = DEFNODE("Continue", null, {
-
-}, AST_LoopControl);
 
 /* -----[ functions ]----- */
 
 var AST_Scope = DEFNODE("Scope", "identifiers body", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.identifiers) this.identifiers.forEach(function(el){
+                el.walk(w);
+            });
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 });
 
 var AST_Toplevel = DEFNODE("Toplevel", null, {
@@ -101,37 +159,84 @@ var AST_Toplevel = DEFNODE("Toplevel", null, {
 }, AST_Scope);
 
 var AST_Lambda = DEFNODE("Lambda", "name argnames", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.name) this.name.walk(w);
+            this.argnames.forEach(function(el){
+                el.walk(w);
+            });
+            AST_Scope.prototype.walk.call(this, w);
+        });
+    }
 }, AST_Scope);
+
 var AST_Function = DEFNODE("Function", null, {
 
 }, AST_Lambda);
+
 var AST_Defun = DEFNODE("Defun", null, {
 
 }, AST_Function);
 
 /* -----[ JUMPS ]----- */
 
-var AST_Jump = DEFNODE("Jump", "value");
+var AST_Jump = DEFNODE("Jump", null, {
 
-var AST_Return = DEFNODE("Return", null, {
+});
 
+var AST_Exit = DEFNODE("Exit", "value", {
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.value) this.value.walk(w);
+        });
+    }
 }, AST_Jump);
 
+var AST_Return = DEFNODE("Return", null, {
+
+}, AST_Exit);
+
 var AST_Throw = DEFNODE("Throw", null, {
 
+}, AST_Exit);
+
+var AST_LoopControl = DEFNODE("LoopControl", "label", {
+    walk: function(w) {
+        w._visit(this, function(){
+            if (this.label) this.label.walk(w);
+        });
+    }
 }, AST_Jump);
 
+var AST_Break = DEFNODE("Break", null, {
+
+}, AST_LoopControl);
+
+var AST_Continue = DEFNODE("Continue", null, {
+
+}, AST_LoopControl);
+
 /* -----[ IF ]----- */
 
 var AST_If = DEFNODE("If", "condition consequent alternative", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.condition.walk(w);
+            this.consequent.walk(w);
+            if (this.alternative) this.alternative.walk(w);
+        });
+    }
 });
 
 /* -----[ SWITCH ]----- */
 
 var AST_Switch = DEFNODE("Switch", "expression", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+            AST_LabeledStatement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_LabeledStatement);
 
 var AST_SwitchBlock = DEFNODE("SwitchBlock", null, {
@@ -143,21 +248,41 @@ var AST_SwitchBranch = DEFNODE("SwitchBranch", "body", {
 });
 
 var AST_Default = DEFNODE("Default", null, {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            AST_Statement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_SwitchBranch);
 
 var AST_Case = DEFNODE("Case", "expression", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+            AST_Statement.prototype.walk.call(this, w);
+        });
+    }
 }, AST_SwitchBranch);
 
 /* -----[ EXCEPTIONS ]----- */
 
 var AST_Try = DEFNODE("Try", "btry bcatch bfinally", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.btry.walk(w);
+            if (this.bcatch) this.bcatch.walk(w);
+            if (this.bfinally) this.bfinally.walk(w);
+        });
+    }
 });
 
 var AST_Catch = DEFNODE("Catch", "argname body", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.argname.walk(w);
+            this.body.walk(w);
+        });
+    }
 });
 
 var AST_Finally = DEFNODE("Finally", null, {
@@ -167,7 +292,13 @@ var AST_Finally = DEFNODE("Finally", null, {
 /* -----[ VAR/CONST ]----- */
 
 var AST_Definitions = DEFNODE("Definitions", "definitions", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.definitions.forEach(function(el){
+                el.walk(w);
+            });
+        });
+    }
 });
 
 var AST_Var = DEFNODE("Var", null, {
@@ -179,13 +310,25 @@ var AST_Const = DEFNODE("Const", null, {
 }, AST_Definitions);
 
 var AST_VarDef = DEFNODE("VarDef", "name value", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.name.walk(w);
+            if (this.value) this.value.walk(w);
+        });
+    }
 });
 
 /* -----[ OTHER ]----- */
 
 var AST_Call = DEFNODE("Call", "expression args", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+            this.args.forEach(function(el){
+                el.walk(w);
+            });
+        });
+    }
 });
 
 var AST_New = DEFNODE("New", null, {
@@ -193,7 +336,12 @@ var AST_New = DEFNODE("New", null, {
 }, AST_Call);
 
 var AST_Seq = DEFNODE("Seq", "first second", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.first.walk(w);
+            this.second.walk(w);
+        });
+    }
 });
 
 var AST_PropAccess = DEFNODE("PropAccess", "expression property", {
@@ -201,15 +349,28 @@ var AST_PropAccess = DEFNODE("PropAccess", "expression property", {
 });
 
 var AST_Dot = DEFNODE("Dot", null, {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+        });
+    }
 }, AST_PropAccess);
 
 var AST_Sub = DEFNODE("Sub", null, {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+            this.property.walk(w);
+        });
+    }
 }, AST_PropAccess);
 
 var AST_Unary = DEFNODE("Unary", "operator expression", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.expression.walk(w);
+        });
+    }
 });
 
 var AST_UnaryPrefix = DEFNODE("UnaryPrefix", null, {
@@ -221,77 +382,129 @@ var AST_UnaryPostfix = DEFNODE("UnaryPostfix", null, {
 }, AST_Unary);
 
 var AST_Binary = DEFNODE("Binary", "left operator right", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.left.walk(w);
+            this.right.walk(w);
+        });
+    }
 });
 
 var AST_Conditional = DEFNODE("Conditional", "condition consequent alternative", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.condition.walk(w);
+            this.consequent.walk(w);
+            this.alternative.walk(w);
+        });
+    }
 });
 
-var AST_Assign = DEFNODE("Assign", "left operator right", {
+var AST_Assign = DEFNODE("Assign", null, {
 
-});
+}, AST_Binary);
 
 /* -----[ LITERALS ]----- */
 
-var AST_RegExp = DEFNODE("Regexp", "pattern mods", {
-
-});
-
 var AST_Array = DEFNODE("Array", "elements", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.elements.forEach(function(el){
+                el.walk(w);
+            });
+        });
+    }
 });
 
 var AST_Object = DEFNODE("Object", "properties", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.properties.forEach(function(prop){
+                prop.walk(w);
+            });
+        });
+    }
 });
 
 var AST_ObjectProperty = DEFNODE("ObjectProperty");
 
 var AST_ObjectKeyVal = DEFNODE("ObjectKeyval", "key value", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.value.walk(w);
+        });
+    }
 }, AST_ObjectProperty);
 
 var AST_ObjectSetter = DEFNODE("ObjectSetter", "name func", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.func.walk(w);
+        });
+    }
 }, AST_ObjectProperty);
 
 var AST_ObjectGetter = DEFNODE("ObjectGetter", "name func", {
-
+    walk: function(w) {
+        w._visit(this, function(){
+            this.func.walk(w);
+        });
+    }
 }, AST_ObjectProperty);
 
 var AST_Symbol = DEFNODE("Symbol", "name", {
+});
+
+var AST_This = DEFNODE("This", null, {
+
+}, AST_Symbol);
+
+var AST_SymbolRef = DEFNODE("SymbolRef", "scope symbol", {
+
+}, AST_Symbol);
 
+var AST_Label = DEFNODE("Label", null, {
+
+}, AST_SymbolRef);
+
+var AST_Constant = DEFNODE("Constant", null, {
+    getValue: function() {
+        return this.value;
+    }
 });
 
 var AST_String = DEFNODE("String", "value", {
 
-});
+}, AST_Constant);
 
 var AST_Number = DEFNODE("Number", "value", {
 
-});
-
-var AST_Boolean = DEFNODE("Boolean", "value", {
+}, AST_Constant);
 
-});
+var AST_RegExp = DEFNODE("Regexp", "pattern mods", {
+    getValue: function() {
+        return this._regexp || (
+            this._regexp = new RegExp(this.pattern, this.mods)
+        );
+    }
+}, AST_Constant);
 
 var AST_Atom = DEFNODE("Atom", null, {
 
-});
+}, AST_Constant);
 
 var AST_Null = DEFNODE("Null", null, {
-
+    getValue: function() { return null }
 }, AST_Atom);
 
 var AST_Undefined = DEFNODE("Undefined", null, {
-
+    getValue: function() { return (function(){}()) }
 }, AST_Atom);
 
 var AST_False = DEFNODE("False", null, {
-
+    getValue: function() { return false }
 }, AST_Atom);
 
 var AST_True = DEFNODE("True", null, {
-
+    getValue: function() { return true }
 }, AST_Atom);
index 36bc18a..9089a5f 100755 (executable)
@@ -1,20 +1,35 @@
 #! /usr/bin/env node
 
-var fs = require("fs");
+(function(){
 
-function load_global(file) {
-    var code = fs.readFileSync(file, "utf8");
-    return global.eval(code);
-};
+    var fs = require("fs");
+    var vm = require("vm");
+    var sys = require("util");
 
-load_global("./utils.js");
-load_global("./ast.js");
-load_global("./parse.js");
+    function load_global(file) {
+        var code = fs.readFileSync(file, "utf8");
+        return vm.runInThisContext(code, file);
+    };
 
-/// 
+    load_global("./utils.js");
+    load_global("./output.js");
+    load_global("./ast.js");
+    load_global("./parse.js");
 
-var filename = process.argv[2];
-console.time("parse");
-var ast = parse(fs.readFileSync(filename, "utf8"));
-console.timeEnd("parse");
+    ///
 
+    var filename = process.argv[2];
+    console.time("parse");
+    var ast = parse(fs.readFileSync(filename, "utf8"));
+    console.timeEnd("parse");
+
+    console.time("walk");
+    ast.walk({
+        _visit: function(node, descend) {
+            //console.log(node);
+            if (descend) descend.call(node);
+        }
+    });
+    console.timeEnd("walk");
+
+})();
diff --git a/lib/output.js b/lib/output.js
new file mode 100644 (file)
index 0000000..2c4c6fd
--- /dev/null
@@ -0,0 +1,134 @@
+function OutputStream(options) {
+    options = defaults(options, {
+        indent_start  : 0,
+        indent_level  : 4,
+        quote_keys    : false,
+        space_colon   : false,
+        beautify      : true,
+        ascii_only    : false,
+        inline_script : false,
+        width         : 80
+    });
+
+    var indentation = 0;
+    var current_col = 0;
+    var OUTPUT = "";
+
+    function to_ascii(str) {
+        return str.replace(/[\u0080-\uffff]/g, function(ch) {
+            var code = ch.charCodeAt(0).toString(16);
+            while (code.length < 4) code = "0" + code;
+            return "\\u" + code;
+        });
+    };
+
+    function make_string(str) { // returns str quoted and escaped as a JS string literal
+        var dq = 0, sq = 0; // occurrences of double / single quotes, used below to pick the delimiter
+        str = str.replace(/[\\\b\f\n\r\t\x22\x27\u2028\u2029\0]/g, function(s){
+            switch (s) {
+              case "\\": return "\\\\";
+              case "\b": return "\\b";
+              case "\f": return "\\f";
+              case "\n": return "\\n";
+              case "\r": return "\\r";
+              case "\u2028": return "\\u2028";
+              case "\u2029": return "\\u2029";
+              case '"': ++dq; return '"';
+              case "'": ++sq; return "'";
+              case "\0": return "\\x00"; // not "\\0": followed by a digit that would read back as an octal escape
+            }
+            return s; // tab is matched by the regex but has no case, so it is emitted unchanged
+        });
+        if (options.ascii_only) str = to_ascii(str);
+        if (dq > sq) return "'" + str.replace(/\x27/g, "\\'") + "'"; // more double quotes: delimit with ' and escape only '
+        else return '"' + str.replace(/\x22/g, '\\"') + '"';
+    };
+
+    function print(str) { // appends str to OUTPUT and keeps current_col in sync
+        var nl = str.lastIndexOf("\n");
+        if (nl >= 0) {
+            current_col = str.length - nl - 1; // column = characters after the last newline (was `nl`, the newline's index)
+        } else {
+            current_col += str.length;
+        }
+        OUTPUT += str;
+    };
+
+    function encode_string(str) {
+        var ret = make_string(str);
+        if (options.inline_script)
+            ret = ret.replace(/<\x2fscript([>\/\t\n\f\r ])/gi, "<\\/script$1");
+        return ret;
+    };
+
+    function make_name(name) {
+        name = name.toString();
+        if (options.ascii_only)
+            name = to_ascii(name);
+        return name;
+    };
+
+    function make_indent(line) { // returns line (default "") prefixed with the current indentation when beautifying
+        if (line == null)
+            line = "";
+        if (options.beautify) // was a bare `beautify`; no such variable is declared in OutputStream
+            line = repeat_string(" ", options.indent_start + indentation) + line;
+        return line;
+    };
+
+    function with_indent(col, cont) {
+        var save_indentation = indentation;
+        indentation = col;
+        var ret = cont();
+        indentation = save_indentation;
+        return ret;
+    };
+
+    function indent() {
+        if (options.beautify) print(make_indent());
+    };
+
+    function newline() {
+        if (options.beautify) {
+            print("\n");
+            print(make_indent());
+        }
+    };
+
+    function next_indent() {
+        return indentation + options.indent_level;
+    };
+
+    function with_block(cont) {
+        var ret;
+        print("{");
+        with_indent(next_indent(), function(){
+            newline();
+            ret = cont();
+            newline();
+        });
+        indent();
+        print("}");
+        return ret;
+    };
+
+    function with_parens(cont) {
+        print("(");
+        var ret = with_indent(current_col, cont);
+        print(")");
+        return ret;
+    };
+
+    return {
+        get         : function() { return OUTPUT },
+        indent      : indent,
+        newline     : newline,
+        print       : print,
+        string      : function(str) { print(encode_string(str)) },
+        with_indent : with_indent,
+        with_block  : with_block,
+        with_parens : with_parens,
+        options     : function() { return options }
+    };
+
+};
index 7164f3e..9dcfa63 100644 (file)
@@ -577,13 +577,13 @@ var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
 
 var ASSIGNMENT = (function(a, ret, i){
     while (i < a.length) {
-        ret[a[i]] = a[i].substr(0, a[i].length - 1);
+        ret[a[i]] = a[i];
         i++;
     }
     return ret;
 })(
-    ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
-    { "=": true },
+    [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
+    {},
     0
 );
 
@@ -695,10 +695,11 @@ function parse($TEXT, exigent_mode) {
     };
 
     function parenthesised() {
-        expect("(");
-        var ex = expression();
-        expect(")");
-        return ex;
+        return new AST_Parenthesized({
+            start      : expect("("),
+            expression : expression(),
+            end        : expect(")")
+        });
     };
 
     function embed_tokens(parser) {
@@ -828,8 +829,7 @@ function parse($TEXT, exigent_mode) {
     });
 
     function labeled_statement() {
-        var label = S.token.value;
-        next();
+        var label = as_symbol(true);
         expect(":");
         S.labels.push(label);
         var start = S.token, stat = statement();
@@ -845,19 +845,21 @@ function parse($TEXT, exigent_mode) {
     };
 
     function break_cont(type) {
-        var name = null;
+        var name = null, label = null;
         if (!can_insert_semicolon()) {
             name = is("name") ? S.token.value : null;
         }
         if (name != null) {
             next();
-            if (!member(name, S.labels))
+            label = find_if(function(l){ return l.name == name }, S.labels);
+            if (!label)
                 croak("Label " + name + " without matching loop or statement");
+            label = new AST_Label({ name: name, symbol: label });
         }
         else if (S.in_loop == 0)
             croak(type.TYPE + " not inside a loop or switch");
         semicolon();
-        return new type({ label: name });
+        return new type({ label: label });
     };
 
     function for_() {
@@ -892,19 +894,19 @@ function parse($TEXT, exigent_mode) {
     };
 
     function for_in(init) {
-        var lhs = init instanceof AST_Var ? init.definitions[0].name : init;
+        var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
         var obj = expression();
         expect(")");
         return new AST_ForIn({
             init   : init,
-            lhs    : lhs,
+            name   : lhs,
             object : obj,
             body   : in_loop(statement)
         });
     };
 
     var function_ = function(in_statement) {
-        var name = is("name") ? as_symbol() : null;
+        var name = is("name") ? as_symbol(true) : null;
         if (in_statement && !name)
             unexpected();
         expect("(");
@@ -914,7 +916,7 @@ function parse($TEXT, exigent_mode) {
             argnames: (function(first, a){
                 while (!is("punc", ")")) {
                     if (first) first = false; else expect(",");
-                    a.push(as_symbol());
+                    a.push(as_symbol(true));
                 }
                 next();
                 return a;
@@ -922,11 +924,14 @@ function parse($TEXT, exigent_mode) {
             body: embed_tokens(function(){
                 ++S.in_function;
                 var loop = S.in_loop;
+                var labels = S.labels;
                 S.in_directives = true;
                 S.in_loop = 0;
+                S.labels = [];
                 var a = block_();
                 --S.in_function;
                 S.in_loop = loop;
+                S.labels = labels;
                 return new AST_Bracketed({ body: a });
             })()
         });
@@ -958,47 +963,70 @@ function parse($TEXT, exigent_mode) {
 
     var switch_block_ = embed_tokens(curry(in_loop, function(){
         expect("{");
-        var a = [], cur = null;
+        var a = [], cur = null, branch = null;
         while (!is("punc", "}")) {
             if (is("eof")) unexpected();
             if (is("keyword", "case")) {
-                next();
+                if (branch) branch.end = prev();
                 cur = [];
-                a.push(new AST_Case({ expression: expression(), body: cur }));
+                branch = new AST_Case({
+                    start      : prog1(S.token, next),
+                    expression : expression(),
+                    body       : cur
+                });
+                a.push(branch);
                 expect(":");
             }
             else if (is("keyword", "default")) {
-                next();
-                expect(":");
+                if (branch) branch.end = prev();
                 cur = [];
-                a.push(new AST_Default({ body: cur }));
+                branch = new AST_Default({
+                    start : prog1(S.token, next, curry(expect, ":")),
+                    body  : cur
+                })
+                a.push(branch);
             }
             else {
                 if (!cur) unexpected();
                 cur.push(statement());
             }
         }
+        if (branch) branch.end = prev();
         next();
         return new AST_SwitchBlock({ body: a });
     }));
 
     function try_() {
         var body = new AST_Bracketed({
-            body: block_()
+            start : S.token,
+            body  : block_(),
+            end   : prev()
         }), bcatch = null, bfinally = null;
         if (is("keyword", "catch")) {
+            var start = S.token;
             next();
             expect("(");
-            var name = as_symbol();
+            var name = as_symbol(true);
             expect(")");
             bcatch = new AST_Catch({
+                start   : start,
                 argname : name,
-                body    : new AST_Bracketed({ body: block_() })
+                body    : new AST_Bracketed({
+                    start : S.token,
+                    body  : block_(),
+                    end   : prev()
+                }),
+                end     : prev()
             });
         }
         if (is("keyword", "finally")) {
+            var start = S.token;
             next();
-            bfinally = new AST_Finally({ body: block_() });
+            bfinally = new AST_Finally({
+                start : start,
+                body  : block_(),
+                end   : prev()
+            });
         }
         if (!bcatch && !bfinally)
             croak("Missing catch/finally blocks");
@@ -1014,7 +1042,7 @@ function parse($TEXT, exigent_mode) {
         for (;;) {
             a.push(new AST_VarDef({
                 start : S.token,
-                name  : as_symbol(),
+                name  : as_symbol(true),
                 value : is("operator", "=") ? (next(), expression(false, no_in)) : null,
                 end   : prev()
             }));
@@ -1025,19 +1053,25 @@ function parse($TEXT, exigent_mode) {
         return a;
     };
 
-    var var_ = embed_tokens(function(no_in) {
+    var var_ = function(no_in) {
         return new AST_Var({
-            definitions: vardefs(no_in)
+            start       : prev(),
+            definitions : vardefs(no_in),
+            end         : prev()
         });
-    });
+    };
 
-    var const_ = embed_tokens(function() {
+    var const_ = function() {
         return new AST_Const({
-            definitions: vardefs()
+            start       : prev(),
+            definitions : vardefs(),
+            end         : prev()
         });
-    });
+    };
 
-    var new_ = embed_tokens(function() {
+    var new_ = function() {
+        var start = S.token;
+        expect_token("operator", "new");
         var newexp = expr_atom(false), args;
         if (is("punc", "(")) {
             next();
@@ -1046,10 +1080,12 @@ function parse($TEXT, exigent_mode) {
             args = [];
         }
         return subscripts(new AST_New({
+            start      : start,
             expression : newexp,
-            args       : args
+            args       : args,
+            end        : prev()
         }), true);
-    });
+    };
 
     function as_atom_node() {
         var tok = S.token, ret;
@@ -1085,25 +1121,26 @@ function parse($TEXT, exigent_mode) {
 
     var expr_atom = function(allow_calls) {
         if (is("operator", "new")) {
-            next();
             return new_();
         }
+        var start = S.token;
         if (is("punc")) {
-            switch (S.token.value) {
+            switch (start.value) {
               case "(":
                 next();
-                return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
+                var ex = expression();
+                ex.start = start;
+                ex.end = S.token;
+                expect(")");
+                return subscripts(ex, allow_calls);
               case "[":
-                next();
                 return subscripts(array_(), allow_calls);
               case "{":
-                next();
                 return subscripts(object_(), allow_calls);
             }
             unexpected();
         }
         if (is("keyword", "function")) {
-            var start = S.token;
             next();
             var func = function_(false);
             func.start = start;
@@ -1131,13 +1168,15 @@ function parse($TEXT, exigent_mode) {
         return a;
     };
 
-    function array_() {
+    var array_ = embed_tokens(function() {
+        expect("[");
         return new AST_Array({
             elements: expr_list("]", !exigent_mode, true)
         });
-    };
+    });
 
     var object_ = embed_tokens(function() {
+        expect("{");
         var first = true, a = [];
         while (!is("punc", "}")) {
             if (first) first = false; else expect(",");
@@ -1183,9 +1222,14 @@ function parse($TEXT, exigent_mode) {
         switch (S.token.type) {
           case "num":
           case "string":
-            return as_symbol(true);
+          case "name":
+          case "operator":
+          case "keyword":
+          case "atom":
+            return prog1(S.token.value, next);
+          default:
+            unexpected();
         }
-        return as_name();
     };
 
     function as_name() {
@@ -1194,15 +1238,16 @@ function parse($TEXT, exigent_mode) {
           case "operator":
           case "keyword":
           case "atom":
-            return as_symbol(true);
+            return prog1(S.token.value, next);
           default:
             unexpected();
         }
     };
 
-    function as_symbol(noerror) {
-        if (!noerror && !is("name")) croak("Name expected");
-        var sym = new AST_Symbol({
+    function as_symbol(def) {
+        if (!is("name")) croak("Name expected");
+        var name = S.token.value;
+        var sym = new (name == "this" ? AST_This : def ? AST_Symbol : AST_SymbolRef)({
             name  : String(S.token.value),
             start : S.token,
             end   : S.token
@@ -1211,44 +1256,59 @@ function parse($TEXT, exigent_mode) {
         return sym;
     };
 
-    var subscripts = embed_tokens(function(expr, allow_calls) {
+    var subscripts = function(expr, allow_calls) {
+        var start = expr.start;
         if (is("punc", ".")) {
             next();
             return subscripts(new AST_Dot({
+                start      : start,
                 expression : expr,
-                property   : as_name()
+                property   : as_name(),
+                end        : prev()
             }), allow_calls);
         }
         if (is("punc", "[")) {
             next();
+            var prop = expression();
+            expect("]");
             return subscripts(new AST_Sub({
+                start      : start,
                 expression : expr,
-                property   : prog1(expression, curry(expect, "]"))
+                property   : prop,
+                end        : prev()
             }), allow_calls);
         }
         if (allow_calls && is("punc", "(")) {
             next();
             return subscripts(new AST_Call({
+                start      : start,
                 expression : expr,
-                args       : expr_list(")")
+                args       : expr_list(")"),
+                end        : prev()
             }), true);
         }
         return expr;
-    });
+    };
 
-    var maybe_unary = embed_tokens(function(allow_calls) {
+    var maybe_unary = function(allow_calls) {
+        var start = S.token;
         if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
-            return make_unary(AST_UnaryPrefix,
-                              prog1(S.token.value, next),
-                              maybe_unary(allow_calls));
+            var ex = make_unary(AST_UnaryPrefix,
+                                prog1(S.token.value, next),
+                                maybe_unary(allow_calls));
+            ex.start = start;
+            ex.end = prev();
+            return ex;
         }
         var val = expr_atom(allow_calls);
         while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
             val = make_unary(AST_UnaryPostfix, S.token.value, val);
+            val.start = start;
+            val.end = S.token;
             next();
         }
         return val;
-    });
+    };
 
     function make_unary(ctor, op, expr) {
         if ((op == "++" || op == "--") && !is_assignable(expr))
@@ -1256,7 +1316,7 @@ function parse($TEXT, exigent_mode) {
         return new ctor({ operator: op, expression: expr });
     };
 
-    var expr_op = embed_tokens(function(left, min_prec, no_in) {
+    var expr_op = function(left, min_prec, no_in) {
         var op = is("operator") ? S.token.value : null;
         if (op == "in" && no_in) op = null;
         var prec = op != null ? PRECEDENCE[op] : null;
@@ -1264,32 +1324,37 @@ function parse($TEXT, exigent_mode) {
             next();
             var right = expr_op(maybe_unary(true), prec, no_in);
             return expr_op(new AST_Binary({
+                start    : left.start,
                 left     : left,
                 operator : op,
-                right    : right
+                right    : right,
+                end      : right.end
             }), min_prec, no_in);
         }
         return left;
-    });
+    };
 
     function expr_ops(no_in) {
         return expr_op(maybe_unary(true), 0, no_in);
     };
 
-    var maybe_conditional = embed_tokens(function(no_in) {
+    var maybe_conditional = function(no_in) { // parses `cond ? yes : no`; returns the plain expression when no `?` follows
+        var start = S.token; // first token of the condition
         var expr = expr_ops(no_in);
         if (is("operator", "?")) {
             next();
             var yes = expression(false);
             expect(":");
             return new AST_Conditional({
-                condition: expr,
-                consequent: yes,
-                alternative: expression(false, no_in)
+                start       : start,
+                condition   : expr,
+                consequent  : yes,
+                alternative : expression(false, no_in),
+                end         : prev() // last token consumed by the alternative (peek() would read ahead past the node)
             });
         }
         return expr;
-    });
+    };
 
     function is_assignable(expr) {
         if (!exigent_mode) return true;
@@ -1304,35 +1369,41 @@ function parse($TEXT, exigent_mode) {
         }
     };
 
-    var maybe_assign = embed_tokens(function(no_in) {
+    var maybe_assign = function(no_in) { // parses `left <assign-op> right`; croaks when the left side is not assignable
+        var start = S.token; // first token of the left-hand side
         var left = maybe_conditional(no_in), val = S.token.value;
         if (is("operator") && HOP(ASSIGNMENT, val)) {
             if (is_assignable(left)) {
                 next();
                 return new AST_Assign({
+                    start    : start,
                     left     : left,
                     operator : ASSIGNMENT[val],
-                    right    : maybe_assign(no_in)
+                    right    : maybe_assign(no_in),
+                    end      : prev() // last token consumed by the right-hand side (peek() would read ahead past the node)
                 });
             }
             croak("Invalid assignment");
         }
         return left;
-    });
+    };
 
-    var expression = embed_tokens(function(commas, no_in) {
+    var expression = function(commas, no_in) { // parses an expression; `commas` (default true when called without arguments) allows `a, b` sequences
         if (arguments.length == 0)
             commas = true;
+        var start = S.token; // first token of the expression
         var expr = maybe_assign(no_in);
         if (commas && is("punc", ",")) {
             next();
             return new AST_Seq({
+                start  : start,
                 first  : expr,
-                second : expression(true, no_in)
+                second : expression(true, no_in),
+                end    : prev() // last token consumed by the trailing expression (peek() would read ahead past the node)
             });
         }
         return expr;
-    });
+    };
 
     function in_loop(cont) {
         ++S.in_loop;
@@ -1342,11 +1413,13 @@ function parse($TEXT, exigent_mode) {
     };
 
     return new AST_Toplevel({
+        start: S.token, // current token before any statement has been parsed
         body: (function(a){
             while (!is("eof"))
                 a.push(statement());
             return a;
-        })([])
+        })([]),
+        end: prev() // token consumed just before "eof" became current; NOTE(review): presumably null for empty input - confirm
     });
 
 };
index f594ccd..b765132 100644 (file)
-var func = function parse($TEXT, exigent_mode) {
+var func = function tokenizer($TEXT) {
 
     var S = {
-        input         : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
-        token         : null,
-        prev          : null,
-        peeked        : null,
-        in_function   : 0,
-        in_directives : true,
-        in_loop       : 0,
-        labels        : []
-    };
-
-    S.token = next();
-
-    function is(type, value) {
-        return is_token(S.token, type, value);
-    };
-
-    function peek() { return S.peeked || (S.peeked = S.input()); };
-
-    function next() {
-        S.prev = S.token;
-        if (S.peeked) {
-            S.token = S.peeked;
-            S.peeked = null;
+        text            : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''), // input with line terminators normalized to "\n" and a leading BOM removed
+        pos             : 0, // index in `text` of the next character to read
+        tokpos          : 0, // value of `pos` recorded by start_token()
+        line            : 0, // current line, counted from 0
+        tokline         : 0, // value of `line` recorded by start_token()
+        col             : 0, // current column; next() resets it to 0 after each "\n"
+        tokcol          : 0, // value of `col` recorded by start_token()
+        newline_before  : false, // true once a newline has been seen since the last token; cleared by token()
+        regex_allowed   : false, // recomputed by token(); decides whether "/" starts a regexp in handle_slash()
+        comments_before : [] // comment tokens collected until the next non-comment token takes them over
+    };
+
+    // Look at the character under the cursor without consuming it
+    // (charAt yields "" once the cursor is past the end of the text).
+    function peek() {
+        var cursor = S.pos;
+        return S.text.charAt(cursor);
+    };
+
+    // Consumes one character, advances the cursor and keeps line/column
+    // bookkeeping up to date.
+    //   signal_eof - when truthy, throws EX_EOF instead of returning "" at end of input
+    //   in_string  - when truthy, a consumed "\n" does not set S.newline_before
+    // Returns the consumed character ("" at end of input).
+    function next(signal_eof, in_string) {
+        var ch = S.text.charAt(S.pos++);
+        if (signal_eof && !ch)
+            throw EX_EOF;
+        if (ch == "\n") {
+            // the flag is sticky: once set it stays set until token() clears it
+            S.newline_before = S.newline_before || !in_string;
+            ++S.line;
+            S.col = 0;
         } else {
-            S.token = S.input();
+            ++S.col;
         }
-        S.in_directives = S.in_directives && (
-            S.token.type == "string" || is("punc", ";")
-        );
-        return S.token;
-    };
-
-    function prev() {
-        return S.prev;
+        return ch;
     };
 
-    function croak(msg, line, col, pos) {
-        var ctx = S.input.context();
-        js_error(msg,
-                 line != null ? line : ctx.tokline,
-                 col != null ? col : ctx.tokcol,
-                 pos != null ? pos : ctx.tokpos);
+    // Returns true when the cursor has reached the end of the input.
+    // peek() is a function local to the tokenizer, not a member of the state
+    // object S, so it has to be called directly (S.peek is undefined).
+    function eof() {
+        return !peek();
     };
 
-    function token_error(token, msg) {
-        croak(msg, token.line, token.col);
+    // Locate the next occurrence of `what` at or after the cursor.
+    // Returns its index in S.text, or -1 when there is none; with a truthy
+    // `signal_eof` a missing match throws EX_EOF instead.
+    function find(what, signal_eof) {
+        var index = S.text.indexOf(what, S.pos);
+        if (index == -1 && signal_eof) {
+            throw EX_EOF;
+        }
+        return index;
     };
 
-    function unexpected(token) {
-        if (token == null)
-            token = S.token;
-        token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
+    // Snapshot the cursor (offset, column, line) as the origin of the token
+    // that is about to be read.
+    function start_token() {
+        S.tokpos = S.pos;
+        S.tokcol = S.col;
+        S.tokline = S.line;
     };
 
-    function expect_token(type, val) {
-        if (is(type, val)) {
-            return next();
-        }
-        token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
-    };
-
-    function expect(punc) { return expect_token("punc", punc); };
-
-    function can_insert_semicolon() {
-        return !exigent_mode && (
-            S.token.nlb || is("eof") || is("punc", "}")
-        );
-    };
-
-    function semicolon() {
-        if (is("punc", ";")) next();
-        else if (!can_insert_semicolon()) unexpected();
-    };
-
-    function parenthesised() {
-        expect("(");
-        var ex = expression();
-        expect(")");
-        return ex;
-    };
-
-    function embed_tokens(parser) {
-        return function() {
-            var start = S.token;
-            var expr = parser.apply(this, arguments);
-            var end = prev();
-            expr.start = start;
-            expr.end = end;
-            return expr;
+    // Builds the AST_Token for the lexeme that was just read.
+    //   type       - token type string ("num", "string", "punc", "comment1", ...)
+    //   value      - token value
+    //   is_comment - truthy for comment tokens, which do not take over S.comments_before
+    // Side effects: recomputes S.regex_allowed from (type, value) and clears
+    // S.newline_before.  Because regex_allowed is recomputed for comment tokens
+    // too, handle_slash() saves and restores it around comments.
+    function token(type, value, is_comment) {
+        S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
+                           (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
+                           (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
+        var ret = {
+            type   : type,
+            value  : value,
+            line   : S.tokline,
+            col    : S.tokcol,
+            pos    : S.tokpos,
+            endpos : S.pos,
+            nlb    : S.newline_before
         };
-    };
-
-    var statement = embed_tokens(function() {
-        if (is("operator", "/") || is("operator", "/=")) {
-            S.peeked = null;
-            S.token = S.input(S.token.value.substr(1)); // force regexp
-        }
-        switch (S.token.type) {
-          case "string":
-            var dir = S.in_directives, stat = simple_statement();
-            // XXXv2: decide how to fix directives
-            // if (dir && stat instanceof AST_String && !is("punc", ","))
-            //     return new AST_Directive({ value: stat.value });
-            return stat;
-          case "num":
-          case "regexp":
-          case "operator":
-          case "atom":
-            return simple_statement();
-
-          case "name":
-            return is_token(peek(), "punc", ":")
-                ? labeled_statement()
-                : simple_statement();
-
-          case "punc":
-            switch (S.token.value) {
-              case "{":
-                return new AST_Statement({ body: block_() });
-              case "[":
-              case "(":
-                return simple_statement();
-              case ";":
-                next();
-                return new AST_Statement();
-              default:
-                unexpected();
-            }
-
-          case "keyword":
-            switch (prog1(S.token.value, next)) {
-              case "break":
-                return break_cont(AST_Break);
-
-              case "continue":
-                return break_cont(AST_Continue);
-
-              case "debugger":
-                semicolon();
-                return new AST_Debugger();
-
-              case "do":
-                return new AST_Do({
-                    body      : in_loop(statement),
-                    condition : (expect_token("while"), prog1(parenthesised, semicolon))
-                });
-
-              case "while":
-                return new AST_While({
-                    condition : parenthesised(),
-                    body      : in_loop(statement)
-                });
-
-              case "for":
-                return for_();
-
-              case "function":
-                return function_(true);
-
-              case "if":
-                return if_();
-
-              case "return":
-                if (S.in_function == 0)
-                    croak("'return' outside of function");
-                return new AST_Return({
-                    value: ( is("punc", ";")
-                             ? (next(), null)
-                             : can_insert_semicolon()
-                             ? null
-                             : prog1(expression, semicolon) )
-                });
-
-              case "switch":
-                return new AST_Switch({
-                    expression : parenthesised(),
-                    body       : switch_block_()
-                });
-
-              case "throw":
-                if (S.token.nlb)
-                    croak("Illegal newline after 'throw'");
-                return new AST_Throw({
-                    value: prog1(expression, semicolon)
-                });
-
-              case "try":
-                return try_();
-
-              case "var":
-                return prog1(var_, semicolon);
-
-              case "const":
-                return prog1(const_, semicolon);
-
-              case "with":
-                return new AST_With({
-                    expression : parenthesised(),
-                    body       : statement()
-                });
-
-              default:
-                unexpected();
+        if (!is_comment) {
+            // hand the pending comments over to this token and start a fresh list
+            ret.comments_before = S.comments_before;
+            S.comments_before = [];
+            // make note of any newlines in the comments that came before
+            for (var i = 0, len = ret.comments_before.length; i < len; i++) {
+                ret.nlb = ret.nlb || ret.comments_before[i].nlb;
             }
         }
-    });
-
-    function labeled_statement() {
-        var label = S.token.value;
-        next();
-        expect(":");
-        S.labels.push(label);
-        var start = S.token, stat = statement();
-        if (exigent_mode && !(stat instanceof AST_LabeledStatement))
-            unexpected(start);
-        S.labels.pop();
-        stat.label = label;
-        return stat;
-    };
-
-    function simple_statement() {
-        return new AST_Statement({ body: prog1(expression, semicolon) });
+        S.newline_before = false;
+        return new AST_Token(ret);
     };
 
-    function break_cont(type) {
-        var name = null;
-        if (!can_insert_semicolon()) {
-            name = is("name") ? S.token.value : null;
-        }
-        if (name != null) {
+    // Consumes characters for as long as the next one is a key of WHITESPACE_CHARS.
+    function skip_whitespace() {
+        while (HOP(WHITESPACE_CHARS, peek()))
             next();
-            if (!member(name, S.labels))
-                croak("Label " + name + " without matching loop or statement");
-        }
-        else if (S.in_loop == 0)
-            croak(type.TYPE + " not inside a loop or switch");
-        semicolon();
-        return new type({ label: name });
     };
 
-    function for_() {
-        expect("(");
-        var init = null;
-        if (!is("punc", ";")) {
-            init = is("keyword", "var")
-                ? (next(), var_(true))
-                : expression(true, true);
-            if (is("operator", "in")) {
-                if (init instanceof AST_Var && init.definitions.length > 1)
-                    croak("Only one variable declaration allowed in for..in loop");
-                next();
-                return for_in(init);
-            }
+    // Consumes characters while `pred(ch, i)` is truthy, where `ch` is the next
+    // character and `i` counts the characters tested so far (starting at 0).
+    // Stops at end of input.  Returns the consumed characters as a string.
+    function read_while(pred) {
+        var ret = "", ch = peek(), i = 0;
+        while (ch && pred(ch, i++)) {
+            ret += next();
+            ch = peek();
         }
-        return regular_for(init);
+        return ret;
     };
 
-    function regular_for(init) {
-        expect(";");
-        var test = is("punc", ";") ? null : expression();
-        expect(";");
-        var step = is("punc", ")") ? null : expression();
-        expect(")");
-        return new AST_For({
-            init      : init,
-            condition : test,
-            step      : step,
-            body      : in_loop(statement)
-        });
+    // Report `err` through js_error, located at the start of the current token.
+    function parse_error(err) {
+        var line = S.tokline, col = S.tokcol, pos = S.tokpos;
+        js_error(err, line, col, pos);
     };
 
-    function for_in(init) {
-        var lhs = init instanceof AST_Var ? init.definitions[0].name : init;
-        var obj = expression();
-        expect(")");
-        return new AST_ForIn({
-            init   : init,
-            lhs    : lhs,
-            object : obj,
-            body   : in_loop(statement)
+    // Reads a numeric literal and returns a "num" token holding the value
+    // produced by parse_js_number.
+    //   prefix - text already consumed by the caller (handle_dot passes "."),
+    //            prepended to the characters read here
+    // The predicate below only decides how far to read; validity is decided by
+    // parse_js_number, and a NaN result is reported as a parse error.
+    function read_num(prefix) {
+        var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
+        var num = read_while(function(ch, i){
+            // at most one "x"/"X" is accepted
+            if (ch == "x" || ch == "X") {
+                if (has_x) return false;
+                return has_x = true;
+            }
+            // at most one exponent marker, and none once "x" has been seen
+            if (!has_x && (ch == "E" || ch == "e")) {
+                if (has_e) return false;
+                return has_e = after_e = true;
+            }
+            // "-" is accepted right after the exponent marker, or as the very
+            // first character when there is no prefix
+            if (ch == "-") {
+                if (after_e || (i == 0 && !prefix)) return true;
+                return false;
+            }
+            // "+" is accepted only right after the exponent marker
+            if (ch == "+") return after_e;
+            after_e = false;
+            // a single "." is accepted, and only before any "x" or exponent
+            if (ch == ".") {
+                if (!has_dot && !has_x && !has_e)
+                    return has_dot = true;
+                return false;
+            }
+            return is_alphanumeric_char(ch);
         });
-    };
-
-    var function_ = function(in_statement) {
-        var name = is("name") ? as_symbol() : null;
-        if (in_statement && !name)
-            unexpected();
-        expect("(");
-        var ctor = in_statement ? AST_Defun : AST_Function;
-        return new ctor({
-            name: name,
-            argnames: (function(first, a){
-                while (!is("punc", ")")) {
-                    if (first) first = false; else expect(",");
-                    a.push(as_symbol());
+        if (prefix)
+            num = prefix + num;
+        var valid = parse_js_number(num);
+        if (!isNaN(valid)) {
+            return token("num", valid);
+        } else {
+            parse_error("Invalid syntax: " + num);
+        }
+    };
+
+    // Decode the escape sequence whose backslash has already been consumed.
+    // `in_string` is forwarded to next() for the character after the backslash.
+    // "x" and "u" read 2 and 4 hex digits respectively, an escaped newline
+    // yields "", and any character without a special meaning stands for itself.
+    function read_escaped_char(in_string) {
+        var ch = next(true, in_string);
+        if (ch == "x") return String.fromCharCode(hex_bytes(2));
+        if (ch == "u") return String.fromCharCode(hex_bytes(4));
+        var simple = {
+            "n"  : "\n",
+            "r"  : "\r",
+            "t"  : "\t",
+            "b"  : "\b",
+            "v"  : "\u000b",
+            "f"  : "\f",
+            "0"  : "\0",
+            "\n" : ""
+        };
+        return HOP(simple, ch) ? simple[ch] : ch;
+    };
+
+    // Read exactly `n` hexadecimal digits and return their combined value.
+    // Running out of input raises EX_EOF (via next(true)); a non-hex character
+    // is reported through parse_error.
+    function hex_bytes(n) {
+        var value = 0, remaining = n;
+        while (remaining > 0) {
+            var nibble = parseInt(next(true), 16);
+            if (isNaN(nibble)) {
+                parse_error("Invalid hex-character pattern in string");
+            }
+            value = (value << 4) | nibble;
+            remaining--;
+        }
+        return value;
+    };
+
+    // Reads a string literal; the character under the cursor is taken as the
+    // quote.  Returns a "string" token holding the decoded contents.  Reaching
+    // the end of input before the closing quote is reported as
+    // "Unterminated string constant".
+    function read_string() {
+        return with_eof_error("Unterminated string constant", function(){
+            var quote = next(), ret = "";
+            for (;;) {
+                var ch = next(true);
+                if (ch == "\\") {
+                    // read OctalEscapeSequence (XXX: deprecated if "strict mode")
+                    // https://github.com/mishoo/UglifyJS/issues/178
+                    // A first digit of 0-3 allows up to three octal digits,
+                    // a first digit of 4-7 allows up to two.
+                    var octal_len = 0, first = null;
+                    ch = read_while(function(ch){
+                        if (ch >= "0" && ch <= "7") {
+                            if (!first) {
+                                first = ch;
+                                return ++octal_len;
+                            }
+                            else if (first <= "3" && octal_len <= 2) return ++octal_len;
+                            else if (first >= "4" && octal_len <= 1) return ++octal_len;
+                        }
+                        return false;
+                    });
+                    // no octal digits followed the backslash: ordinary escape
+                    if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
+                    else ch = read_escaped_char(true);
                 }
-                next();
-                return a;
-            })(true, []),
-            body: embed_tokens(function(){
-                ++S.in_function;
-                var loop = S.in_loop;
-                S.in_directives = true;
-                S.in_loop = 0;
-                var a = block_();
-                --S.in_function;
-                S.in_loop = loop;
-                return new AST_Bracketed({ body: a });
-            })()
+                else if (ch == quote) break;
+                ret += ch;
+            }
+            return token("string", ret);
         });
     };
 
-    function if_() {
-        var cond = parenthesised(), body = statement(), belse = null;
-        if (is("keyword", "else")) {
-            next();
-            belse = statement();
+    // Reads a "//" comment.  handle_slash has consumed the first "/" and the
+    // next() below consumes the second.  Returns a "comment1" token holding the
+    // text up to, but not including, the next "\n" (or the end of input).
+    // NOTE(review): S.pos is moved directly here, so S.col is not advanced
+    // over the comment text.
+    function read_line_comment() {
+        next();
+        var i = find("\n"), ret;
+        if (i == -1) {
+            ret = S.text.substr(S.pos);
+            S.pos = S.text.length;
+        } else {
+            ret = S.text.substring(S.pos, i);
+            S.pos = i;
         }
-        return new AST_If({
-            condition   : cond,
-            consequent  : body,
-            alternative : belse
-        });
+        return token("comment1", ret, true);
     };
 
-    function block_() {
-        expect("{");
-        var a = [];
-        while (!is("punc", "}")) {
-            if (is("eof")) unexpected();
-            a.push(statement());
-        }
+    // Reads a /* ... */ comment.  handle_slash has consumed the "/" and the
+    // next() below consumes the "*", so the cursor sits on the first character
+    // of the comment text.  Returns a "comment2" token holding that text
+    // (delimiters excluded); a missing "*/" is reported as
+    // "Unterminated multiline comment".
+    function read_multiline_comment() {
         next();
-        return a;
+        return with_eof_error("Unterminated multiline comment", function(){
+            var i = find("*/", true),
+            text = S.text.substring(S.pos, i),
+            lines = text.split("\n"),
+            last = lines[lines.length - 1];
+            // The cursor jumps past the comment without going through next(),
+            // so line AND column have to be brought up to date by hand.
+            S.pos = i + 2;
+            S.line += lines.length - 1;
+            // the "+ 2" accounts for the closing "*/"
+            if (lines.length > 1) S.col = last.length + 2;
+            else S.col += last.length + 2;
+            S.newline_before = S.newline_before || lines.length > 1;
+
+            // https://github.com/mishoo/UglifyJS/issues/#issue/100
+            if (/^@cc_on/i.test(text)) {
+                warn("WARNING: at line " + S.line);
+                warn("*** Found \"conditional comment\": " + text);
+                warn("*** UglifyJS DISCARDS ALL COMMENTS.  This means your code might no longer work properly in Internet Explorer.");
+            }
+
+            return token("comment2", text, true);
+        });
     };
 
-    var switch_block_ = embed_tokens(curry(in_loop, function(){
-        expect("{");
-        var a = [], cur = null;
-        while (!is("punc", "}")) {
-            if (is("eof")) unexpected();
-            if (is("keyword", "case")) {
-                next();
-                cur = [];
-                a.push(new AST_Case({ expression: expression(), body: cur }));
-                expect(":");
-            }
-            else if (is("keyword", "default")) {
-                next();
-                expect(":");
-                cur = [];
-                a.push(new AST_Default({ body: cur }));
+    // Reads an identifier-like name, decoding \uXXXX escapes, and returns it as
+    // a string (no token is created here).  If the decoded name is a key of
+    // KEYWORDS and at least one escape was used, the first character is turned
+    // back into a \uXXXX escape in the returned string.
+    function read_name() {
+        var backslash = false, name = "", ch, escaped = false, hex;
+        // peek() returns a string (charAt), so this condition never fails;
+        // the loop ends through the `break` below or through a parse error
+        while ((ch = peek()) != null) {
+            if (!backslash) {
+                if (ch == "\\") escaped = backslash = true, next();
+                else if (is_identifier_char(ch)) name += next();
+                else break;
             }
             else {
-                if (!cur) unexpected();
-                cur.push(statement());
+                // a backslash was just consumed: only a "u" escape is accepted here
+                if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
+                ch = read_escaped_char();
+                if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
+                name += ch;
+                backslash = false;
             }
         }
-        next();
-        return new AST_SwitchBlock({ body: a });
-    }));
-
-    function try_() {
-        var body = new AST_Bracketed({
-            body: block_()
-        }), bcatch = null, bfinally = null;
-        if (is("keyword", "catch")) {
-            next();
-            expect("(");
-            var name = as_symbol();
-            next();
-            expect(")");
-            bcatch = new AST_Catch({
-                argname : name,
-                body    : new AST_Bracketed({ body: block_() })
-            });
-        }
-        if (is("keyword", "finally")) {
-            next();
-            bfinally = new AST_Finally({ body: block_() });
-        }
-        if (!bcatch && !bfinally)
-            croak("Missing catch/finally blocks");
-        return new AST_Try({
-            btry     : body,
-            bcatch   : bcatch,
-            bfinally : bfinally
-        });
-    };
-
-    function vardefs(no_in) {
-        var a = [];
-        for (;;) {
-            a.push(new AST_VarDef({
-                start : S.token,
-                name  : as_symbol(),
-                value : is("operator", "=") ? (next(), expression(false, no_in)) : null,
-                end   : prev()
-            }));
-            if (!is("punc", ","))
-                break;
-            next();
-        }
-        return a;
-    };
-
-    var var_ = embed_tokens(function(no_in) {
-        return new AST_Var({
-            definitions: vardefs(no_in)
-        });
-    });
-
-    var const_ = embed_tokens(function() {
-        return new AST_Const({
-            definitions: vardefs()
-        });
-    });
-
-    var new_ = embed_tokens(function() {
-        var newexp = expr_atom(false), args;
-        if (is("punc", "(")) {
-            next();
-            args = expr_list(")");
-        } else {
-            args = [];
-        }
-        return subscripts(new AST_New({
-            expression : newexp,
-            args       : args
-        }), true);
-    });
-
-    function as_atom_node() {
-        var tok = S.token, ret;
-        switch (tok.type) {
-          case "name":
-            return as_symbol();
-          case "num":
-            ret = new AST_Number({ start: tok, end: tok, value: tok.value });
-            break;
-          case "string":
-            ret = new AST_String({ start: tok, end: tok, value: tok.value });
-            break;
-          case "regexp":
-            ret = new AST_RegExp({ start: tok, end: tok, pattern: tok.value[0], mods: tok.value[1] });
-            break;
-          case "atom":
-            switch (tok.value) {
-              case "false":
-                ret = new AST_False({ start: tok, end: tok });
-                break;
-              case "true":
-                ret = new AST_True({ start: tok, end: tok });
-                break;
-              case "null":
-                ret = new AST_Null({ start: tok, end: tok });
+        if (HOP(KEYWORDS, name) && escaped) {
+            // re-escape the first character as a zero-padded, upper-case \uXXXX
+            hex = name.charCodeAt(0).toString(16).toUpperCase();
+            name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
+        }
+        return name;
+    };
+
+    // Reads the rest of a regular expression literal; the opening "/" has
+    // already been consumed by the caller.
+    //   regexp - pattern text collected so far (handle_slash passes "")
+    // Returns a "regexp" token whose value is [ pattern, modifiers ].  Reaching
+    // the end of input first is reported as "Unterminated regular expression".
+    function read_regexp(regexp) {
+        return with_eof_error("Unterminated regular expression", function(){
+            var prev_backslash = false, ch, in_class = false;
+            while ((ch = next(true))) if (prev_backslash) {
+                // escaped character: keep the backslash and the character verbatim
+                regexp += "\\" + ch;
+                prev_backslash = false;
+            } else if (ch == "[") {
+                in_class = true;
+                regexp += ch;
+            } else if (ch == "]" && in_class) {
+                in_class = false;
+                regexp += ch;
+            } else if (ch == "/" && !in_class) {
+                // an unescaped "/" outside a character class ends the pattern
                 break;
+            } else if (ch == "\\") {
+                prev_backslash = true;
+            } else {
+                regexp += ch;
             }
-            break;
-        }
-        next();
-        return ret;
+            // the modifiers are read with the same routine as identifiers
+            var mods = read_name();
+            return token("regexp", [ regexp, mods ]);
+        });
     };
 
-    var expr_atom = function(allow_calls) {
-        if (is("operator", "new")) {
-            next();
-            return new_();
-        }
-        if (is("punc")) {
-            switch (S.token.value) {
-              case "(":
-                next();
-                return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
-              case "[":
+    // Reads the longest operator starting at the cursor and returns it as an
+    // "operator" token.
+    //   prefix - operator text already consumed by the caller (handle_slash
+    //            passes "/"); when falsy the first character is consumed here
+    function read_operator(prefix) {
+        // extends `op` by one character for as long as the longer string is
+        // still a key of OPERATORS
+        function grow(op) {
+            if (!peek()) return op;
+            var bigger = op + peek();
+            if (HOP(OPERATORS, bigger)) {
                 next();
-                return subscripts(array_(), allow_calls);
-              case "{":
-                next();
-                return subscripts(object_(), allow_calls);
-            }
-            unexpected();
-        }
-        if (is("keyword", "function")) {
-            var start = S.token;
-            next();
-            var func = function_(false);
-            func.start = start;
-            func.end = prev();
-            return subscripts(func, allow_calls);
-        }
-        if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
-            return subscripts(as_atom_node(), allow_calls);
-        }
-        unexpected();
-    };
-
-    function expr_list(closing, allow_trailing_comma, allow_empty) {
-        var first = true, a = [];
-        while (!is("punc", closing)) {
-            if (first) first = false; else expect(",");
-            if (allow_trailing_comma && is("punc", closing)) break;
-            if (is("punc", ",") && allow_empty) {
-                a.push(new AST_Undefined({ start: S.token, end: S.token }));
+                return grow(bigger);
             } else {
-                a.push(expression(false));
+                return op;
             }
-        }
-        next();
-        return a;
-    };
-
-    function array_() {
-        return new AST_Array({
-            elements: expr_list("]", !exigent_mode, true)
-        });
+        };
+        return token("operator", grow(prefix || next()));
     };
 
-    var object_ = embed_tokens(function() {
-        var first = true, a = [];
-        while (!is("punc", "}")) {
-            if (first) first = false; else expect(",");
-            if (!exigent_mode && is("punc", "}"))
-                // allow trailing comma
-                break;
-            var start = S.token;
-            var type = start.type;
-            var name = as_property_name();
-            if (type == "name" && !is("punc", ":")) {
-                if (name.name == "get") {
-                    a.push(new AST_ObjectGetter({
-                        start : start,
-                        name  : name,
-                        func  : function_(false),
-                        end   : prev()
-                    }));
-                    continue;
-                }
-                if (name.name == "set") {
-                    a.push(new AST_ObjectSetter({
-                        start : start,
-                        name  : name,
-                        func  : function_(false),
-                        end   : prev()
-                    }));
-                    continue;
-                }
-            }
-            expect(":");
-            a.push(new AST_ObjectKeyVal({
-                start : start,
-                key   : name,
-                value : expression(false),
-                end   : prev()
-            }));
-        }
+    // Called with the cursor on a "/".  Consumes it and decides what it starts:
+    // a line comment, a block comment, a regexp, or an operator.  Comments are
+    // pushed onto S.comments_before and the next real token is returned instead.
+    function handle_slash() {
         next();
-        return new AST_Object({ properties: a });
-    });
-
-    function as_property_name() {
-        switch (S.token.type) {
-          case "num":
-          case "string":
-            return as_symbol(true);
-        }
-        return as_name();
-    };
-
-    function as_name() {
-        switch (S.token.type) {
-          case "name":
-          case "operator":
-          case "keyword":
-          case "atom":
-            return as_symbol(true);
-          default:
-            unexpected();
+        // token() recomputes S.regex_allowed for every token it builds, comment
+        // tokens included, so the value is saved here and restored after a comment
+        var regex_allowed = S.regex_allowed;
+        switch (peek()) {
+          case "/":
+            S.comments_before.push(read_line_comment());
+            S.regex_allowed = regex_allowed;
+            return next_token();
+          case "*":
+            S.comments_before.push(read_multiline_comment());
+            S.regex_allowed = regex_allowed;
+            return next_token();
         }
+        return S.regex_allowed ? read_regexp("") : read_operator("/");
     };
 
-    function as_symbol(noerror) {
-        if (!noerror && !is("name")) croak("Name expected");
-        var sym = new AST_Symbol({
-            name  : String(S.token.value),
-            start : S.token,
-            end   : S.token
-        });
+    // Called with the cursor on a ".".  Consumes it; a following digit makes it
+    // the start of a number (read_num is given "." as prefix), otherwise it is
+    // returned as a "punc" token.
+    function handle_dot() {
         next();
-        return sym;
+        return is_digit(peek())
+            ? read_num(".")
+            : token("punc", ".");
     };
 
-    var subscripts = embed_tokens(function(expr, allow_calls) {
-        if (is("punc", ".")) {
-            next();
-            return subscripts(new AST_Dot({
-                expression : expr,
-                property   : as_name()
-            }), allow_calls);
-        }
-        if (is("punc", "[")) {
-            next();
-            return subscripts(new AST_Sub({
-                expression : expr,
-                property   : prog1(expression, curry(expect, "]"))
-            }), allow_calls);
-        }
-        if (allow_calls && is("punc", "(")) {
-            next();
-            return subscripts(new AST_Call({
-                expression : expr,
-                args       : expr_list(")")
-            }), true);
-        }
-        return expr;
-    });
-
-    var maybe_unary = embed_tokens(function(allow_calls) {
-        if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
-            return make_unary(AST_UnaryPrefix,
-                              prog1(S.token.value, next),
-                              maybe_unary(allow_calls));
-        }
-        var val = expr_atom(allow_calls);
-        while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
-            val = make_unary(AST_UnaryPostfix, S.token.value, val);
-            next();
-        }
-        return val;
-    });
-
-    function make_unary(ctor, op, expr) {
-        if ((op == "++" || op == "--") && !is_assignable(expr))
-            croak("Invalid use of " + op + " operator");
-        return new ctor({ operator: op, expression: expr });
+    function read_word() {
+        // Classify the word just read: atom first, then plain name, then
+        // operator-like keyword, and ordinary keyword as the fallback.
+        var id = read_name(), type;
+        if (HOP(KEYWORDS_ATOM, id)) type = "atom";
+        else if (!HOP(KEYWORDS, id)) type = "name";
+        else if (HOP(OPERATORS, id)) type = "operator";
+        else type = "keyword";
+        return token(type, id);
     };
 
-    var expr_op = embed_tokens(function(left, min_prec, no_in) {
-        var op = is("operator") ? S.token.value : null;
-        if (op == "in" && no_in) op = null;
-        var prec = op != null ? PRECEDENCE[op] : null;
-        if (prec != null && prec > min_prec) {
-            next();
-            var right = expr_op(maybe_unary(true), prec, no_in);
-            return expr_op(new AST_Binary({
-                left     : left,
-                operator : op,
-                right    : right
-            }), min_prec, no_in);
+    function with_eof_error(eof_error, cont) {
+        // Run cont(); an EX_EOF escape is reported via parse_error(eof_error).
+        try { return cont(); }
+        catch(ex) {
+            if (ex !== EX_EOF) throw ex;
+            parse_error(eof_error);
         }
-        return left;
-    });
-
-    function expr_ops(no_in) {
-        return expr_op(maybe_unary(true), 0, no_in);
     };
 
-    var maybe_conditional = embed_tokens(function(no_in) {
-        var expr = expr_ops(no_in);
-        if (is("operator", "?")) {
-            next();
-            var yes = expression(false);
-            expect(":");
-            return new AST_Conditional({
-                condition: expr,
-                consequent: yes,
-                alternative: expression(false, no_in)
-            });
-        }
-        return expr;
-    });
-
-    function is_assignable(expr) {
-        if (!exigent_mode) return true;
-        switch (expr[0]+"") {
-          case "dot":
-          case "sub":
-          case "new":
-          case "call":
-            return true;
-          case "name":
-            return expr[1] != "this";
-        }
+    function next_token(force_regexp) { // returns the next token, dispatching on the first character
+        if (force_regexp != null) // non-null argument: skip scanning and forward it to read_regexp
+            return read_regexp(force_regexp);
+        skip_whitespace();
+        start_token();
+        var ch = peek();
+        if (!ch) return token("eof"); // nothing to peek at (presumably end of input): emit "eof"
+        if (is_digit(ch)) return read_num(); // leading digit: number
+        if (ch == '"' || ch == "'") return read_string(); // either quote character: string
+        if (HOP(PUNC_CHARS, ch)) return token("punc", next()); // next() consumes the punctuation char
+        if (ch == ".") return handle_dot(); // handle_dot picks between a number and the "." punc
+        if (ch == "/") return handle_slash(); // tested before OPERATOR_CHARS so "/" never reaches read_operator here
+        if (HOP(OPERATOR_CHARS, ch)) return read_operator();
+        if (ch == "\\" || is_identifier_start(ch)) return read_word(); // a backslash is also handed to read_word
+        parse_error("Unexpected character '" + ch + "'"); // no rule matched this character
     };
 
-    var maybe_assign = embed_tokens(function(no_in) {
-        var left = maybe_conditional(no_in), val = S.token.value;
-        if (is("operator") && HOP(ASSIGNMENT, val)) {
-            if (is_assignable(left)) {
-                next();
-                return new AST_Assign({
-                    left     : left,
-                    operator : ASSIGNMENT[val],
-                    right    : maybe_assign(no_in)
-                });
-            }
-            croak("Invalid assignment");
-        }
-        return left;
-    });
-
-    var expression = embed_tokens(function(commas, no_in) {
-        if (arguments.length == 0)
-            commas = true;
-        var expr = maybe_assign(no_in);
-        if (commas && is("punc", ",")) {
-            next();
-            return new AST_Seq({
-                first  : expr,
-                second : expression(true, no_in)
-            });
-        }
-        return expr;
-    });
-
-    function in_loop(cont) {
-        ++S.in_loop;
-        var ret = cont();
-        --S.in_loop;
-        return ret;
+    next_token.context = function(nc) {
+        // Getter/setter for the tokenizer state S: a truthy `nc` replaces S first.
+        return nc ? (S = nc) : S;
     };
 
-    return new AST_Toplevel({
-        body: (function(a){
-            while (!is("eof"))
-                a.push(statement());
-            return a;
-        })([])
-    });
+    return next_token;
 
 };
 
@@ -737,16 +347,12 @@ console.time("parse");
 var ast = parse(func.toString());
 console.timeEnd("parse");
 
-console.log(ast);
-
 
 
-    // var moo = 1, i, man = moo + bar;
-    // try {
-    //     loop: while (/foobar/.test(bar)) {
-    //         alert(bar);
-    //         continue loop;
-    //     }
-    // } finally {
-    //     return crap;
-    // }
+// Debug visitor: log every visited node, then its TYPE and start position,
+// and invoke `descend` (with the node as `this`) whenever one is supplied.
+ast.walk({ _visit: function(node, descend) {
+    console.log(node);
+    console.log(node.TYPE, ":", node.start.pos);
+    if (descend) descend.call(node);
+}});
index 01d477e..4e4f58f 100644 (file)
@@ -33,6 +33,32 @@ function member(name, array) {
     return false;
 };
 
+function find_if(func, array) {
+    // Linear scan: first element for which `func` is truthy, else undefined.
+    var idx = 0, total = array.length;
+    while (idx < total && !func(array[idx])) ++idx;
+    if (idx < total) return array[idx];
+};
+
 function HOP(obj, prop) {
     return Object.prototype.hasOwnProperty.call(obj, prop);
 };
+
+function repeat_string(str, i) {
+    // Repeat `str` `i` times by doubling: build i>>1 copies recursively,
+    // concatenate that with itself, and append one more copy when i is odd.
+    if (i <= 0) return "";
+    if (i == 1) return str;
+    var half = repeat_string(str, i >> 1);
+    return (i & 1) ? half + half + str : half + half;
+};
+
+function defaults(args, defs) {
+    // Copy own keys of `defs`, letting own keys of `args` override; `true` means "all defaults".
+    var given = (args === true) ? {} : args, merged = {}, key;
+    for (key in defs) {
+        if (!HOP(defs, key)) continue;
+        merged[key] = (given && HOP(given, key)) ? given[key] : defs[key];
+    }
+    return merged;
+};