fixed some mess with symbols/scope
authorMihai Bazon <mihai@bazon.net>
Tue, 11 Sep 2012 12:42:28 +0000 (15:42 +0300)
committerMihai Bazon <mihai@bazon.net>
Tue, 11 Sep 2012 12:42:28 +0000 (15:42 +0300)
- all symbols now have a `thedef` property which is a SymbolDef object,
  instead of the `uniq` that we had before (pointing to the first occurrence
  of the name as declaration).

- for undeclared symbols we still create a SymbolDef object in the toplevel
  scope but mark it "undeclared"

- we can now call figure_out_scope after squeezing, which is useful in order
  not to mangle names that were dropped by the squeezer

bin/uglifyjs2
lib/ast.js
lib/output.js
lib/scope.js
lib/utils.js
tmp/test-node.js

index b378836..0eb0e15 100755 (executable)
@@ -73,6 +73,7 @@ UglifyJS.base54.sort();
 files.forEach(do_file_3);
 if (ARGS.v) {
     sys.error("BASE54 digits: " + UglifyJS.base54.get());
+    //sys.error("Frequency: " + sys.inspect(UglifyJS.base54.freq()));
 }
 
 output = output.get();
index 3ede1d1..5187b5a 100644 (file)
@@ -549,11 +549,11 @@ var AST_ObjectGetter = DEFNODE("ObjectGetter", null, {
     $documentation: "An object getter property",
 }, AST_ObjectProperty);
 
-var AST_Symbol = DEFNODE("Symbol", "scope name global undeclared", {
+var AST_Symbol = DEFNODE("Symbol", "scope name thedef", {
     $documentation: "Base class for all symbols",
 });
 
-var AST_SymbolDeclaration = DEFNODE("SymbolDeclaration", "references", {
+var AST_SymbolDeclaration = DEFNODE("SymbolDeclaration", null, {
     $documentation: "A declaration symbol (symbol in var/const, function name or argument, symbol in catch)",
 }, AST_Symbol);
 
@@ -581,7 +581,7 @@ var AST_Label = DEFNODE("Label", null, {
     $documentation: "Symbol naming a label (declaration)",
 }, AST_SymbolDeclaration);
 
-var AST_SymbolRef = DEFNODE("SymbolRef", "symbol", {
+var AST_SymbolRef = DEFNODE("SymbolRef", null, {
     $documentation: "Reference to some symbol (not definition/declaration)",
 }, AST_Symbol);
 
index 35c7ade..2571881 100644 (file)
@@ -884,16 +884,9 @@ function OutputStream(options) {
         self.value._do_print(output, true);
     });
     DEFPRINT(AST_Symbol, function(self, output){
-        output.print_name(self.name);
-    });
-    DEFPRINT(AST_SymbolDeclaration, function(self, output){
         var def = self.definition();
         output.print_name(def.mangled_name || def.name);
     });
-    DEFPRINT(AST_SymbolRef, function(self, output){
-        var def = self.symbol;
-        output.print_name(def ? def.mangled_name || def.name : self.name);
-    });
     DEFPRINT(AST_This, function(self, output){
         output.print("this");
     });
index 1224e60..ef0ac6e 100644 (file)
 
  ***********************************************************************/
 
+function SymbolDef(scope, orig) {
+    this.name = orig.name;
+    this.orig = [ orig ];
+    this.scope = scope;
+    this.references = [];
+    this.global = false;
+    this.mangled_name = null;
+    this.undeclared = false;
+};
+
+SymbolDef.prototype = {
+    unmangleable: function() {
+        return this.global || this.undeclared || this.scope.uses_eval || this.scope.uses_with;
+    },
+    mangle: function() {
+        if (!this.mangled_name && !this.unmangleable())
+            this.mangled_name = this.scope.next_mangled();
+    }
+};
+
 AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
     // This does what ast_add_scope did in UglifyJS v1.
     //
@@ -76,7 +96,10 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
             delete labels[l.name];
             return true;        // no descend again
         }
-        if (node instanceof AST_SymbolDeclaration) {
+        if (node instanceof AST_Symbol) {
+            node.scope = scope;
+        }
+        if (node instanceof AST_Label) {
             node.init_scope_vars();
         }
         if (node instanceof AST_SymbolLambda) {
@@ -88,7 +111,7 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
             // scope when we encounter the AST_Defun node (which is
             // instanceof AST_Scope) but we get to the symbol a bit
             // later.
-            scope.parent_scope.def_function(node);
+            (node.scope = scope.parent_scope).def_function(node);
         }
         else if (node instanceof AST_SymbolVar) {
             scope.def_variable(node);
@@ -102,9 +125,6 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
             // identifier in the enclosing scope)
             scope.def_variable(node);
         }
-        else if (node instanceof AST_SymbolRef) {
-            node.scope = scope;
-        }
         if (node instanceof AST_LabelRef) {
             var sym = labels[node.name];
             if (!sym) throw new Error("Undefined label " + node.name);
@@ -115,6 +135,7 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
 
     // pass 2: find back references and eval
     var func = null;
+    var globals = self.globals = {};
     var tw = new TreeWalker(function(node, descend){
         if (node instanceof AST_Lambda) {
             var prev_func = func;
@@ -124,18 +145,28 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
             return true;
         }
         if (node instanceof AST_SymbolRef) {
-            var sym = node.scope.find_variable(node);
-            node.reference(sym);
+            var name = node.name;
+            var sym = node.scope.find_variable(name);
             if (!sym) {
-                if (node.name == "eval") {
-                    for (var s = node.scope;
-                         s && !s.uses_eval;
-                         s = s.parent_scope) s.uses_eval = true;
+                var g;
+                if (HOP(globals, name)) {
+                    g = globals[name];
+                } else {
+                    g = new SymbolDef(self, node);
+                    g.undeclared = true;
+                }
+                node.thedef = g;
+                if (name == "eval") {
+                    for (var s = node.scope; s && !s.uses_eval; s = s.parent_scope)
+                        s.uses_eval = true;
                 }
-                if (node.name == "arguments") {
+                if (name == "arguments") {
                     func.uses_arguments = true;
                 }
+            } else {
+                node.thedef = sym;
             }
+            node.reference();
         }
     });
     self.walk(tw);
@@ -156,128 +187,47 @@ AST_Lambda.DEFMETHOD("init_scope_vars", function(){
     this.uses_arguments = false;
 });
 
-AST_SymbolDeclaration.DEFMETHOD("init_scope_vars", function(){
-    this.references = [];
+AST_SymbolRef.DEFMETHOD("reference", function() {
+    var def = this.definition();
+    def.references.push(this);
+    var orig_scope = def.scope, s = this.scope;
+    while (s) {
+        push_uniq(s.enclosed, def);
+        s = s.parent_scope;
+    }
 });
 
-AST_Toplevel.DEFMETHOD("scope_warnings", function(options){
-    options = defaults(options, {
-        undeclared       : false, // this makes a lot of noise
-        unreferenced     : true,
-        assign_to_global : true,
-        func_arguments   : true,
-        nested_defuns    : true,
-        eval             : true
-    });
-    var tw = new TreeWalker(function(node){
-        if (options.undeclared
-            && node instanceof AST_SymbolRef
-            && node.undeclared)
-        {
-            // XXX: this also warns about JS standard names,
-            // i.e. Object, Array, parseInt etc.  Should add a list of
-            // exceptions.
-            AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", {
-                name: node.name,
-                line: node.start.line,
-                col: node.start.col
-            });
-        }
-        if (options.assign_to_global)
-        {
-            var sym = null;
-            if (node instanceof AST_Assign && node.left instanceof AST_SymbolRef)
-                sym = node.left;
-            else if (node instanceof AST_ForIn && node.init instanceof AST_SymbolRef)
-                sym = node.init;
-            if (sym
-                && (sym.undeclared
-                    || (sym.symbol.global
-                        && sym.scope !== sym.symbol.scope))) {
-                AST_Node.warn("{msg}: {name} [{line},{col}]", {
-                    msg: sym.undeclared ? "Accidental global?" : "Assignment to global",
-                    name: sym.name,
-                    line: sym.start.line,
-                    col: sym.start.col
-                });
-            }
-        }
-        if (options.eval
-            && node instanceof AST_SymbolRef
-            && node.undeclared
-            && node.name == "eval") {
-            AST_Node.warn("Eval is used [{line},{col}]", node.start);
-        }
-        if (options.unreferenced
-            && node instanceof AST_SymbolDeclaration
-            && node.unreferenced()) {
-            AST_Node.warn("{type} {name} is declared but not referenced [{line},{col}]", {
-                type: node instanceof AST_Label ? "Label" : "Symbol",
-                name: node.name,
-                line: node.start.line,
-                col: node.start.col
-            });
-        }
-        if (options.func_arguments
-            && node instanceof AST_Lambda
-            && node.uses_arguments) {
-            AST_Node.warn("arguments used in function {name} [{line},{col}]", {
-                name: node.name ? node.name.name : "anonymous",
-                line: node.start.line,
-                col: node.start.col
-            });
-        }
-        if (options.nested_defuns
-            && node instanceof AST_Defun
-            && !(tw.parent() instanceof AST_Scope)) {
-            AST_Node.warn("Function {name} declared in nested statement [{line},{col}]", {
-                name: node.name.name,
-                line: node.start.line,
-                col: node.start.col
-            });
-        }
-    });
-    this.walk(tw);
+AST_Label.DEFMETHOD("init_scope_vars", function(){
+    this.references = [];
 });
 
-AST_SymbolRef.DEFMETHOD("reference", function(symbol) {
-    if (symbol) {
-        this.symbol = symbol;
-        var origin = symbol.scope;
-        symbol.references.push(this);
-        for (var s = this.scope; s; s = s.parent_scope) {
-            push_uniq(s.enclosed, symbol);
-            if (s === origin) break;
-        }
-    } else {
-        this.undeclared = true;
-        for (var s = this.scope; s; s = s.parent_scope) {
-            push_uniq(s.enclosed, this);
-        }
-    }
+AST_LabelRef.DEFMETHOD("reference", function(def){
+    this.thedef = def;
+    def.references.push(this);
 });
 
 AST_Scope.DEFMETHOD("find_variable", function(name){
     if (name instanceof AST_Symbol) name = name.name;
     return HOP(this.variables, name)
         ? this.variables[name]
-        : ((this.name && this.name.name == name && this.name)
-           || (this.parent_scope && this.parent_scope.find_variable(name)));
+        : (this.parent_scope && this.parent_scope.find_variable(name));
 });
 
 AST_Scope.DEFMETHOD("def_function", function(symbol){
-    this.functions[symbol.name] = symbol;
-    this.def_variable(symbol);
+    this.functions[symbol.name] = this.def_variable(symbol);
 });
 
 AST_Scope.DEFMETHOD("def_variable", function(symbol){
-    symbol.global = !this.parent_scope;
+    var def;
     if (!HOP(this.variables, symbol.name)) {
-        this.variables[symbol.name] = symbol;
+        def = new SymbolDef(this, symbol);
+        this.variables[symbol.name] = def;
+        def.global = !this.parent_scope;
     } else {
-        symbol.uniq = this.variables[symbol.name];
+        def = this.variables[symbol.name];
+        def.orig.push(symbol);
     }
-    symbol.scope = this;
+    return symbol.thedef = def;
 });
 
 AST_Scope.DEFMETHOD("next_mangled", function(){
@@ -297,33 +247,29 @@ AST_Scope.DEFMETHOD("next_mangled", function(){
     }
 });
 
-AST_SymbolDeclaration.DEFMETHOD("unmangleable", function(){
-    return this.global || this.scope.uses_eval || this.scope.uses_with;
+AST_Symbol.DEFMETHOD("unmangleable", function(){
+    return this.definition().unmangleable();
 });
 
+// labels are always mangleable
 AST_Label.DEFMETHOD("unmangleable", function(){
     return false;
 });
 
-AST_SymbolDeclaration.DEFMETHOD("mangle", function(){
-    if (this.uniq && this.uniq !== this) {
-        this.uniq.mangle();
-    }
-    else if (!(this.mangled_name || this.unmangleable())) {
-        this.mangled_name = this.scope.next_mangled();
-    }
+AST_Symbol.DEFMETHOD("unreferenced", function(){
+    return this.definition().references.length == 0;
 });
 
-AST_Label.DEFMETHOD("mangle", function(){
-    throw new Error("Don't call this");
+AST_Symbol.DEFMETHOD("undeclared", function(){
+    return this.definition().undeclared;
 });
 
-AST_SymbolDeclaration.DEFMETHOD("unreferenced", function(){
-    return this.definition().references.length == 0;
+AST_Symbol.DEFMETHOD("definition", function(){
+    return this.thedef;
 });
 
-AST_SymbolDeclaration.DEFMETHOD("definition", function(){
-    return this.uniq || this;
+AST_Symbol.DEFMETHOD("global", function(){
+    return this.definition().global;
 });
 
 AST_Toplevel.DEFMETHOD("mangle_names", function(){
@@ -332,6 +278,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
     // present (and for AST_SymbolRef-s it'll use the mangled name of
     // the AST_SymbolDeclaration that it points to).
     var lname = -1;
+    var to_mangle = [];
     var tw = new TreeWalker(function(node, descend){
         if (node instanceof AST_LabeledStatement) {
             // lname is incremented when we get to the AST_Label
@@ -347,7 +294,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
             for (var i in a) if (HOP(a, i)) {
                 var symbol = a[i];
                 if (!(is_setget && symbol instanceof AST_SymbolLambda))
-                    symbol.mangle();
+                    to_mangle.push(symbol);
             }
             return;
         }
@@ -359,6 +306,16 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
         }
     });
     this.walk(tw);
+
+    // strangely, if we try to give more frequently used variables
+    // shorter name, the size after gzip seems to be higher!  so leave
+    // this commented out I guess...
+    //
+    // to_mangle = mergeSort(to_mangle, function(a, b){
+    //     return b.references.length - a.references.length;
+    // });
+
+    to_mangle.forEach(function(def){ def.mangle() });
 });
 
 AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
@@ -419,9 +376,7 @@ AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
             base54.consider("catch");
         else if (node instanceof AST_Finally)
             base54.consider("finally");
-        else if (node instanceof AST_SymbolDeclaration && node.unmangleable())
-            base54.consider(node.name);
-        else if (node instanceof AST_SymbolRef && !node.uniq && !(node instanceof AST_LabelRef))
+        else if (node instanceof AST_Symbol && node.unmangleable())
             base54.consider(node.name);
         else if (node instanceof AST_Unary || node instanceof AST_Binary)
             base54.consider(node.operator);
@@ -442,12 +397,12 @@ var base54 = (function() {
     base54.consider = function(str){
         for (var i = str.length; --i >= 0;) {
             var ch = str.charAt(i);
-            if (string.indexOf(ch))
+            if (string.indexOf(ch) >= 0)
                 ++frequency[ch];
         }
     };
     base54.sort = function() {
-        chars.sort(function(a, b){
+        chars = mergeSort(chars, function(a, b){
             if (is_digit(a) && !is_digit(b)) return 1;
             if (is_digit(b) && !is_digit(a)) return -1;
             return frequency[b] - frequency[a];
@@ -456,6 +411,7 @@ var base54 = (function() {
     base54.reset = reset;
     reset();
     base54.get = function(){ return chars };
+    base54.freq = function(){ return frequency };
     function base54(num) {
         var ret = "", base = 54;
         do {
@@ -467,3 +423,82 @@ var base54 = (function() {
     };
     return base54;
 })();
+
+AST_Toplevel.DEFMETHOD("scope_warnings", function(options){
+    options = defaults(options, {
+        undeclared       : false, // this makes a lot of noise
+        unreferenced     : true,
+        assign_to_global : true,
+        func_arguments   : true,
+        nested_defuns    : true,
+        eval             : true
+    });
+    var tw = new TreeWalker(function(node){
+        if (options.undeclared
+            && node instanceof AST_SymbolRef
+            && node.undeclared())
+        {
+            // XXX: this also warns about JS standard names,
+            // i.e. Object, Array, parseInt etc.  Should add a list of
+            // exceptions.
+            AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", {
+                name: node.name,
+                line: node.start.line,
+                col: node.start.col
+            });
+        }
+        if (options.assign_to_global)
+        {
+            var sym = null;
+            if (node instanceof AST_Assign && node.left instanceof AST_SymbolRef)
+                sym = node.left;
+            else if (node instanceof AST_ForIn && node.init instanceof AST_SymbolRef)
+                sym = node.init;
+            if (sym
+                && (sym.undeclared()
+                    || (sym.global() && sym.scope !== sym.definition().scope))) {
+                AST_Node.warn("{msg}: {name} [{line},{col}]", {
+                    msg: sym.undeclared() ? "Accidental global?" : "Assignment to global",
+                    name: sym.name,
+                    line: sym.start.line,
+                    col: sym.start.col
+                });
+            }
+        }
+        if (options.eval
+            && node instanceof AST_SymbolRef
+            && node.undeclared()
+            && node.name == "eval") {
+            AST_Node.warn("Eval is used [{line},{col}]", node.start);
+        }
+        if (options.unreferenced
+            && node instanceof AST_SymbolDeclaration
+            && node.unreferenced()) {
+            AST_Node.warn("{type} {name} is declared but not referenced [{line},{col}]", {
+                type: node instanceof AST_Label ? "Label" : "Symbol",
+                name: node.name,
+                line: node.start.line,
+                col: node.start.col
+            });
+        }
+        if (options.func_arguments
+            && node instanceof AST_Lambda
+            && node.uses_arguments) {
+            AST_Node.warn("arguments used in function {name} [{line},{col}]", {
+                name: node.name ? node.name.name : "anonymous",
+                line: node.start.line,
+                col: node.start.col
+            });
+        }
+        if (options.nested_defuns
+            && node instanceof AST_Defun
+            && !(tw.parent() instanceof AST_Scope)) {
+            AST_Node.warn("Function {name} declared in nested statement [{line},{col}]", {
+                name: node.name.name,
+                line: node.start.line,
+                col: node.start.col
+            });
+        }
+    });
+    this.walk(tw);
+});
index 6a73c71..e9a2f80 100644 (file)
@@ -151,3 +151,27 @@ function string_template(text, props) {
         return props[p];
     });
 };
+
+function mergeSort(array, cmp) {
+    if (array.length < 2) return array.slice();
+    function merge(a, b) {
+        var r = [], ai = 0, bi = 0, i = 0;
+        while (ai < a.length && bi < b.length) {
+            cmp(a[ai], b[bi]) <= 0
+                ? r[i++] = a[ai++]
+                : r[i++] = b[bi++];
+        }
+        if (ai < a.length) r.push.apply(r, a.slice(ai));
+        if (bi < b.length) r.push.apply(r, b.slice(bi));
+        return r;
+    };
+    function _ms(a) {
+        if (a.length <= 1)
+            return a;
+        var m = Math.floor(a.length / 2), left = a.slice(0, m), right = a.slice(m);
+        left = _ms(left);
+        right = _ms(right);
+        return merge(left, right);
+    };
+    return _ms(array);
+};
index 4da5e9f..fdabafb 100755 (executable)
@@ -16,6 +16,7 @@ ast.compute_char_frequency();
 UglifyJS.base54.sort();
 
 ast.figure_out_scope();
+ast.scope_warnings();
 ast.mangle_names();
 
 sys.error(UglifyJS.base54.get());