From 8e82d8d94cd60f7423ee61032a30a35222680dc3 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Tue, 11 Sep 2012 15:42:28 +0300 Subject: [PATCH] fixed some mess with symbols/scope - all symbols now have a `thedef` property which is a SymbolDef object, instead of the `uniq` that we had before (pointing to the first occurrence of the name as declaration). - for undeclared symbols we still create a SymbolDef object in the toplevel scope but mark it "undeclared" - we can now call figure_out_scope after squeezing, which is useful in order not to mangle names that were dropped by the squeezer --- bin/uglifyjs2 | 1 + lib/ast.js | 6 +- lib/output.js | 7 -- lib/scope.js | 307 ++++++++++++++++++++++++++--------------------- lib/utils.js | 24 ++++ tmp/test-node.js | 1 + 6 files changed, 200 insertions(+), 146 deletions(-) diff --git a/bin/uglifyjs2 b/bin/uglifyjs2 index b3788364..0eb0e158 100755 --- a/bin/uglifyjs2 +++ b/bin/uglifyjs2 @@ -73,6 +73,7 @@ UglifyJS.base54.sort(); files.forEach(do_file_3); if (ARGS.v) { sys.error("BASE54 digits: " + UglifyJS.base54.get()); + //sys.error("Frequency: " + sys.inspect(UglifyJS.base54.freq())); } output = output.get(); diff --git a/lib/ast.js b/lib/ast.js index 3ede1d13..5187b5ad 100644 --- a/lib/ast.js +++ b/lib/ast.js @@ -549,11 +549,11 @@ var AST_ObjectGetter = DEFNODE("ObjectGetter", null, { $documentation: "An object getter property", }, AST_ObjectProperty); -var AST_Symbol = DEFNODE("Symbol", "scope name global undeclared", { +var AST_Symbol = DEFNODE("Symbol", "scope name thedef", { $documentation: "Base class for all symbols", }); -var AST_SymbolDeclaration = DEFNODE("SymbolDeclaration", "references", { +var AST_SymbolDeclaration = DEFNODE("SymbolDeclaration", null, { $documentation: "A declaration symbol (symbol in var/const, function name or argument, symbol in catch)", }, AST_Symbol); @@ -581,7 +581,7 @@ var AST_Label = DEFNODE("Label", null, { $documentation: "Symbol naming a label (declaration)", }, AST_SymbolDeclaration); -var AST_SymbolRef = DEFNODE("SymbolRef", "symbol", { +var AST_SymbolRef = DEFNODE("SymbolRef", null, { $documentation: "Reference to some symbol (not definition/declaration)", }, AST_Symbol); diff --git a/lib/output.js b/lib/output.js index 35c7ade7..25718816 100644 --- a/lib/output.js +++ b/lib/output.js @@ -884,16 +884,9 @@ function OutputStream(options) { self.value._do_print(output, true); }); DEFPRINT(AST_Symbol, function(self, output){ - output.print_name(self.name); - }); - DEFPRINT(AST_SymbolDeclaration, function(self, output){ var def = self.definition(); output.print_name(def.mangled_name || def.name); }); - DEFPRINT(AST_SymbolRef, function(self, output){ - var def = self.symbol; - output.print_name(def ? def.mangled_name || def.name : self.name); - }); DEFPRINT(AST_This, function(self, output){ output.print("this"); }); diff --git a/lib/scope.js b/lib/scope.js index 1224e604..ef0ac6ed 100644 --- a/lib/scope.js +++ b/lib/scope.js @@ -41,6 +41,26 @@ ***********************************************************************/ +function SymbolDef(scope, orig) { + this.name = orig.name; + this.orig = [ orig ]; + this.scope = scope; + this.references = []; + this.global = false; + this.mangled_name = null; + this.undeclared = false; +}; + +SymbolDef.prototype = { + unmangleable: function() { + return this.global || this.undeclared || this.scope.uses_eval || this.scope.uses_with; + }, + mangle: function() { + if (!this.mangled_name && !this.unmangleable()) + this.mangled_name = this.scope.next_mangled(); + } +}; + AST_Toplevel.DEFMETHOD("figure_out_scope", function(){ // This does what ast_add_scope did in UglifyJS v1. // @@ -76,7 +96,10 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){ delete labels[l.name]; return true; // no descend again } - if (node instanceof AST_SymbolDeclaration) { + if (node instanceof AST_Symbol) { + node.scope = scope; + } + if (node instanceof AST_Label) { node.init_scope_vars(); } if (node instanceof AST_SymbolLambda) { @@ -88,7 +111,7 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){ // scope when we encounter the AST_Defun node (which is // instanceof AST_Scope) but we get to the symbol a bit // later. - scope.parent_scope.def_function(node); + (node.scope = scope.parent_scope).def_function(node); } else if (node instanceof AST_SymbolVar) { scope.def_variable(node); @@ -102,9 +125,6 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){ // identifier in the enclosing scope) scope.def_variable(node); } - else if (node instanceof AST_SymbolRef) { - node.scope = scope; - } if (node instanceof AST_LabelRef) { var sym = labels[node.name]; if (!sym) throw new Error("Undefined label " + node.name); @@ -115,6 +135,7 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){ // pass 2: find back references and eval var func = null; + var globals = self.globals = {}; var tw = new TreeWalker(function(node, descend){ if (node instanceof AST_Lambda) { var prev_func = func; @@ -124,18 +145,28 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){ return true; } if (node instanceof AST_SymbolRef) { - var sym = node.scope.find_variable(node); - node.reference(sym); + var name = node.name; + var sym = node.scope.find_variable(name); if (!sym) { - if (node.name == "eval") { - for (var s = node.scope; - s && !s.uses_eval; - s = s.parent_scope) s.uses_eval = true; + var g; + if (HOP(globals, name)) { + g = globals[name]; + } else { + g = new SymbolDef(self, node); + g.undeclared = true; + } + node.thedef = g; + if (name == "eval") { + for (var s = node.scope; s && !s.uses_eval; s = s.parent_scope) + s.uses_eval = true; } - if (node.name == "arguments") { + if (name == "arguments") { func.uses_arguments = true; } + } else { + node.thedef = sym; } + node.reference(); } }); self.walk(tw); @@ -156,128 +187,47 @@ AST_Lambda.DEFMETHOD("init_scope_vars", function(){ this.uses_arguments = false; }); -AST_SymbolDeclaration.DEFMETHOD("init_scope_vars", function(){ - this.references = []; +AST_SymbolRef.DEFMETHOD("reference", function() { + var def = this.definition(); + def.references.push(this); + var orig_scope = def.scope, s = this.scope; + while (s) { + push_uniq(s.enclosed, def); + s = s.parent_scope; + } }); -AST_Toplevel.DEFMETHOD("scope_warnings", function(options){ - options = defaults(options, { - undeclared : false, // this makes a lot of noise - unreferenced : true, - assign_to_global : true, - func_arguments : true, - nested_defuns : true, - eval : true - }); - var tw = new TreeWalker(function(node){ - if (options.undeclared - && node instanceof AST_SymbolRef - && node.undeclared) - { - // XXX: this also warns about JS standard names, - // i.e. Object, Array, parseInt etc. Should add a list of - // exceptions. - AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", { - name: node.name, - line: node.start.line, - col: node.start.col - }); - } - if (options.assign_to_global) - { - var sym = null; - if (node instanceof AST_Assign && node.left instanceof AST_SymbolRef) - sym = node.left; - else if (node instanceof AST_ForIn && node.init instanceof AST_SymbolRef) - sym = node.init; - if (sym - && (sym.undeclared - || (sym.symbol.global - && sym.scope !== sym.symbol.scope))) { - AST_Node.warn("{msg}: {name} [{line},{col}]", { - msg: sym.undeclared ? "Accidental global?" : "Assignment to global", - name: sym.name, - line: sym.start.line, - col: sym.start.col - }); - } - } - if (options.eval - && node instanceof AST_SymbolRef - && node.undeclared - && node.name == "eval") { - AST_Node.warn("Eval is used [{line},{col}]", node.start); - } - if (options.unreferenced - && node instanceof AST_SymbolDeclaration - && node.unreferenced()) { - AST_Node.warn("{type} {name} is declared but not referenced [{line},{col}]", { - type: node instanceof AST_Label ? "Label" : "Symbol", - name: node.name, - line: node.start.line, - col: node.start.col - }); - } - if (options.func_arguments - && node instanceof AST_Lambda - && node.uses_arguments) { - AST_Node.warn("arguments used in function {name} [{line},{col}]", { - name: node.name ? node.name.name : "anonymous", - line: node.start.line, - col: node.start.col - }); - } - if (options.nested_defuns - && node instanceof AST_Defun - && !(tw.parent() instanceof AST_Scope)) { - AST_Node.warn("Function {name} declared in nested statement [{line},{col}]", { - name: node.name.name, - line: node.start.line, - col: node.start.col - }); - } - }); - this.walk(tw); +AST_Label.DEFMETHOD("init_scope_vars", function(){ + this.references = []; }); -AST_SymbolRef.DEFMETHOD("reference", function(symbol) { - if (symbol) { - this.symbol = symbol; - var origin = symbol.scope; - symbol.references.push(this); - for (var s = this.scope; s; s = s.parent_scope) { - push_uniq(s.enclosed, symbol); - if (s === origin) break; - } - } else { - this.undeclared = true; - for (var s = this.scope; s; s = s.parent_scope) { - push_uniq(s.enclosed, this); - } - } +AST_LabelRef.DEFMETHOD("reference", function(def){ + this.thedef = def; + def.references.push(this); }); AST_Scope.DEFMETHOD("find_variable", function(name){ if (name instanceof AST_Symbol) name = name.name; return HOP(this.variables, name) ? this.variables[name] - : ((this.name && this.name.name == name && this.name) - || (this.parent_scope && this.parent_scope.find_variable(name))); + : (this.parent_scope && this.parent_scope.find_variable(name)); }); AST_Scope.DEFMETHOD("def_function", function(symbol){ - this.functions[symbol.name] = symbol; - this.def_variable(symbol); + this.functions[symbol.name] = this.def_variable(symbol); }); AST_Scope.DEFMETHOD("def_variable", function(symbol){ - symbol.global = !this.parent_scope; + var def; if (!HOP(this.variables, symbol.name)) { - this.variables[symbol.name] = symbol; + def = new SymbolDef(this, symbol); + this.variables[symbol.name] = def; + def.global = !this.parent_scope; } else { - symbol.uniq = this.variables[symbol.name]; + def = this.variables[symbol.name]; + def.orig.push(symbol); } - symbol.scope = this; + return symbol.thedef = def; }); AST_Scope.DEFMETHOD("next_mangled", function(){ @@ -297,33 +247,29 @@ AST_Scope.DEFMETHOD("next_mangled", function(){ } }); -AST_SymbolDeclaration.DEFMETHOD("unmangleable", function(){ - return this.global || this.scope.uses_eval || this.scope.uses_with; +AST_Symbol.DEFMETHOD("unmangleable", function(){ + return this.definition().unmangleable(); }); +// labels are always mangleable AST_Label.DEFMETHOD("unmangleable", function(){ return false; }); -AST_SymbolDeclaration.DEFMETHOD("mangle", function(){ - if (this.uniq && this.uniq !== this) { - this.uniq.mangle(); - } - else if (!(this.mangled_name || this.unmangleable())) { - this.mangled_name = this.scope.next_mangled(); - } +AST_Symbol.DEFMETHOD("unreferenced", function(){ + return this.definition().references.length == 0; }); -AST_Label.DEFMETHOD("mangle", function(){ - throw new Error("Don't call this"); +AST_Symbol.DEFMETHOD("undeclared", function(){ + return this.definition().undeclared; }); -AST_SymbolDeclaration.DEFMETHOD("unreferenced", function(){ - return this.definition().references.length == 0; +AST_Symbol.DEFMETHOD("definition", function(){ + return this.thedef; }); -AST_SymbolDeclaration.DEFMETHOD("definition", function(){ - return this.uniq || this; +AST_Symbol.DEFMETHOD("global", function(){ + return this.definition().global; }); AST_Toplevel.DEFMETHOD("mangle_names", function(){ @@ -332,6 +278,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){ // present (and for AST_SymbolRef-s it'll use the mangled name of // the AST_SymbolDeclaration that it points to). var lname = -1; + var to_mangle = []; var tw = new TreeWalker(function(node, descend){ if (node instanceof AST_LabeledStatement) { // lname is incremented when we get to the AST_Label @@ -347,7 +294,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){ for (var i in a) if (HOP(a, i)) { var symbol = a[i]; if (!(is_setget && symbol instanceof AST_SymbolLambda)) - symbol.mangle(); + to_mangle.push(symbol); } return; } @@ -359,6 +306,16 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){ } }); this.walk(tw); + + // strangely, if we try to give more frequently used variables + // shorter name, the size after gzip seems to be higher! so leave + // this commented out I guess... + // + // to_mangle = mergeSort(to_mangle, function(a, b){ + // return b.references.length - a.references.length; + // }); + + to_mangle.forEach(function(def){ def.mangle() }); }); AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){ @@ -419,9 +376,7 @@ AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){ base54.consider("catch"); else if (node instanceof AST_Finally) base54.consider("finally"); - else if (node instanceof AST_SymbolDeclaration && node.unmangleable()) - base54.consider(node.name); - else if (node instanceof AST_SymbolRef && !node.uniq && !(node instanceof AST_LabelRef)) + else if (node instanceof AST_Symbol && node.unmangleable()) base54.consider(node.name); else if (node instanceof AST_Unary || node instanceof AST_Binary) base54.consider(node.operator); @@ -442,12 +397,12 @@ var base54 = (function() { base54.consider = function(str){ for (var i = str.length; --i >= 0;) { var ch = str.charAt(i); - if (string.indexOf(ch)) + if (string.indexOf(ch) >= 0) ++frequency[ch]; } }; base54.sort = function() { - chars.sort(function(a, b){ + chars = mergeSort(chars, function(a, b){ if (is_digit(a) && !is_digit(b)) return 1; if (is_digit(b) && !is_digit(a)) return -1; return frequency[b] - frequency[a]; @@ -456,6 +411,7 @@ var base54 = (function() { base54.reset = reset; reset(); base54.get = function(){ return chars }; + base54.freq = function(){ return frequency }; function base54(num) { var ret = "", base = 54; do { @@ -467,3 +423,82 @@ var base54 = (function() { }; return base54; })(); + +AST_Toplevel.DEFMETHOD("scope_warnings", function(options){ + options = defaults(options, { + undeclared : false, // this makes a lot of noise + unreferenced : true, + assign_to_global : true, + func_arguments : true, + nested_defuns : true, + eval : true + }); + var tw = new TreeWalker(function(node){ + if (options.undeclared + && node instanceof AST_SymbolRef + && node.undeclared()) + { + // XXX: this also warns about JS standard names, + // i.e. Object, Array, parseInt etc. Should add a list of + // exceptions. + AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", { + name: node.name, + line: node.start.line, + col: node.start.col + }); + } + if (options.assign_to_global) + { + var sym = null; + if (node instanceof AST_Assign && node.left instanceof AST_SymbolRef) + sym = node.left; + else if (node instanceof AST_ForIn && node.init instanceof AST_SymbolRef) + sym = node.init; + if (sym + && (sym.undeclared() + || (sym.global() && sym.scope !== sym.definition().scope))) { + AST_Node.warn("{msg}: {name} [{line},{col}]", { + msg: sym.undeclared() ? "Accidental global?" : "Assignment to global", + name: sym.name, + line: sym.start.line, + col: sym.start.col + }); + } + } + if (options.eval + && node instanceof AST_SymbolRef + && node.undeclared() + && node.name == "eval") { + AST_Node.warn("Eval is used [{line},{col}]", node.start); + } + if (options.unreferenced + && node instanceof AST_SymbolDeclaration + && node.unreferenced()) { + AST_Node.warn("{type} {name} is declared but not referenced [{line},{col}]", { + type: node instanceof AST_Label ? "Label" : "Symbol", + name: node.name, + line: node.start.line, + col: node.start.col + }); + } + if (options.func_arguments + && node instanceof AST_Lambda + && node.uses_arguments) { + AST_Node.warn("arguments used in function {name} [{line},{col}]", { + name: node.name ? node.name.name : "anonymous", + line: node.start.line, + col: node.start.col + }); + } + if (options.nested_defuns + && node instanceof AST_Defun + && !(tw.parent() instanceof AST_Scope)) { + AST_Node.warn("Function {name} declared in nested statement [{line},{col}]", { + name: node.name.name, + line: node.start.line, + col: node.start.col + }); + } + }); + this.walk(tw); +}); diff --git a/lib/utils.js b/lib/utils.js index 6a73c714..e9a2f80a 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -151,3 +151,27 @@ function string_template(text, props) { return props[p]; }); }; + +function mergeSort(array, cmp) { + if (array.length < 2) return array.slice(); + function merge(a, b) { + var r = [], ai = 0, bi = 0, i = 0; + while (ai < a.length && bi < b.length) { + cmp(a[ai], b[bi]) <= 0 + ? r[i++] = a[ai++] + : r[i++] = b[bi++]; + } + if (ai < a.length) r.push.apply(r, a.slice(ai)); + if (bi < b.length) r.push.apply(r, b.slice(bi)); + return r; + }; + function _ms(a) { + if (a.length <= 1) + return a; + var m = Math.floor(a.length / 2), left = a.slice(0, m), right = a.slice(m); + left = _ms(left); + right = _ms(right); + return merge(left, right); + }; + return _ms(array); +}; diff --git a/tmp/test-node.js b/tmp/test-node.js index 4da5e9ff..fdabafbf 100755 --- a/tmp/test-node.js +++ b/tmp/test-node.js @@ -16,6 +16,7 @@ ast.compute_char_frequency(); UglifyJS.base54.sort(); ast.figure_out_scope(); +ast.scope_warnings(); ast.mangle_names(); sys.error(UglifyJS.base54.get()); -- 2.34.1