From 6b9aeb5325203a9f4c8e3bdd6bd3f45f8520ee2e Mon Sep 17 00:00:00 2001
From: Mihai Bazon
Date: Mon, 10 Sep 2012 18:25:52 +0300
Subject: [PATCH] adaptive base54 digits depending on char frequency (WIP)

---
 bin/uglifyjs2    |  29 +++++++++---
 lib/scope.js     | 115 ++++++++++++++++++++++++++++++++++++++++++++---
 lib/utils.js     |  17 -------
 tmp/test-node.js |  48 ++------------------
 4 files changed, 138 insertions(+), 71 deletions(-)

diff --git a/bin/uglifyjs2 b/bin/uglifyjs2
index 0c2de4e6..e100b211 100755
--- a/bin/uglifyjs2
+++ b/bin/uglifyjs2
@@ -70,7 +70,12 @@ var output = UglifyJS.OutputStream({
     source_map: SOURCE_MAP
 });
 
-files.forEach(do_file);
+files = files.map(do_file_1);
+files = files.map(do_file_2);
+files.forEach(do_file_3);
+if (ARGS.v) {
+    sys.error("BASE54 digits: " + UglifyJS.base54.get());
+}
 
 output = output.get();
 
@@ -100,7 +105,7 @@ if (ARGS.stats) {
 
 /* -----[ functions ]----- */
 
-function do_file(file) {
+function do_file_1(file) {
     if (ARGS.v) {
         sys.error("Compressing " + file);
     }
@@ -112,13 +117,27 @@ function do_file(file) {
     time_it("scope", function(){
         ast.figure_out_scope();
     });
-    time_it("mangle", function(){
-        ast.mangle_names();
-    });
     time_it("squeeze", function(){
         var compressor = UglifyJS.Compressor({});
         ast = ast.squeeze(compressor);
     });
+    ast.filename = file;
+    return ast;
+}
+
+function do_file_2(ast) {
+    time_it("scope", function(){
+        //ast.figure_out_scope();
+        ast.compute_char_frequency();
+    });
+    return ast;
+}
+
+function do_file_3(ast) {
+    var file = ast.filename;
+    time_it("mangle", function(){
+        ast.mangle_names();
+    });
     time_it("generate", function(){
         if (SOURCE_MAP) {
             if (ARGS.p != null) {
diff --git a/lib/scope.js b/lib/scope.js
index adf36493..e3433340 100644
--- a/lib/scope.js
+++ b/lib/scope.js
@@ -296,14 +296,19 @@ AST_Scope.DEFMETHOD("next_mangled", function(){
     }
 });
 
+AST_SymbolDeclaration.DEFMETHOD("unmangleable", function(){
+    return this.global || this.scope.uses_eval || this.scope.uses_with;
+});
+
+AST_Label.DEFMETHOD("unmangleable", function(){
+    return false;
+});
+
 AST_SymbolDeclaration.DEFMETHOD("mangle", function(){
-    if (this.uniq) {
+    if (this.uniq && this.uniq !== this) {
         this.uniq.mangle();
     }
-    else if (!(this.global
-               || this.scope.uses_eval
-               || this.scope.uses_with
-               || this.mangled_name)) {
+    else if (!(this.mangled_name || this.unmangleable())) {
         this.mangled_name = this.scope.next_mangled();
     }
 });
@@ -354,3 +359,103 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
     });
     this.walk(tw);
 });
+
+AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
+    var tw = new TreeWalker(function(node){
+        if (node instanceof AST_Constant)
+            base54.consider(node.print_to_string());
+        else if (node instanceof AST_Debugger)
+            base54.consider("debugger");
+        else if (node instanceof AST_Directive)
+            base54.consider(node.value);
+        else if (node instanceof AST_While)
+            base54.consider("while");
+        else if (node instanceof AST_Do)
+            base54.consider("dowhile");
+        else if (node instanceof AST_If) {
+            base54.consider("if");
+            if (node.alternative) base54.consider("else");
+        }
+        else if (node instanceof AST_Var)
+            base54.consider("var");
+        else if (node instanceof AST_Const)
+            base54.consider("const");
+        else if (node instanceof AST_Lambda)
+            base54.consider("function");
+        else if (node instanceof AST_For)
+            base54.consider("for");
+        else if (node instanceof AST_ForIn)
+            base54.consider("forin");
+        else if (node instanceof AST_Switch)
+            base54.consider("switch");
+        else if (node instanceof AST_Case)
+            base54.consider("case");
+        else if (node instanceof AST_Default)
+            base54.consider("default");
+        else if (node instanceof AST_With)
+            base54.consider("with");
+        else if (node instanceof AST_ObjectSetter)
+            base54.consider("set" + node.key);
+        else if (node instanceof AST_ObjectGetter)
+            base54.consider("get" + node.key);
+        else if (node instanceof AST_ObjectKeyVal)
+            base54.consider(node.key);
+        else if (node instanceof AST_New)
+            base54.consider("new");
+        else if (node instanceof AST_This)
+            base54.consider("this");
+        else if (node instanceof AST_Try)
+            base54.consider("try");
+        else if (node instanceof AST_Catch)
+            base54.consider("catch");
+        else if (node instanceof AST_Finally)
+            base54.consider("finally");
+        else if (node instanceof AST_SymbolDeclaration && node.unmangleable())
+            base54.consider(node.name);
+        else if (node instanceof AST_SymbolRef && !node.uniq && !(node instanceof AST_LabelRef))
+            base54.consider(node.name);
+        else if (node instanceof AST_Unary || node instanceof AST_Binary)
+            base54.consider(node.operator);
+        else if (node instanceof AST_Dot)
+            base54.consider(node.property);
+    });
+    this.walk(tw);
+    base54.sort();
+});
+
+var base54 = (function() {
+    var string = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
+    var chars, frequency;
+    function reset() {
+        frequency = {};
+        chars = string.split("");
+        chars.forEach(function(ch){ frequency[ch] = 0 });
+    }
+    base54.consider = function(str){
+        for (var i = str.length; --i >= 0;) {
+            var ch = str.charAt(i);
+            if (string.indexOf(ch) >= 0) // indexOf() is -1 when absent and 0 for "a", so compare explicitly
+                ++frequency[ch];
+        }
+    };
+    base54.sort = function() {
+        chars.sort(function(a, b){
+            if (is_digit(a) && !is_digit(b)) return 1; // digits always sort after non-digits
+            if (is_digit(b) && !is_digit(a)) return -1;
+            return frequency[b] - frequency[a]; // otherwise most frequent first
+        });
+    };
+    base54.reset = reset;
+    reset();
+    base54.get = function(){ return chars };
+    function base54(num) {
+        var ret = "", base = 54;
+        do {
+            ret += chars[num % base];
+            num = Math.floor(num / base);
+            base = 64; // after the first character all 64 entries of chars are used
+        } while (num > 0);
+        return ret;
+    };
+    return base54;
+})();
diff --git a/lib/utils.js b/lib/utils.js
index 79e612c6..6a73c714 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -141,23 +141,6 @@ var MAP = (function(){
     return MAP;
 })();
 
-// XXX: currently this is optimized for jQuery, though I have the
-// feeling it works well in general for many scripts (well, better
-// than alphabetical order).  It would be nice if we could adapt it to
-// the currently running script.
-var BASE54_DIGITS = "etnrisouaflchpdvmgybwESxTNCkLAOM_DPHBjFIqRUzWXV$JKQGYZ0516372984";
-//var BASE54_DIGITS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
-
-function base54(num) {
-    var ret = "", base = 54;
-    do {
-        ret += BASE54_DIGITS.charAt(num % base);
-        num = Math.floor(num / base);
-        base = 64;
-    } while (num > 0);
-    return ret;
-};
-
 function push_uniq(array, el) {
     if (array.indexOf(el) < 0)
         array.push(el);
diff --git a/tmp/test-node.js b/tmp/test-node.js
index 01ae35a7..85dc1b1a 100755
--- a/tmp/test-node.js
+++ b/tmp/test-node.js
@@ -8,47 +8,7 @@ var UglifyJS = require("../tools/node");
 var filename = process.argv[2];
 var code = fs.readFileSync(filename, "utf8");
 
-var ast = time_it("parse", function() {
-    return UglifyJS.parse(code);
-});
-
-time_it("scope", function(){
-    // calling figure_out_scope is a prerequisite for mangle_names,
-    // scope_warnings and compress
-    //
-    // perhaps figure_out_scope should be called automatically by the
-    // parser, but there might be instances where the functionality is
-    // not needed.
-    ast.figure_out_scope();
-});
-
-ast.scope_warnings();
-
-time_it("mangle", function(){
-    ast.mangle_names();
-});
-
-time_it("compress", function(){
-    var compressor = new UglifyJS.Compressor({
-        // sequences : true,
-        // properties : true,
-        // dead_code : true,
-        // keep_comps : true,
-        // drop_debugger : true,
-        // unsafe : true,
-        // warnings : true
-    });
-    ast = ast.squeeze(compressor);
-});
-
-var stream = UglifyJS.OutputStream({ beautify: true });
-time_it("generate", function(){
-    ast.print(stream);
-});
-sys.puts(stream.get());
-
-function time_it(name, cont) {
-    var t1 = new Date().getTime();
-    try { return cont(); }
-    finally { sys.debug("// " + name + ": " + ((new Date().getTime() - t1) / 1000).toFixed(3) + " sec."); }
-};
+var ast = UglifyJS.parse(code);
+ast.figure_out_scope();
+ast.compute_char_frequency();
+console.log(UglifyJS.base54.get().join(","));
-- 
2.34.1