adaptive base54 digits depending on char frequency (WIP)
author Mihai Bazon <mihai@bazon.net>
Mon, 10 Sep 2012 15:25:52 +0000 (18:25 +0300)
committer Mihai Bazon <mihai@bazon.net>
Mon, 10 Sep 2012 19:29:18 +0000 (22:29 +0300)
bin/uglifyjs2
lib/scope.js
lib/utils.js
tmp/test-node.js

index 0c2de4e..e100b21 100755 (executable)
@@ -70,7 +70,12 @@ var output = UglifyJS.OutputStream({
     source_map: SOURCE_MAP
 });
 
-files.forEach(do_file);
+files = files.map(do_file_1);
+files = files.map(do_file_2);
+files.forEach(do_file_3);
+if (ARGS.v) {
+    sys.error("BASE54 digits: " + UglifyJS.base54.get());
+}
 
 output = output.get();
 
@@ -100,7 +105,7 @@ if (ARGS.stats) {
 
 /* -----[ functions ]----- */
 
-function do_file(file) {
+function do_file_1(file) {
     if (ARGS.v) {
         sys.error("Compressing " + file);
     }
@@ -112,13 +117,27 @@ function do_file(file) {
     time_it("scope", function(){
         ast.figure_out_scope();
     });
-    time_it("mangle", function(){
-        ast.mangle_names();
-    });
     time_it("squeeze", function(){
         var compressor = UglifyJS.Compressor({});
         ast = ast.squeeze(compressor);
     });
+    ast.filename = file;
+    return ast;
+}
+
+// Second pass over one file's AST: runs compute_char_frequency() on it,
+// timed under the "scope" label, and returns the same AST so the function
+// can be used as an Array#map callback.
+function do_file_2(ast) {
+    function count_chars() {
+        ast.compute_char_frequency();
+    }
+    time_it("scope", count_chars);
+    return ast;
+}
+
+function do_file_3(ast) {
+    var file = ast.filename;
+    time_it("mangle", function(){
+        ast.mangle_names();
+    });
     time_it("generate", function(){
         if (SOURCE_MAP) {
             if (ARGS.p != null) {
index adf3649..e343334 100644 (file)
@@ -296,14 +296,19 @@ AST_Scope.DEFMETHOD("next_mangled", function(){
     }
 });
 
+// A declaration keeps its original name when its `global` flag is set or
+// when its scope has `uses_eval` or `uses_with` set.  The flag value itself
+// is returned (not coerced to a boolean).
+AST_SymbolDeclaration.DEFMETHOD("unmangleable", function(){
+    var is_global = this.global;
+    if (is_global) return is_global;
+    var scope = this.scope;
+    return scope.uses_eval || scope.uses_with;
+});
+
+// Labels never report themselves as unmangleable.
+AST_Label.DEFMETHOD("unmangleable", function(){
+    return false;
+});
+
+// Assigns a mangled name to this declaration.  When `uniq` refers to a
+// different node the call is forwarded to that node; otherwise a name is
+// taken from the scope via next_mangled(), unless `mangled_name` is already
+// set or unmangleable() returns a truthy value.
 AST_SymbolDeclaration.DEFMETHOD("mangle", function(){
-    if (this.uniq) {
+    if (this.uniq && this.uniq !== this) {
         this.uniq.mangle();
     }
-    else if (!(this.global
-               || this.scope.uses_eval
-               || this.scope.uses_with
-               || this.mangled_name)) {
+    else if (!(this.mangled_name || this.unmangleable())) {
         this.mangled_name = this.scope.next_mangled();
     }
 });
@@ -354,3 +359,103 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
     });
     this.walk(tw);
 });
+
+// Walks the whole tree with a TreeWalker and passes to base54.consider() the
+// text associated with each visited node: the printed form of constants,
+// directive values, statement keywords, object keys, property names,
+// operators, and the names of certain symbols.  Ends by calling
+// base54.sort().
+//
+// The branches form a single if/else-if chain, so only the first matching
+// test fires for a node.
+// NOTE(review): the node class hierarchy is not visible here; if any of
+// these classes inherit from one another the order of the tests matters --
+// confirm before reordering.
 AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
     var tw = new TreeWalker(function(node){
         if (node instanceof AST_Constant)
             base54.consider(node.print_to_string());
         else if (node instanceof AST_Debugger)
             base54.consider("debugger");
         else if (node instanceof AST_Directive)
             base54.consider(node.value);
         else if (node instanceof AST_While)
             base54.consider("while");
+        // "do" and "while" are passed as one concatenated string
         else if (node instanceof AST_Do)
             base54.consider("dowhile");
         else if (node instanceof AST_If) {
             base54.consider("if");
+            // "else" is only added when the node has an alternative branch
             if (node.alternative) base54.consider("else");
         }
         else if (node instanceof AST_Var)
             base54.consider("var");
         else if (node instanceof AST_Const)
             base54.consider("const");
         else if (node instanceof AST_Lambda)
             base54.consider("function");
         else if (node instanceof AST_For)
             base54.consider("for");
+        // "for" and "in" are passed as one concatenated string
         else if (node instanceof AST_ForIn)
             base54.consider("forin");
         else if (node instanceof AST_Switch)
             base54.consider("switch");
         else if (node instanceof AST_Case)
             base54.consider("case");
         else if (node instanceof AST_Default)
             base54.consider("default");
         else if (node instanceof AST_With)
             base54.consider("with");
+        // accessors contribute the "set"/"get" keyword followed by the key
         else if (node instanceof AST_ObjectSetter)
             base54.consider("set" + node.key);
         else if (node instanceof AST_ObjectGetter)
             base54.consider("get" + node.key);
         else if (node instanceof AST_ObjectKeyVal)
             base54.consider(node.key);
         else if (node instanceof AST_New)
             base54.consider("new");
         else if (node instanceof AST_This)
             base54.consider("this");
         else if (node instanceof AST_Try)
             base54.consider("try");
         else if (node instanceof AST_Catch)
             base54.consider("catch");
         else if (node instanceof AST_Finally)
             base54.consider("finally");
+        // declarations count only when unmangleable() is truthy
+        // (presumably because their original name stays in the output)
         else if (node instanceof AST_SymbolDeclaration && node.unmangleable())
             base54.consider(node.name);
+        // references count only when they have no `uniq` and are not label
+        // references -- presumably names with no known declaration; confirm
         else if (node instanceof AST_SymbolRef && !node.uniq && !(node instanceof AST_LabelRef))
             base54.consider(node.name);
         else if (node instanceof AST_Unary || node instanceof AST_Binary)
             base54.consider(node.operator);
         else if (node instanceof AST_Dot)
             base54.consider(node.property);
     });
     this.walk(tw);
     base54.sort();
 });
+
+// Generator of short identifier names whose digit alphabet can be reordered
+// by how often each character has been seen.
+//
+//   base54(num)          name for the non-negative integer `num`; the first
+//                        character comes from the first 54 entries of the
+//                        alphabet, every later one from all 64
+//   base54.consider(str) add the characters of `str` to the counters
+//   base54.sort()        reorder the alphabet by descending count, keeping
+//                        every digit after every non-digit
+//   base54.reset()       restore the default order and zero the counters
+//   base54.get()         the current alphabet (the live array, not a copy)
+var base54 = (function() {
+    // 52 letters, "$", "_" and 10 digits: 54 non-digits + 10 digits = 64.
+    var string = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
+    var chars, frequency;
+    function reset() {
+        frequency = {};
+        chars = string.split("");
+        chars.forEach(function(ch){ frequency[ch] = 0 });
+    }
+    base54.consider = function(str){
+        for (var i = str.length; --i >= 0;) {
+            var ch = str.charAt(i);
+            // indexOf() returns -1 for a miss and 0 for "a", so it must be
+            // compared explicitly: used as a boolean it skipped "a" and
+            // created NaN counters for characters outside the alphabet.
+            if (string.indexOf(ch) >= 0)
+                ++frequency[ch];
+        }
+    };
+    base54.sort = function() {
+        chars.sort(function(a, b){
+            // Digits go last so that, with the default alphabet, the first
+            // 54 entries (the ones base54() uses for a leading character)
+            // hold no digit.  is_digit is defined elsewhere in the project.
+            if (is_digit(a) && !is_digit(b)) return 1;
+            if (is_digit(b) && !is_digit(a)) return -1;
+            return frequency[b] - frequency[a];
+        });
+    };
+    base54.reset = reset;
+    reset();
+    base54.get = function(){ return chars };
+    function base54(num) {
+        var ret = "", base = 54;
+        do {
+            ret += chars[num % base];
+            num = Math.floor(num / base);
+            // only the leading character is restricted to 54 choices
+            base = 64;
+        } while (num > 0);
+        return ret;
+    }
+    return base54;
+})();
index 79e612c..6a73c71 100644 (file)
@@ -141,23 +141,6 @@ var MAP = (function(){
     return MAP;
 })();
 
-// XXX: currently this is optimized for jQuery, though I have the
-// feeling it works well in general for many scripts (well, better
-// than alphabetical order).  It would be nice if we could adapt it to
-// the currently running script.
-var BASE54_DIGITS = "etnrisouaflchpdvmgybwESxTNCkLAOM_DPHBjFIqRUzWXV$JKQGYZ0516372984";
-//var BASE54_DIGITS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
-
-function base54(num) {
-    var ret = "", base = 54;
-    do {
-        ret += BASE54_DIGITS.charAt(num % base);
-        num = Math.floor(num / base);
-        base = 64;
-    } while (num > 0);
-    return ret;
-};
-
 function push_uniq(array, el) {
     if (array.indexOf(el) < 0)
         array.push(el);
index 01ae35a..85dc1b1 100755 (executable)
@@ -8,47 +8,7 @@ var UglifyJS = require("../tools/node");
 var filename = process.argv[2];
 var code = fs.readFileSync(filename, "utf8");
 
-var ast = time_it("parse", function() {
-    return UglifyJS.parse(code);
-});
-
-time_it("scope", function(){
-    // calling figure_out_scope is a prerequisite for mangle_names,
-    // scope_warnings and compress
-    //
-    // perhaps figure_out_scope should be called automatically by the
-    // parser, but there might be instances where the functionality is
-    // not needed.
-    ast.figure_out_scope();
-});
-
-ast.scope_warnings();
-
-time_it("mangle", function(){
-    ast.mangle_names();
-});
-
-time_it("compress", function(){
-    var compressor = new UglifyJS.Compressor({
-        // sequences     : true,
-        // properties    : true,
-        // dead_code     : true,
-        // keep_comps    : true,
-        // drop_debugger : true,
-        // unsafe        : true,
-        // warnings      : true
-    });
-    ast = ast.squeeze(compressor);
-});
-
-var stream = UglifyJS.OutputStream({ beautify: true });
-time_it("generate", function(){
-    ast.print(stream);
-});
-sys.puts(stream.get());
-
-function time_it(name, cont) {
-    var t1 = new Date().getTime();
-    try { return cont(); }
-    finally { sys.debug("// " + name + ": " + ((new Date().getTime() - t1) / 1000).toFixed(3) + " sec."); }
-};
+var ast = UglifyJS.parse(code);
+ast.figure_out_scope();
+ast.compute_char_frequency();
+console.log(UglifyJS.base54.get().join(","));