Initial import
author Juriy Zaytsev <kangax@gmail.com>
Fri, 5 Feb 2010 20:49:43 +0000 (15:49 -0500)
committer Juriy Zaytsev <kangax@gmail.com>
Fri, 5 Feb 2010 20:49:43 +0000 (15:49 -0500)
html_minifier.html [new file with mode: 0644]
htmlparser.js [new file with mode: 0644]
master.css [new file with mode: 0644]
master.js [new file with mode: 0644]

diff --git a/html_minifier.html b/html_minifier.html
new file mode 100644 (file)
index 0000000..fcf50aa
--- /dev/null
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+  "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=utf-8">
+    <title>HTML minifier</title>
+    <script src="htmlparser.js" type="text/javascript"></script>
+    <link rel="stylesheet" href="master.css" type="text/css">
+  </head>
+  <body>
+    <div>
+      <div id="wrapper">
+        <h1>HTML Minifier</h1>
+        <textarea rows="8" cols="40" id="input"></textarea>
+        <div id="options">
+          <ul>
+            <li>
+              <input type="checkbox" id="remove-comments" checked>
+              <label for="remove-comments">Remove comments</label>
+            </li>
+            <li>
+              <input type="checkbox" id="collapse-whitespace" checked>
+              <label for="collapse-whitespace">Collapse whitespace</label>
+            </li>
+            <li>
+              <input type="checkbox" id="collapse-boolean-attributes" checked>
+              <label for="collapse-boolean-attributes">
+                Collapse boolean attributes
+                <br>
+                <span class="quiet">
+                  (e.g. <code>&lt;input disabled="disabled"&gt; &rarr; &lt;input disabled&gt;</code>)
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-attribute-quotes" checked>
+              <label for="remove-attribute-quotes">
+                Remove attribute quotes
+                <br>
+                <span class="quiet">
+                  (e.g. <code>&lt;p class="foo"&gt; &rarr; &lt;p class=foo&gt;</code>)
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-redundant-attributes" checked>
+              <label for="remove-redundant-attributes">
+                Remove redundant attributes
+                <br>
+                <span class="quiet">
+                  (e.g. <code>&lt;script language="Javascript" ...&gt;</code>)
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="use-short-doctype" checked>
+              <label for="use-short-doctype">
+                Use short doctype
+                <br>
+                <span class="quiet">
+                  (<code>&lt;!DOCTYPE html&gt;</code>)
+                </span>
+              </label>
+            </li>
+          </ul>
+          <p><button type="button" id="convert-btn">Convert</button></p>
+        </div>
+      </div>
+      <textarea rows="8" cols="40" id="output" readonly></textarea>
+      <p id="stats"></p>
+      <p class="quiet" style="font-style:italic;">Minifier is made by <a href="http://twitter.com/kangax/">kangax</a>, using a tweaked version of the HTML parser by <a href="http://ejohn.org/">John Resig</a> (which is based on the work of <a href="http://erik.eae.net/">Erik Arvidsson</a>).</p>
+    </div>
+    <script src="master.js" type="text/javascript"></script>
+  </body>
+</html>
\ No newline at end of file
diff --git a/htmlparser.js b/htmlparser.js
new file mode 100644 (file)
index 0000000..63c5123
--- /dev/null
@@ -0,0 +1,319 @@
+/*
+ * HTML Parser By John Resig (ejohn.org)
+ * Original code by Erik Arvidsson, Mozilla Public License
+ * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
+ *
+ * // Use like so:
+ * HTMLParser(htmlString, {
+ *     start: function(tag, attrs, unary) {},
+ *     end: function(tag) {},
+ *     chars: function(text) {},
+ *     comment: function(text) {},
+ *     doctype: function(text) {}
+ * });
+ *
+ * // or to get an XML string:
+ * HTMLtoXML(htmlString);
+ *
+ * // or to get an XML DOM Document
+ * HTMLtoDOM(htmlString);
+ *
+ * // or to inject into an existing document/DOM node
+ * HTMLtoDOM(htmlString, document);
+ * HTMLtoDOM(htmlString, document.body);
+ *
+ */
+
+(function(){
+
+       // Regular expressions used to tokenize the markup.
+       //   startTag - anchored at the start of the input; captures the tag name,
+       //              the raw run of attributes and an optional trailing "/".
+       //   endTag   - anchored at the start of the input; captures the tag name.
+       //   attr     - global; captures an attribute name followed by its
+       //              double-quoted, single-quoted or unquoted value.
+       //   doctype  - anchored as well: HTMLParser strips match[0].length
+       //              characters from the front of the input, so a declaration
+       //              found further into the input must not match.
+       var startTag = /^<(\w+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
+                 endTag = /^<\/(\w+)[^>]*>/,
+                 attr = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g,
+                 doctype = /^<!DOCTYPE [^>]+>/;
+               
+       // Empty Elements - HTML 4.01
+       // (always reported as unary, and never kept on the open-element stack)
+       var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed");
+
+       // Block Elements - HTML 4.01
+       // (opening one closes the inline elements on top of the open-element stack)
+       var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul");
+
+       // Inline Elements - HTML 4.01
+       var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");
+
+       // Elements that you can, intentionally, leave open
+       // (and which close themselves when the same element is opened again).
+       // Note that the element is called "option" (singular).
+       var closeSelf = makeMap("colgroup,dd,dt,li,option,p,td,tfoot,th,thead,tr");
+
+       // Attributes that have their values filled in disabled="disabled"
+       var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
+
+       // Special Elements (can contain anything)
+       var special = makeMap("script,style");
+
+       /**
+        * Walks an HTML string from left to right and reports each piece of
+        * markup it recognizes to the callbacks on `handler`.
+        *
+        * @param {string} html     Markup to parse.
+        * @param {Object} handler  Object whose callbacks are all optional:
+        *     start(tagName, attrs, unary) - attrs is an array of
+        *         {name, value, escaped} objects, unary is a boolean;
+        *     end(tagName), chars(text), comment(text), doctype(text).
+        * @throws {string} "Parse Error: " followed by the unparsed remainder when
+        *     a pass over the input consumes nothing.
+        */
+       var HTMLParser = this.HTMLParser = function( html, handler ) {
+               var index, chars, match, text, stack = [], last = html;
+
+               // Name of the innermost element that is still open (undefined if none)
+               stack.last = function(){
+                       return this[ this.length - 1 ];
+               };
+
+               while ( html ) {
+                       // Stays true until one of the branches below consumes markup
+                       chars = true;
+
+                       // Make sure we're not in a script or style element
+                       if ( !stack.last() || !special[ stack.last() ] ) {
+
+                               // Comment
+                               if ( html.indexOf("<!--") == 0 ) {
+                                       index = html.indexOf("-->");
+
+                                       if ( index >= 0 ) {
+                                               if ( handler.comment )
+                                                       handler.comment( html.substring( 4, index ) );
+                                               html = html.substring( index + 3 );
+                                               chars = false;
+                                       }
+
+                               // doctype
+                               } else if ( html.indexOf("<!DOCTYPE") == 0 ) {
+                                       match = html.match( doctype );
+
+                                       if ( match ) {
+                                               if ( handler.doctype )
+                                                       handler.doctype( match[0] );
+                                               html = html.substring( match[0].length );
+                                               chars = false;
+                                       }
+
+                               // end tag
+                               } else if ( html.indexOf("</") == 0 ) {
+                                       match = html.match( endTag );
+
+                                       if ( match ) {
+                                               html = html.substring( match[0].length );
+                                               match[0].replace( endTag, parseEndTag );
+                                               chars = false;
+                                       }
+
+                               // start tag
+                               } else if ( html.indexOf("<") == 0 ) {
+                                       match = html.match( startTag );
+
+                                       if ( match ) {
+                                               html = html.substring( match[0].length );
+                                               match[0].replace( startTag, parseStartTag );
+                                               chars = false;
+                                       }
+                               }
+
+                               // Nothing was recognized: everything up to the next "<" is text
+                               if ( chars ) {
+                                       index = html.indexOf("<");
+
+                                       text = index < 0 ? html : html.substring( 0, index );
+                                       html = index < 0 ? "" : html.substring( index );
+
+                                       if ( handler.chars )
+                                               handler.chars( text );
+                               }
+
+                       } else {
+                               // Raw content of a script/style element: take everything up to
+                               // the first matching close tag. [\s\S] is used because "." does
+                               // not match line breaks, the lazy quantifier stops at the first
+                               // close tag instead of the last one, and the "i" flag accepts a
+                               // close tag written in a different case.
+                               html = html.replace(new RegExp("([\\s\\S]*?)<\/" + stack.last() + "[^>]*>", "i"), function(all, content){
+                                       // Unwrap comment and CDATA markers, keeping what they enclose
+                                       content = content.replace(/<!--([\s\S]*?)-->/g, "$1")
+                                               .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, "$1");
+
+                                       if ( handler.chars )
+                                               handler.chars( content );
+
+                                       return "";
+                               });
+
+                               parseEndTag( "", stack.last() );
+                       }
+
+                       // This pass consumed nothing: give up rather than loop forever
+                       if ( html == last )
+                               throw "Parse Error: " + html;
+                       last = html;
+               }
+
+               // Clean up any remaining tags
+               parseEndTag();
+
+               // Handles a matched start tag: performs the implied closes, records the
+               // element as open unless it is unary, and reports it via handler.start.
+               // The parameters are the full match and the three captures of startTag.
+               function parseStartTag( tag, tagName, rest, unary ) {
+                       // A block element closes the inline elements on top of the stack
+                       if ( block[ tagName ] ) {
+                               while ( stack.last() && inline[ stack.last() ] ) {
+                                       parseEndTag( "", stack.last() );
+                               }
+                       }
+
+                       // e.g. a new <li> while an <li> is the innermost open element
+                       if ( closeSelf[ tagName ] && stack.last() == tagName ) {
+                               parseEndTag( "", tagName );
+                       }
+
+                       unary = empty[ tagName ] || !!unary;
+
+                       if ( !unary )
+                               stack.push( tagName );
+
+                       if ( handler.start ) {
+                               var attrs = [];
+
+                               rest.replace(attr, function(match, name) {
+                                       // arguments[2..4] are the double-quoted, single-quoted and
+                                       // unquoted value captures; an attribute from fillAttrs that
+                                       // has no value gets its own name as value
+                                       var value = arguments[2] ? arguments[2] :
+                                               arguments[3] ? arguments[3] :
+                                               arguments[4] ? arguments[4] :
+                                               fillAttrs[name] ? name : "";
+
+                                       attrs.push({
+                                               name: name,
+                                               value: value,
+                                               escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //"
+                                       });
+                               });
+
+                               handler.start( tagName, attrs, unary );
+                       }
+               }
+
+               // Closes the closest open element named tagName together with every
+               // element opened after it. Without a tagName every open element is
+               // closed; a tagName that is not open is ignored.
+               function parseEndTag( tag, tagName ) {
+                       var pos;
+
+                       // If no tag name is provided, clean shop
+                       if ( !tagName ) {
+                               pos = 0;
+
+                       // Find the closest opened tag of the same type
+                       } else {
+                               for ( pos = stack.length - 1; pos >= 0; pos-- )
+                                       if ( stack[ pos ] == tagName )
+                                               break;
+                       }
+
+                       if ( pos >= 0 ) {
+                               // Close all the open elements, up the stack
+                               for ( var i = stack.length - 1; i >= pos; i-- )
+                                       if ( handler.end )
+                                               handler.end( stack[ i ] );
+
+                               // Remove the open elements from the stack
+                               stack.length = pos;
+                       }
+               }
+       };
+       
+       // Serializes the events reported by HTMLParser back into a string: every
+       // attribute value is written in double quotes, unary elements end in "/>",
+       // and text and comments are passed through.
+       this.HTMLtoXML = function( html ) {
+               var pieces = [];
+
+               HTMLParser(html, {
+                       start: function( tag, attrs, unary ) {
+                               var open = "<" + tag;
+
+                               for ( var n = 0, count = attrs.length; n < count; n++ ) {
+                                       open += " " + attrs[n].name + '="' + attrs[n].escaped + '"';
+                               }
+
+                               if ( unary ) {
+                                       open += "/";
+                               }
+
+                               pieces.push( open + ">" );
+                       },
+                       end: function( tag ) {
+                               pieces.push( "</" + tag + ">" );
+                       },
+                       chars: function( text ) {
+                               pieces.push( text );
+                       },
+                       comment: function( text ) {
+                               pieces.push( "<!--" + text + "-->" );
+                       }
+               });
+
+               return pieces.join("");
+       };
+       
+       /**
+        * Parses an HTML string and builds the corresponding nodes in a DOM document.
+        *
+        * @param {string} html  Markup to convert.
+        * @param {Object} [doc] A document, or a node whose owner document is used.
+        *     When omitted, a new document is created with whichever of DOMDocument,
+        *     document.implementation.createDocument or ActiveXObject is available.
+        * @return {Object} The document that received the nodes.
+        */
+       this.HTMLtoDOM = function( html, doc ) {
+               // There can be only one of these elements
+               var one = makeMap("html,head,body,title");
+
+               // Enforce a structure for the document
+               var structure = {
+                       link: "head",
+                       base: "head"
+               };
+
+               if ( !doc ) {
+                       if ( typeof DOMDocument != "undefined" )
+                               doc = new DOMDocument();
+                       else if ( typeof document != "undefined" && document.implementation && document.implementation.createDocument )
+                               doc = document.implementation.createDocument("", "", null);
+                       // The test has to name the constructor that is actually called
+                       else if ( typeof ActiveXObject != "undefined" )
+                               doc = new ActiveXObject("Msxml.DOMDocument");
+
+               } else
+                       doc = doc.ownerDocument ||
+                               doc.getOwnerDocument && doc.getOwnerDocument() ||
+                               doc;
+
+               var elems = [],
+                       documentElement = doc.documentElement ||
+                               doc.getDocumentElement && doc.getDocumentElement();
+
+               // If we're dealing with an empty document then we
+               // need to pre-populate it with the HTML document structure
+               if ( !documentElement && doc.createElement ) (function(){
+                       var html = doc.createElement("html");
+                       var head = doc.createElement("head");
+                       head.appendChild( doc.createElement("title") );
+                       html.appendChild( head );
+                       html.appendChild( doc.createElement("body") );
+                       doc.appendChild( html );
+               })();
+
+               // Find all the unique elements
+               if ( doc.getElementsByTagName )
+                       for ( var i in one )
+                               one[ i ] = doc.getElementsByTagName( i )[0];
+
+               // If we're working with a document, inject contents into
+               // the body element
+               var curParentNode = one.body;
+
+               HTMLParser( html, {
+                       start: function( tagName, attrs, unary ) {
+                               // If it's a pre-built element, then we can ignore
+                               // its construction. It is still tracked in elems so that
+                               // its end tag pops exactly what was pushed here.
+                               if ( one[ tagName ] ) {
+                                       curParentNode = one[ tagName ];
+                                       if ( !unary )
+                                               elems.push( curParentNode );
+                                       return;
+                               }
+
+                               var elem = doc.createElement( tagName );
+
+                               // attrs is an array, so walk it by index
+                               for ( var n = 0; n < attrs.length; n++ )
+                                       elem.setAttribute( attrs[ n ].name, attrs[ n ].value );
+
+                               // link/base go to the head when a head element was found
+                               if ( structure[ tagName ] && one[ structure[ tagName ] ] && typeof one[ structure[ tagName ] ] != "boolean" )
+                                       one[ structure[ tagName ] ].appendChild( elem );
+
+                               else if ( curParentNode && curParentNode.appendChild )
+                                       curParentNode.appendChild( elem );
+
+                               if ( !unary ) {
+                                       elems.push( elem );
+                                       curParentNode = elem;
+                               }
+                       },
+                       end: function( tag ) {
+                               elems.pop();
+
+                               // Back to the enclosing element, or to the body once every
+                               // element opened so far has been closed
+                               curParentNode = elems.length ? elems[ elems.length - 1 ] : one.body;
+                       },
+                       chars: function( text ) {
+                               // Same guard as for elements: skip when there is no usable parent
+                               if ( curParentNode && curParentNode.appendChild )
+                                       curParentNode.appendChild( doc.createTextNode( text ) );
+                       },
+                       comment: function( text ) {
+                               // create comment node
+                       }
+               });
+
+               return doc;
+       };
+
+       // Turns a comma-separated list of names into a lookup object in which
+       // every listed name maps to true.
+       function makeMap(str){
+               var names = str.split(","), map = {}, n = 0;
+               while ( n < names.length ) {
+                       map[ names[n] ] = true;
+                       n++;
+               }
+               return map;
+       }
+})();
\ No newline at end of file
diff --git a/master.css b/master.css
new file mode 100644 (file)
index 0000000..bd5bfc9
--- /dev/null
@@ -0,0 +1,16 @@
+body { font-family: "Cambria", Georgia, Times, "Times New Roman", serif; }
+textarea { height: 20em; }
+h1 { margin-top: 0.5em; font-size: 1.25em; }
+
+#wrapper { overflow: hidden; min-width: 900px; }
+#input { width: 65%; margin-bottom: 1em; }
+#output { width: 100%; height: 18em; }
+#options { float: right; width: 33%; padding-left: 1em; }
+#options ul { list-style: none; padding: 0.5em; overflow: hidden; background: #ffe; margin-top: 0; }
+#options ul li { float: left; clear: both; padding-bottom: 0.5em; }
+#options label, #options input { float: left; }
+#options input { margin-right: 0.5em; }
+
+.success { color: green; }
+.failure { color: red; }
+.quiet { font-size: 0.85em; color: #888; }
\ No newline at end of file
diff --git a/master.js b/master.js
new file mode 100644 (file)
index 0000000..d6a1ae3
--- /dev/null
+++ b/master.js
@@ -0,0 +1,130 @@
+(function(){
+  
+  // Shorthand for document.getElementById
+  function byId(id) {
+    var element = document.getElementById(id);
+    return element;
+  }
+  
+  // Minifier settings. Each method reads the `checked` state of its checkbox
+  // at the moment it is called.
+  var options = (function(){
+    function checkboxReader(id) {
+      return function() {
+        return byId(id).checked;
+      };
+    }
+
+    return {
+      shouldRemoveComments: checkboxReader('remove-comments'),
+      shouldCollapseWhitespace: checkboxReader('collapse-whitespace'),
+      shouldCollapseBooleanAttributes: checkboxReader('collapse-boolean-attributes'),
+      shouldRemoveAttributeQuotes: checkboxReader('remove-attribute-quotes'),
+      shouldRemoveRedundantAttributes: checkboxReader('remove-redundant-attributes'),
+      shouldUseShortDoctype: checkboxReader('use-short-doctype')
+    };
+  })();
+  
+  // Converts the argument to a string and escapes &, < and > in it
+  function escapeHTML(str) {
+    var text = String(str);
+    text = text.replace(/&/g, '&amp;');
+    text = text.replace(/</g, '&lt;');
+    text = text.replace(/>/g, '&gt;');
+    return text;
+  }
+  // Replaces the leading and the trailing run of whitespace with one space each
+  function collapseWhitespace(str) {
+    var leadingCollapsed = str.replace(/^\s+/, ' ');
+    return leadingCollapsed.replace(/\s+$/, ' ');
+  }
+  // True when the value is non-empty and made up solely of word characters
+  // and hyphens
+  function canRemoveAttributeQuotes(value) {
+    var safeUnquoted = /^[\w-]+$/;
+    return safeUnquoted.test(value);
+  }
+  // True for the attributes this tool treats as redundant:
+  // <script language="javascript">, <form method="get"> and <input type="text">.
+  // Tag and attribute names are compared without regard to case, like the
+  // attribute value, so <SCRIPT LANGUAGE="JavaScript"> is recognized as well.
+  function isAttributeRedundant(tag, attributeName, attributeValue) {
+    var tagName = String(tag).toLowerCase(),
+        name = String(attributeName).toLowerCase();
+    return (
+        (tagName === 'script' &&
+        name === 'language' &&
+        /^javascript$/i.test(attributeValue)) ||
+
+        (tagName === 'form' &&
+        name === 'method' &&
+        /^get$/i.test(attributeValue)) ||
+
+        (tagName === 'input' &&
+        name === 'type' &&
+        /^text$/i.test(attributeValue))
+    );
+  }
+  // True for the boolean attributes this tool collapses (checked, disabled,
+  // selected, readonly). The name is matched without regard to case, so
+  // CHECKED is recognized as well.
+  function isBooleanAttribute(attributeName) {
+    return /^(?:checked|disabled|selected|readonly)$/i.test(attributeName);
+  }
+  
+  // Feeds the markup through HTMLParser and re-serializes it, applying the
+  // transformations that are currently enabled in `options`.
+  function minify(value) {
+    var output = [];
+
+    // Serializes one attribute, including its leading space; returns '' for
+    // an attribute that is dropped as redundant
+    function renderAttribute(tag, attribute) {
+      if (options.shouldRemoveRedundantAttributes() &&
+          isAttributeRedundant(tag, attribute.name, attribute.value)) {
+        return '';
+      }
+
+      var unquoted = options.shouldRemoveAttributeQuotes() &&
+                     canRemoveAttributeQuotes(attribute.escaped);
+      var renderedValue = unquoted ? attribute.escaped : '"' + attribute.escaped + '"';
+
+      var nameOnly = options.shouldCollapseBooleanAttributes() &&
+                     isBooleanAttribute(attribute.name);
+
+      return ' ' + (nameOnly ? attribute.name : attribute.name + '=' + renderedValue);
+    }
+
+    HTMLParser(value, {
+      start: function( tag, attrs, unary ) {
+        var markup = '<' + tag;
+        for ( var n = 0, count = attrs.length; n < count; n++ ) {
+          markup += renderAttribute(tag, attrs[n]);
+        }
+        output.push(markup + '>');
+      },
+      end: function( tag ) {
+        output.push('</' + tag + '>');
+      },
+      chars: function( text ) {
+        output.push(options.shouldCollapseWhitespace() ? collapseWhitespace(text) : text);
+      },
+      comment: function( text ) {
+        if (!options.shouldRemoveComments()) {
+          output.push('<!--' + text + '-->');
+        }
+      },
+      doctype: function(doctype) {
+        if (options.shouldUseShortDoctype()) {
+          output.push('<!DOCTYPE html>');
+        }
+        else {
+          output.push(doctype);
+        }
+      }
+    });
+
+    return output.join('');
+  }
+  
+  // Minifies the contents of the input textarea, writes the result to the
+  // output textarea and shows size statistics. If minification throws, the
+  // output is cleared and the escaped error is shown instead.
+  byId('convert-btn').onclick = function() {
+    try {
+      var originalValue = byId('input').value,
+          minifiedValue = minify(originalValue),
+          diff = originalValue.length - minifiedValue.length,
+          // Empty input would make the ratio 0/0 (NaN), so report 0 instead
+          savings = originalValue.length ? (100 * diff) / originalValue.length : 0;
+
+      byId('output').value = minifiedValue;
+
+      byId('stats').innerHTML = '<span class="success">' +
+        'Original size: <strong>' + originalValue.length + '</strong>' +
+        '. Minified size: <strong>' + minifiedValue.length + '</strong>' +
+        '. Savings: <strong>' + savings.toFixed(2) + '</strong>%.' +
+      '</span>';
+    }
+    catch(err) {
+      byId('output').value = '';
+      byId('stats').innerHTML = '<span class="failure">' + escapeHTML(err) + '</span>';
+    }
+  };
+  
+})();
\ No newline at end of file