Extract LINT logic into a separate file -- htmllint.js, now also under a standalone...
author Juriy Zaytsev <kangax@gmail.com>
Fri, 12 Feb 2010 21:28:20 +0000 (16:28 -0500)
committer Juriy Zaytsev <kangax@gmail.com>
Fri, 12 Feb 2010 21:28:20 +0000 (16:28 -0500)
index.html
master.css
master.js
src/htmllint.js [new file with mode: 0644]
src/htmlminifier.js

index c25676b..0569e5d 100644 (file)
   <head>
     <meta http-equiv="Content-type" content="text/html; charset=utf-8">
     <title>HTML minifier</title>
+    
     <script src="src/htmlparser.js" type="text/javascript"></script>
     <script src="src/htmlminifier.js" type="text/javascript"></script>
+    <script src="src/htmllint.js" type="text/javascript"></script>
+    
     <link rel="stylesheet" href="master.css" type="text/css">
   </head>
   <body>
     <div>
-      <div id="wrapper">
-        <h1>HTML Minifier <span style="font-size:0.6em">(ver. 0.3)</span></h1>
-        <textarea rows="8" cols="40" id="input"></textarea>
-        <p style="width:65%">
-          <button type="button" id="convert-btn">Convert</button>
-        </p>
-        <textarea rows="8" cols="40" id="output" readonly></textarea>
-      </div>
-      <div id="options">
-        <ul>
-          <li>
-            <input type="checkbox" id="remove-comments" checked>
-            <label for="remove-comments">Remove comments (</label>
-            <input type="checkbox" id="remove-comments-from-cdata" checked>
-            <label for="remove-comments-from-cdata">also from scripts and styles )</label>
-            <span class="quiet short" style="margin-left:1.5em">
-              Conditional comments are left intact.
-            </span>
-          </li>
-          <li>
-            <input type="checkbox" id="remove-cdata-sections-from-cdata" checked>
-            <label for="remove-cdata-sections-from-cdata">Remove CDATA sections from scripts and styles</label>
-          </li>
-          <li>
-            <input type="checkbox" id="collapse-whitespace" checked>
-            <label for="collapse-whitespace">Collapse whitespace</label>
-          </li>
-          <li>
-            <input type="checkbox" id="collapse-boolean-attributes" checked>
-            <label for="collapse-boolean-attributes">
-              Collapse boolean attributes
-              <br>
-              <span class="quiet short">
-                (e.g. <code>&lt;... disabled="disabled"&gt; &rarr; &lt;... disabled&gt;</code>)
-              </span>
-            </label>
-          </li>
-          <li>
-            <input type="checkbox" id="remove-attribute-quotes" checked>
-            <label for="remove-attribute-quotes">
-              Remove attribute quotes
-              <br>
-              <span class="quiet">
-                (e.g. <code>&lt;p class="foo"&gt; &rarr; &lt;p class=foo&gt;</code>)
-              </span>
-            </label>
-          </li>
-          <li>
-            <input type="checkbox" id="remove-redundant-attributes" checked>
-            <label for="remove-redundant-attributes">Remove redundant attributes/values</label>
-            <div class="quiet">
-              <code>&lt;script language="Javascript" ...&gt;</code><br>
-              <code>&lt;form method="get" ...&gt;</code><br>
-              <code>&lt;input type="text" ...&gt;</code><br>
-              <code>&lt;script src="..." charset="..."&gt;</code><br>
-              <code>&lt;a id="..." name="..."&gt;</code><br>
-              <code>&lt;... onclick="javascript:..." ...&gt;</code>
-            </div>
-          </li>
-          <li>
-            <input type="checkbox" id="use-short-doctype" checked>
-            <label for="use-short-doctype" title="i.e. <!DOCTYPE html>">
-              Use short doctype
-            </label>
-          </li>
-          <li>
-            <input type="checkbox" id="remove-empty-attributes" checked>
-            <label for="remove-empty-attributes">
-              Remove empty (or blank) attributes
-              <br>
-              <span class="quiet short">
-                Valid attributes are: class, id, style, title, lang, dir, event attributes
-              </span>
-            </label>
-          </li>
-          <li>
-            <input type="checkbox" id="remove-optional-tags" checked>
-            <label for="remove-optional-tags">
-              Remove optional tags
-              <br>
-              <span class="quiet short">
-                Currently, only <code>&lt;/html></code> and <code>&lt;/body></code>
+      <div id="outer-wrapper">
+        <div id="wrapper">
+          <h1>HTML Minifier <span style="font-size:0.6em">(ver. 0.3)</span></h1>
+          <p id="warning">
+            Minifier is <strong>very draft</strong> and is <strong>not yet thoroughly tested</strong>. Use at your own risk. 
+          </p>
+          <textarea rows="8" cols="40" id="input"></textarea>
+          <p style="width:65%">
+            <button type="button" id="convert-btn">Convert</button>
+          </p>
+          <textarea rows="8" cols="40" id="output" readonly></textarea>
+        </div>
+        <div id="options">
+          <ul>
+            <li>
+              <input type="checkbox" id="remove-comments" checked>
+              <label for="remove-comments">Remove comments (</label>
+              <input type="checkbox" id="remove-comments-from-cdata" checked>
+              <label for="remove-comments-from-cdata">also from scripts and styles )</label>
+              <span class="quiet short" style="margin-left:1.5em">
+                Conditional comments are left intact.
               </span>
-            </label>
-          </li>
-          <li>
-            <input type="checkbox" id="remove-empty-elements">
-            <label for="remove-empty-elements">
-              Remove empty elements
-              <br>
-              <span class="quiet short">
-                All except <code>textarea</code>
-              </span>
-            </label>
-          </li>
-        </ul>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-cdata-sections-from-cdata" checked>
+              <label for="remove-cdata-sections-from-cdata">Remove CDATA sections from scripts and styles</label>
+            </li>
+            <li>
+              <input type="checkbox" id="collapse-whitespace" checked>
+              <label for="collapse-whitespace">Collapse whitespace</label>
+            </li>
+            <li>
+              <input type="checkbox" id="collapse-boolean-attributes" checked>
+              <label for="collapse-boolean-attributes">
+                Collapse boolean attributes
+                <br>
+                <span class="quiet short">
+                  (e.g. <code>&lt;... disabled="disabled"&gt; &rarr; &lt;... disabled&gt;</code>)
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-attribute-quotes" checked>
+              <label for="remove-attribute-quotes">
+                Remove attribute quotes
+                <br>
+                <span class="quiet">
+                  (e.g. <code>&lt;p class="foo"&gt; &rarr; &lt;p class=foo&gt;</code>)
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-redundant-attributes" checked>
+              <label for="remove-redundant-attributes">Remove redundant attributes/values</label>
+              <div class="quiet">
+                <code>&lt;script language="Javascript" ...&gt;</code><br>
+                <code>&lt;form method="get" ...&gt;</code><br>
+                <code>&lt;input type="text" ...&gt;</code><br>
+                <code>&lt;script src="..." charset="..."&gt;</code><br>
+                <code>&lt;a id="..." name="..."&gt;</code><br>
+                <code>&lt;... onclick="javascript:..." ...&gt;</code>
+              </div>
+            </li>
+            <li>
+              <input type="checkbox" id="use-short-doctype" checked>
+              <label for="use-short-doctype" title="i.e. <!DOCTYPE html>">
+                Use short doctype
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-empty-attributes" checked>
+              <label for="remove-empty-attributes">
+                Remove empty (or blank) attributes
+                <br>
+                <span class="quiet short">
+                  Valid attributes are: class, id, style, title, lang, dir, event attributes
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-optional-tags" checked>
+              <label for="remove-optional-tags">
+                Remove optional tags
+                <br>
+                <span class="quiet short">
+                  Currently, only <code>&lt;/html></code> and <code>&lt;/body></code>
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="remove-empty-elements">
+              <label for="remove-empty-elements">
+                Remove empty elements
+                <br>
+                <span class="quiet short">
+                  All except <code>textarea</code>
+                </span>
+              </label>
+            </li>
+            <li>
+              <input type="checkbox" id="use-htmllint" checked>
+              <label for="use-htmllint">
+                Validate input through HTML lint
+              </label>
+            </li>
+          </ul>
+        </div>
       </div>
-      
       <p id="stats"></p>
-      <p id="warning">
-        Minifier is <strong>very draft</strong> and is <strong>not yet thoroughly tested</strong>. Use at your own risk. 
-      </p>
+      <div id="lint-report">
+        LINT REPORT:
+        <div id="report"></div>
+      </div>
       <div id="todo">
         TODO:
         <ul>
           <li>Detect repeating attributes (e.g. multiple styles, classes, etc.)</li>
           <li>Strip whitespace from attributes where allowed</li>
-          <li>Report deprecated (or presentational) attributes (e.g.: <code>&lt;td width="..." height="..."></code>)</li>
           <li>Add option to collapse all whitespace to 1 character, instead of completely removing it (to preserve empty text nodes)</li>
           <li>Figure out when it is safe to remove optional closing tags, so that it doesn't affect document tree</li>
           <li>Support IE "Downlevel-revealed Conditional Comments"</li>
         </ul>
       </div>
       <p class="quiet" style="font-style:italic;">
-        Minifier is made by <a href="http://perfectionkills.com/">kangax</a>, 
+        HTMLMinifier is made by <a href="http://perfectionkills.com/">kangax</a>, 
         using tweaked version of HTML parser by <a href="http://ejohn.org/">John Resig</a> 
         (which, in its turn, is based on work of <a href="http://erik.eae.net/">Erik Arvidsson</a>).
       </p>
index 09f79f8..e7e6ee1 100644 (file)
@@ -1,22 +1,29 @@
-body { font-family: "Cambria", Georgia, Times, "Times New Roman", serif; }
+body { font-family: "Cambria", Georgia, Times, "Times New Roman", serif; margin-top: 0; padding-top: 0; }
 textarea { height: 30em; }
 h1 { margin-top: 0.5em; font-size: 1.25em; }
 button { font-weight: bold; width: 100px; }
 
-#wrapper { overflow: hidden; width: 65%; float: left; }
+#outer-wrapper { overflow: hidden; }
+#wrapper { width: 65%; float: left; }
 #input { width: 99%; height: 18em; }
 #output { width: 99%; height: 18em; margin-bottom: 2em; }
-#options { float: right; width: 33%; padding-left: 1em; margin-top: 2.5em; min-height: 50em; }
+#options { float: right; width: 33%; padding-left: 1em; margin-top: 3em; }
 #options ul { list-style: none; padding: 0.5em; overflow: hidden; background: #ffe; margin-top: 0; }
 #options ul li { float: left; clear: both; padding-bottom: 0.5em; }
 #options ul li div { margin-left: 1.75em; }
 #options label, #options input { float: left; }
 #options input { margin-right: 0.5em; }
-#stats { margin-bottom: 2em; }
+#stats { margin-bottom: 2em; overflow: hidden; margin-top: 0; }
 #todo { font-family: monospace; margin-bottom: 2em; }
-#warning { background: #fcc; padding: 0.25em; display: inline-block; }
+#warning { background: #fcc; padding: 0.25em; display: inline-block; margin-top: 0; font-size: 0.85em; }
+#lint-report { font-family: monospace; }
+#report { margin-bottom: 5em; }
+#report ul { margin: 0.5em; padding-left: 1em; list-style: none; }
 
 .success { color: green; }
 .failure { color: red; }
 .quiet { font-size: 0.85em; color: #888; }
-.short { display: inline-block; width: 20em; margin-top: 0.25em; }
\ No newline at end of file
+.short { display: inline-block; width: 20em; margin-top: 0.25em; }
+
+.deprecated-element, .deprecated-attribute { color: red; }
+.presentational-element, .presentational-attribute { color: #FF8C00; }
\ No newline at end of file
index e020834..30dcde6 100644 (file)
--- a/master.js
+++ b/master.js
@@ -21,6 +21,7 @@
       removeEmptyAttributes:        byId('remove-empty-attributes').checked,
       removeEmptyElements:          byId('remove-empty-elements').checked,
       removeOptionalTags:           byId('remove-optional-tags').checked,
+      lint:                         byId('use-htmllint').checked ? new HTMLLint() : null
     };
   }
   
   
   byId('convert-btn').onclick = function() {
     try {
-      var originalValue = byId('input').value,
-          minifiedValue = minify(originalValue, getOptions()),
+      var options = getOptions(),
+          lint = options.lint,
+          originalValue = byId('input').value,
+          minifiedValue = minify(originalValue, options),
           diff = originalValue.length - minifiedValue.length,
           savings = originalValue.length ? ((100 * diff) / originalValue.length).toFixed(2) : 0;
 
           '. Minified size: <strong>' + commify(minifiedValue.length) + '</strong>' +
           '. Savings: <strong>' + commify(diff) + ' (' + savings + '%)</strong>.' +
         '</span>';
+      
+      if (lint) {
+        lint.populate(byId('report'));
+      }
     }
     catch(err) {
       byId('output').value = '';
diff --git a/src/htmllint.js b/src/htmllint.js
new file mode 100644 (file)
index 0000000..35416fe
--- /dev/null
@@ -0,0 +1,102 @@
+/*!
+ * HTMLLint (to be used in conjunction with HTMLMinifier)
+ *
+ * Copyright (c) 2010 Juriy "kangax" Zaytsev
+ * Licensed under the MIT license.
+ *
+ */
+(function(global){
+  
+  /**
+   * Tells whether a tag name denotes a purely presentational element.
+   *
+   * @param {string} tag Element name; compared case-sensitively against lowercase names.
+   * @return {boolean} true for b, i, big, small, hr, blink or marquee.
+   */
+  function isPresentationalElement(tag) {
+    var presentationalTagPattern = /^(?:b|i|big|small|hr|blink|marquee)$/;
+    return presentationalTagPattern.test(tag);
+  }
+  /**
+   * Tells whether a tag name denotes a deprecated element.
+   *
+   * @param {string} tag Element name; compared case-sensitively against lowercase names.
+   * @return {boolean} true for applet, basefont, center, dir, font, isindex,
+   *   menu, s, strike or u.
+   */
+  function isDeprecatedElement(tag) {
+    var deprecatedTagPattern = /^(?:applet|basefont|center|dir|font|isindex|menu|s|strike|u)$/;
+    return deprecatedTagPattern.test(tag);
+  }
+  /**
+   * Tells whether an attribute name looks like an inline event handler.
+   *
+   * @param {string} attrName Attribute name.
+   * @return {boolean} true when the name starts with "on" followed by at least
+   *   one lowercase ASCII letter (the rest of the name is not inspected).
+   */
+  function isEventAttribute(attrName) {
+    var eventAttributePattern = /^on[a-z]+/;
+    return eventAttributePattern.test(attrName);
+  }
+  // Attributes deprecated by HTML 4.01, keyed by attribute name. Each pattern
+  // matches the lowercase element names on which that attribute is deprecated.
+  var deprecatedAttributeTags = {
+    align:      /^(?:caption|applet|iframe|img|input|object|legend|table|hr|div|h[1-6]|p)$/,
+    alink:      /^body$/,
+    alt:        /^applet$/,
+    archive:    /^applet$/,
+    background: /^body$/,
+    bgcolor:    /^(?:table|t[rdh]|body)$/,
+    border:     /^(?:img|object)$/,
+    clear:      /^br$/,
+    code:       /^applet$/,
+    codebase:   /^applet$/,
+    // color and face are deprecated on <font> and <basefont>, not on <base>
+    color:      /^(?:base)?font$/,
+    compact:    /^(?:dir|[dou]l|menu)$/,
+    face:       /^(?:base)?font$/,
+    height:     /^(?:t[dh]|applet)$/,
+    hspace:     /^(?:applet|img|object)$/,
+    language:   /^script$/,
+    link:       /^body$/,
+    name:       /^applet$/,
+    noshade:    /^hr$/,
+    nowrap:     /^t[dh]$/,
+    object:     /^applet$/,
+    prompt:     /^isindex$/,
+    size:       /^(?:hr|font|basefont)$/,
+    start:      /^ol$/,
+    text:       /^body$/,
+    type:       /^(?:li|ol|ul)$/,
+    value:      /^li$/,
+    version:    /^html$/,
+    vlink:      /^body$/,
+    vspace:     /^(?:applet|img|object)$/,
+    width:      /^(?:hr|td|th|applet|pre)$/
+  };
+
+  /**
+   * Tells whether an attribute is deprecated on the given element.
+   *
+   * @param {string} tag Lowercase element name.
+   * @param {string} attrName Lowercase attribute name.
+   * @return {boolean} true when the tag/attribute pair is listed as deprecated.
+   */
+  function isDeprecatedAttribute(tag, attrName) {
+    // Own-property check so names such as "constructor" or "toString" never
+    // resolve to members inherited from Object.prototype.
+    if (!Object.prototype.hasOwnProperty.call(deprecatedAttributeTags, attrName)) {
+      return false;
+    }
+    return deprecatedAttributeTags[attrName].test(tag);
+  }
+  
+  /**
+   * Collects lint warnings as HTML fragments while a document is being parsed.
+   *
+   * @constructor
+   */
+  function Lint() {
+    // Flat list of string pieces; populate() joins them into one report.
+    this.log = [ ];
+  }
+
+  /**
+   * Logs a warning when the element is deprecated or, failing that, presentational.
+   *
+   * @param {string} tag Lowercase element name.
+   */
+  Lint.prototype._testElement = function(tag) {
+    if (isDeprecatedElement(tag)) {
+      this.log.push(
+        '<li>Warning: found <span class="deprecated-element">deprecated element</span> (<strong>', 
+        tag, '</strong>)</li>');
+    }
+    else if (isPresentationalElement(tag)) {
+      this.log.push(
+        '<li>Warning: found <span class="presentational-element">presentational element</span> (<strong>', 
+        tag, '</strong>)</li>');
+    }
+  };
+
+  /**
+   * Logs a warning when the attribute is an event handler or, failing that,
+   * deprecated on the given element.
+   *
+   * @param {string} tag Lowercase element name.
+   * @param {string} attrName Lowercase attribute name.
+   */
+  Lint.prototype._testAttribute = function(tag, attrName) {
+    if (isEventAttribute(attrName)) {
+      this.log.push(
+        '<li>Warning: found <span class="event-attribute">event attribute</span> (<strong>', 
+        attrName, '</strong>)</li>');
+    }
+    else if (isDeprecatedAttribute(tag, attrName)) {
+      this.log.push(
+        '<li>Warning: found <span class="deprecated-attribute">deprecated attribute</span> (<strong>', 
+        attrName, '</strong> on ', tag, ' element)</li>');
+    }
+  };
+
+  /**
+   * Runs the element check and the attribute check for one tag/attribute pair.
+   *
+   * NOTE(review): the minifier appears to call this once per attribute, so an
+   * element warning may repeat for each attribute and may never be produced
+   * for an element without attributes -- confirm against the caller.
+   *
+   * @param {string} tag Lowercase element name.
+   * @param {string} attrName Lowercase attribute name.
+   */
+  Lint.prototype.test = function(tag, attrName) {
+    this._testElement(tag);
+    this._testAttribute(tag, attrName);
+  };
+
+  /**
+   * Writes the collected warnings into an element as an unordered list.
+   * When nothing was logged the element is emptied, so a report left over
+   * from an earlier run does not stay visible.
+   *
+   * @param {Object} writeToElement Object whose innerHTML receives the report;
+   *   nothing happens when it is falsy.
+   */
+  Lint.prototype.populate = function(writeToElement) {
+    if (!writeToElement) {
+      return;
+    }
+    writeToElement.innerHTML = this.log.length
+      ? '<ul>' + this.log.join('') + '</ul>'
+      : '';
+  };
+  
+  global.HTMLLint = Lint;
+  
+})(this);
\ No newline at end of file
index ce57761..5efeec4 100644 (file)
@@ -1,5 +1,5 @@
 /*!
- * HTML Minifier v0.3
+ * HTMLMinifier v0.3
  * http://kangax.github.com/html-minifier/
  *
  * Copyright (c) 2010 Juriy "kangax" Zaytsev
     return (/\[if[^\]]+\]/).test(text);
   }
   
+  /**
+   * Tells whether an attribute name looks like an inline event handler.
+   *
+   * @param {string} attrName Attribute name.
+   * @return {boolean} true when the name starts with "on" followed by at least
+   *   one lowercase ASCII letter (the rest of the name is not inspected).
+   */
+  function isEventAttribute(attrName) {
+    var eventAttributePattern = /^on[a-z]+/;
+    return eventAttributePattern.test(attrName);
+  }
+  
   function canRemoveAttributeQuotes(value) {
     // http://www.w3.org/TR/html4/intro/sgmltut.html#attributes
     // avoid \w, which could match unicode in some implementations
   }
   
   function cleanAttributeValue(tag, attrName, attrValue) {
-    if (/^on[a-z]+/.test(attrName)) {
+    if (isEventAttribute(attrName)) {
       return trimWhitespace(attrValue.replace(/^\s*javascript:\s*/i, ''));
     }
     if (attrName === 'class') {
-      // trim and collapse whitesapce
       return collapseWhitespace(trimWhitespace(attrValue));
     }
     return attrValue;
         buffer = [ ],
         currentChars = '',
         currentTag = '',
-        t = new Date();
+        lint = options.lint,
+        t = new Date()
     
     HTMLParser(value, {
       start: function( tag, attrs, unary ) {
         buffer.push('<', tag);
         
         for ( var i = 0, len = attrs.length; i < len; i++ ) {
+          lint && lint.test(tag, attrs[i].name.toLowerCase());
           buffer.push(normalizeAttribute(attrs[i], attrs, tag, options));
         }
         
       }
     });  
     
-    results.push.apply(results, buffer);
-    
+    results.push.apply(results, buffer)    
     var str = results.join('');
-
     log('minified in: ' + (new Date() - t) + 'ms');
-
     return str;
   }