From b38600d8f15e39e458c7bb871ae29cb52eecb2d1 Mon Sep 17 00:00:00 2001 From: kangax Date: Sat, 11 May 2013 15:05:42 -0400 Subject: [PATCH] Make parser ignore block elements in inline elements (allowed in HTML5, not HTML4). Closes #51. Closes #52. Closes #54. --- dist/all.js | 87 ++++++++++++++++++++++------------------------- src/htmlparser.js | 87 ++++++++++++++++++++++------------------------- tests/minifier.js | 6 ++-- 3 files changed, 86 insertions(+), 94 deletions(-) diff --git a/dist/all.js b/dist/all.js index f0cc986..7024f20 100644 --- a/dist/all.js +++ b/dist/all.js @@ -31,7 +31,7 @@ endTag = /^<\/([\w:-]+)[^>]*>/, attr = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g, doctype = /^]+>/i; - + // Empty Elements - HTML 4.01 var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed"); @@ -50,7 +50,7 @@ // Special Elements (can contain anything) var special = makeMap("script,style"); - + var reCache = { }, stackedTag, re; var HTMLParser = global.HTMLParser = function( html, handler ) { @@ -68,7 +68,7 @@ // Comment if ( html.indexOf(""); - + if ( index >= 0 ) { if ( handler.comment ) handler.comment( html.substring( 4, index ) ); @@ -81,22 +81,22 @@ handler.doctype( match[0] ); html = html.substring( match[0].length ); chars = false; - + // end tag } else if ( html.indexOf("]*>", "i")); - + html = html.replace(reStackedTag, function(all, text) { if (stackedTag !== 'script' && stackedTag !== 'style') { text = text @@ -154,16 +154,11 @@ throw "Parse Error: " + html; last = html; } - + // Clean up any remaining tags parseEndTag(); function parseStartTag( tag, tagName, rest, unary ) { - if ( block[ tagName ] ) { - while ( stack.last() && inline[ stack.last() ] ) { - parseEndTag( "", stack.last() ); - } - } if ( closeSelf[ tagName ] && stack.last() == tagName ) { parseEndTag( "", tagName ); @@ -173,10 +168,10 @@ if ( !unary ) stack.push( tagName ); - + if ( handler.start ) { var attrs = []; - + rest.replace(attr, function(match, name) { var value = arguments[2] ? arguments[2] : arguments[3] ? arguments[3] : @@ -188,7 +183,7 @@ escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //" }); }); - + if ( handler.start ) handler.start( tagName, attrs, unary ); } @@ -198,35 +193,35 @@ // If no tag name is provided, clean shop if ( !tagName ) var pos = 0; - + // Find the closest opened tag of the same type else for ( var pos = stack.length - 1; pos >= 0; pos-- ) if ( stack[ pos ] == tagName ) break; - + if ( pos >= 0 ) { // Close all the open elements, up the stack for ( var i = stack.length - 1; i >= pos; i-- ) if ( handler.end ) handler.end( stack[ i ] ); - + // Remove the open elements from the stack stack.length = pos; } } }; - + global.HTMLtoXML = function( html ) { var results = ""; - + HTMLParser(html, { start: function( tag, attrs, unary ) { results += "<" + tag; - + for ( var i = 0; i < attrs.length; i++ ) results += " " + attrs[i].name + '="' + attrs[i].escaped + '"'; - + results += (unary ? "/" : "") + ">"; }, end: function( tag ) { @@ -239,20 +234,20 @@ results += ""; } }); - + return results; }; - + global.HTMLtoDOM = function( html, doc ) { // There can be only one of these elements var one = makeMap("html,head,body,title"); - + // Enforce a structure for the document var structure = { link: "head", base: "head" }; - + if ( !doc ) { if ( typeof DOMDocument != "undefined" ) doc = new DOMDocument(); @@ -260,16 +255,16 @@ doc = document.implementation.createDocument("", "", null); else if ( typeof ActiveX != "undefined" ) doc = new ActiveXObject("Msxml.DOMDocument"); - + } else doc = doc.ownerDocument || doc.getOwnerDocument && doc.getOwnerDocument() || doc; - + var elems = [], documentElement = doc.documentElement || doc.getDocumentElement && doc.getDocumentElement(); - + // If we're dealing with an empty document then we // need to pre-populate it with the HTML document structure if ( !documentElement && doc.createElement ) (function(){ @@ -280,16 +275,16 @@ html.appendChild( doc.createElement("body") ); doc.appendChild( html ); })(); - + // Find all the unique elements if ( doc.getElementsByTagName ) for ( var i in one ) one[ i ] = doc.getElementsByTagName( i )[0]; - + // If we're working with a document, inject contents into // the body element var curParentNode = one.body; - + HTMLParser( html, { start: function( tagName, attrs, unary ) { // If it's a pre-built element, then we can ignore @@ -298,18 +293,18 @@ curParentNode = one[ tagName ]; return; } - + var elem = doc.createElement( tagName ); - + for ( var attr in attrs ) elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value ); - + if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" ) one[ structure[ tagName ] ].appendChild( elem ); - + else if ( curParentNode && curParentNode.appendChild ) curParentNode.appendChild( elem ); - + if ( !unary ) { elems.push( elem ); curParentNode = elem; @@ -317,7 +312,7 @@ }, end: function( tag ) { elems.length -= 1; - + // Init the new parentNode curParentNode = elems[ elems.length - 1 ]; }, @@ -328,7 +323,7 @@ // create comment node } }); - + return doc; }; diff --git a/src/htmlparser.js b/src/htmlparser.js index 69803b4..d64c07e 100644 --- a/src/htmlparser.js +++ b/src/htmlparser.js @@ -31,7 +31,7 @@ endTag = /^<\/([\w:-]+)[^>]*>/, attr = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g, doctype = /^]+>/i; - + // Empty Elements - HTML 4.01 var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed"); @@ -50,7 +50,7 @@ // Special Elements (can contain anything) var special = makeMap("script,style"); - + var reCache = { }, stackedTag, re; var HTMLParser = global.HTMLParser = function( html, handler ) { @@ -68,7 +68,7 @@ // Comment if ( html.indexOf(""); - + if ( index >= 0 ) { if ( handler.comment ) handler.comment( html.substring( 4, index ) ); @@ -81,22 +81,22 @@ handler.doctype( match[0] ); html = html.substring( match[0].length ); chars = false; - + // end tag } else if ( html.indexOf("]*>", "i")); - + html = html.replace(reStackedTag, function(all, text) { if (stackedTag !== 'script' && stackedTag !== 'style') { text = text @@ -154,16 +154,11 @@ throw "Parse Error: " + html; last = html; } - + // Clean up any remaining tags parseEndTag(); function parseStartTag( tag, tagName, rest, unary ) { - if ( block[ tagName ] ) { - while ( stack.last() && inline[ stack.last() ] ) { - parseEndTag( "", stack.last() ); - } - } if ( closeSelf[ tagName ] && stack.last() == tagName ) { parseEndTag( "", tagName ); @@ -173,10 +168,10 @@ if ( !unary ) stack.push( tagName ); - + if ( handler.start ) { var attrs = []; - + rest.replace(attr, function(match, name) { var value = arguments[2] ? arguments[2] : arguments[3] ? arguments[3] : @@ -188,7 +183,7 @@ escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //" }); }); - + if ( handler.start ) handler.start( tagName, attrs, unary ); } @@ -198,35 +193,35 @@ // If no tag name is provided, clean shop if ( !tagName ) var pos = 0; - + // Find the closest opened tag of the same type else for ( var pos = stack.length - 1; pos >= 0; pos-- ) if ( stack[ pos ] == tagName ) break; - + if ( pos >= 0 ) { // Close all the open elements, up the stack for ( var i = stack.length - 1; i >= pos; i-- ) if ( handler.end ) handler.end( stack[ i ] ); - + // Remove the open elements from the stack stack.length = pos; } } }; - + global.HTMLtoXML = function( html ) { var results = ""; - + HTMLParser(html, { start: function( tag, attrs, unary ) { results += "<" + tag; - + for ( var i = 0; i < attrs.length; i++ ) results += " " + attrs[i].name + '="' + attrs[i].escaped + '"'; - + results += (unary ? "/" : "") + ">"; }, end: function( tag ) { @@ -239,20 +234,20 @@ results += ""; } }); - + return results; }; - + global.HTMLtoDOM = function( html, doc ) { // There can be only one of these elements var one = makeMap("html,head,body,title"); - + // Enforce a structure for the document var structure = { link: "head", base: "head" }; - + if ( !doc ) { if ( typeof DOMDocument != "undefined" ) doc = new DOMDocument(); @@ -260,16 +255,16 @@ doc = document.implementation.createDocument("", "", null); else if ( typeof ActiveX != "undefined" ) doc = new ActiveXObject("Msxml.DOMDocument"); - + } else doc = doc.ownerDocument || doc.getOwnerDocument && doc.getOwnerDocument() || doc; - + var elems = [], documentElement = doc.documentElement || doc.getDocumentElement && doc.getDocumentElement(); - + // If we're dealing with an empty document then we // need to pre-populate it with the HTML document structure if ( !documentElement && doc.createElement ) (function(){ @@ -280,16 +275,16 @@ html.appendChild( doc.createElement("body") ); doc.appendChild( html ); })(); - + // Find all the unique elements if ( doc.getElementsByTagName ) for ( var i in one ) one[ i ] = doc.getElementsByTagName( i )[0]; - + // If we're working with a document, inject contents into // the body element var curParentNode = one.body; - + HTMLParser( html, { start: function( tagName, attrs, unary ) { // If it's a pre-built element, then we can ignore @@ -298,18 +293,18 @@ curParentNode = one[ tagName ]; return; } - + var elem = doc.createElement( tagName ); - + for ( var attr in attrs ) elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value ); - + if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" ) one[ structure[ tagName ] ].appendChild( elem ); - + else if ( curParentNode && curParentNode.appendChild ) curParentNode.appendChild( elem ); - + if ( !unary ) { elems.push( elem ); curParentNode = elem; @@ -317,7 +312,7 @@ }, end: function( tag ) { elems.length -= 1; - + // Init the new parentNode curParentNode = elems[ elems.length - 1 ]; }, @@ -328,7 +323,7 @@ // create comment node } }); - + return doc; }; diff --git a/tests/minifier.js b/tests/minifier.js index a56a686..c434b56 100644 --- a/tests/minifier.js +++ b/tests/minifier.js @@ -24,7 +24,9 @@ equal(minify('foo'), 'foo'); equal(minify('

x'), '

x

'); equal(minify('

x

'), '

x

', 'trailing quote should be ignored'); - + equal(minify('

Click me

'), '

Click me

'); + equal(minify(''), ''); + equal(minify(''), ''); equal(minify(''), ''); equal(minify('
'), '
'); @@ -440,7 +442,7 @@ input = '

foo bar

'; output = '

foo bar

'; equal(minify(input, { collapseWhitespace: true }), output); - + input = '

foo blah 22 bar

'; output = '

foo blah 22 bar

'; equal(minify(input, { collapseWhitespace: true }), output); -- 2.34.1