From 10291f87b92406c68a284ce09b6153ff8af528b1 Mon Sep 17 00:00:00 2001 From: Juriy Zaytsev Date: Wed, 10 Feb 2010 12:34:24 -0500 Subject: [PATCH] Fix some of the bugs in element removal mechanism. Write more tests for it. --- src/htmlminifier.js | 55 ++++++++++++++++++++++++++++++++------------- tests/index.html | 44 ++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/src/htmlminifier.js b/src/htmlminifier.js index 788c6c3..c79ac00 100644 --- a/src/htmlminifier.js +++ b/src/htmlminifier.js @@ -14,6 +14,9 @@ function trimWhitespace(str) { return (str.trim ? str.trim() : str.replace(/^\s+/, '').replace(/\s+$/, '')); } + function collapseWhitespace(str) { + return str.replace(/\s+/g, ' '); + } function canRemoveAttributeQuotes(value) { // http://www.w3.org/TR/html4/intro/sgmltut.html#attributes @@ -61,11 +64,11 @@ function cleanAttributeValue(tag, attrName, attrValue) { if (/^on[a-z]+/.test(attrName)) { - return attrValue.replace(/^(['"])?javascript:/i, '$1'); + return attrValue.replace(/^\s*javascript:/i, ''); } - if (attrName.toLowerCase() === 'class') { + if (attrName === 'class') { // trim and collapse whitesapce - return attrValue.replace(/^(["'])?\s+/, '$1').replace(/\s+(["'])?$/, '$1').replace(/\s+/g, ' '); + return collapseWhitespace(trimWhitespace(attrValue)); } return attrValue; } @@ -85,11 +88,15 @@ return false; } + function canRemoveElement(tag) { + return tag !== 'textarea'; + } + function normalizeAttribute(attr, attrs, tag, options) { - var attrName = attr.name.toLowerCase(); - var attrValue = attr.escaped; - var attrFragment; + var attrName = attr.name.toLowerCase(), + attrValue = attr.escaped, + attrFragment; if (options.shouldRemoveRedundantAttributes && isAttributeRedundant(tag, attrName, attrValue, attrs)) { @@ -124,36 +131,52 @@ options = options || { }; value = trimWhitespace(value); - var results = []; - var t = new Date(); + var results = [ ], + buffer = [ ], + currentChars = '', + currentTag = '', + t = new Date(); HTMLParser(value, { start: function( tag, attrs, unary ) { tag = tag.toLowerCase(); + currentTag = tag; - results.push('<', tag); + buffer.push('<', tag); for ( var i = 0, len = attrs.length; i < len; i++ ) { - results.push(normalizeAttribute(attrs[i], attrs, tag, options)); + buffer.push(normalizeAttribute(attrs[i], attrs, tag, options)); } - results.push('>'); + buffer.push('>'); }, end: function( tag ) { - results.push(''); + var isElementEmpty = currentChars === '' && tag === currentTag; + if (options.shouldRemoveEmptyElements && isElementEmpty && canRemoveElement(tag)) { + // noop + } + else { + buffer.push(''); + results.push.apply(results, buffer); + } + buffer.length = 0; + currentChars = ''; }, chars: function( text ) { - results.push(options.shouldCollapseWhitespace ? trimWhitespace(text) : text); + currentChars = text; + buffer.push(options.shouldCollapseWhitespace ? trimWhitespace(text) : text); }, comment: function( text ) { - results.push(options.shouldRemoveComments ? '' : ('')); + buffer.push(options.shouldRemoveComments ? '' : ('')); }, doctype: function(doctype) { - results.push(options.shouldUseShortDoctype ? '' : doctype.replace(/\s+/g, ' ')); + buffer.push(options.shouldUseShortDoctype ? '' : collapseWhitespace(doctype)); } }); - + + results.push.apply(results, buffer); + var str = results.join(''); log('minified in: ' + (new Date() - t) + 'ms'); diff --git a/tests/index.html b/tests/index.html index 92b5e3a..be5fc4d 100644 --- a/tests/index.html +++ b/tests/index.html @@ -22,6 +22,24 @@ var minify = global.minify; + test('parsing non-trivial markup', function() { + equals(minify('

x

'), '

x

'); + equals(minify('

x

'), '

x

'); + equals(minify('

x

'), '

x

'); + equals(minify('

xxx

'), '

xxx

'); + equals(minify('

xxx

'), '

xxx

'); + + var input = '
'+ + 'i\'m 10 levels deep'+ + '
'; + + equals(minify(input), input); + + equals(minify(' + \ No newline at end of file -- 2.34.1