From 57b0fcabce916102ca4c079c308b704a0b3b4511 Mon Sep 17 00:00:00 2001 From: GoalSmashers Date: Sat, 16 Mar 2013 16:39:19 +0100 Subject: [PATCH] Fixes #46 - special characters in URLs and attributes. * Simplified preserving 'content' attribute's content. --- History.md | 6 ++ lib/clean.js | 218 ++++++++++++++++++++++++---------------------- test/unit-test.js | 10 ++- 3 files changed, 128 insertions(+), 106 deletions(-) diff --git a/History.md b/History.md index 1b32cdee..faefb1c6 100644 --- a/History.md +++ b/History.md @@ -1,3 +1,9 @@ +1.0 / 2013-xx-xx +================== + +* Fixed issue [#46](https://github.com/GoalSmashers/clean-css/issues/46) - preserving special characters in URLs + and attributes. + 0.10.1 / 2013-02-14 ================== diff --git a/lib/clean.js b/lib/clean.js index 25119390..51215fe4 100644 --- a/lib/clean.js +++ b/lib/clean.js @@ -31,7 +31,8 @@ var CleanCSS = { process: function(data, options) { var context = { specialComments: [], - contentBlocks: [] + freeTextBlocks: [], + urlBlocks: [] }; var replace = function() { if (typeof arguments[0] == 'function') @@ -74,12 +75,7 @@ var CleanCSS = { data = CleanCSS._stripComments(context, data); }); - // replace content: with a placeholder - replace(function stripContent() { - data = CleanCSS._stripContent(context, data); - }); - - // strip url's parentheses if possible (no spaces inside) + // strip quotation in urls if possible (no spaces inside) replace(/url\(['"]([^\)]+)['"]\)/g, function(urlMatch) { if (urlMatch.match(/\s/g) !== null) return urlMatch; @@ -87,6 +83,47 @@ var CleanCSS = { return urlMatch.replace(/\(['"]/, '(').replace(/['"]\)$/, ')'); }); + // strip quotation in animation & font names + replace(/(animation|animation\-name|font|font\-family):([^;}]+)/g, function(match, propertyName, fontDef) { + return propertyName + ':' + fontDef.replace(/['"]([\w\-]+)['"]/g, '$1'); + }); + + // strip quotation in @keyframes + replace(/@(\-moz\-|\-o\-|\-webkit\-)?keyframes ([^{]+)/g, 
function(match, prefix, name) { + prefix = prefix || ''; + return '@' + prefix + 'keyframes ' + (name.indexOf(' ') > -1 ? name : name.replace(/['"]/g, '')); + }); + + // IE shorter filters, but only if single (IE 7 issue) + replace(/progid:DXImageTransform\.Microsoft\.(Alpha|Chroma)(\([^\)]+\))([;}'"])/g, function(match, filter, args, suffix) { + return filter.toLowerCase() + args + suffix; + }); + + // strip quotation in attribute values + replace(/\[([^\]]+)\]/g, function(match, content) { + var eqIndex = content.indexOf('='); + if (eqIndex < 0 && content.indexOf('\'') < 0 && content.indexOf('"') < 0) + return match; + + var key = content.substring(0, eqIndex); + var value = content.substring(eqIndex + 1, content.length); + + if (/^['"](?:[a-zA-Z][a-zA-Z\d\-]+)['"]$/.test(value)) + return '[' + key + '=' + value.substring(1, value.length - 1) + ']'; + else + return match; + }); + + // replace all free text content with a placeholder + replace(function stripFreeText() { + data = CleanCSS._stripFreeText(context, data); + }); + + // replace url(...) with a placeholder + replace(function stripUrls() { + data = CleanCSS._stripUrls(context, data); + }); + // line breaks if (!options.keepBreaks) replace(/[\r]?\n/g, ' '); @@ -119,32 +156,6 @@ var CleanCSS = { // trailing semicolons replace(/;\}/g, '}'); - // strip quotation in animation & font names - replace(/(animation|animation\-name|font|font\-family):([^;}]+)/g, function(match, propertyName, fontDef) { - return propertyName + ':' + fontDef.replace(/['"]([\w\-]+)['"]/g, '$1'); - }); - - // strip quotation in @keyframes - replace(/@(\-moz\-|\-o\-|\-webkit\-)?keyframes ([^{]+)/g, function(match, prefix, name) { - prefix = prefix || ''; - return '@' + prefix + 'keyframes ' + (name.indexOf(' ') > -1 ? 
name : name.replace(/['"]/g, '')); - }); - - // strip quotation in attribute values - replace(/\[([^\]]+)\]/g, function(match, content) { - var eqIndex = content.indexOf('='); - if (eqIndex < 0 && content.indexOf('\'') < 0 && content.indexOf('"') < 0) - return match; - - var key = content.substring(0, eqIndex); - var value = content.substring(eqIndex + 1, content.length); - - if (/^['"](?:[a-zA-Z][a-zA-Z\d\-]+)['"]$/.test(value)) - return '[' + key + '=' + value.substring(1, value.length - 1) + ']'; - else - return match; - }); - // rgb to hex colors replace(/rgb\s*\(([^\)]+)\)/g, function(match, color) { var parts = color.split(','); @@ -184,11 +195,6 @@ var CleanCSS = { return match; }); - // IE shorter filters, but only if single (IE 7 issue) - replace(/progid:DXImageTransform\.Microsoft\.(Alpha|Chroma)(\([^\)]+\))([;}'"])/g, function(match, filter, args, suffix) { - return filter.toLowerCase() + args + suffix; - }); - // zero + unit to zero replace(/(\s|:|,)0(?:px|em|ex|cm|mm|in|pt|pc|%)/g, '$1' + '0'); replace(/rect\(0(?:px|em|ex|cm|mm|in|pt|pc|%)/g, 'rect(0'); @@ -284,12 +290,21 @@ var CleanCSS = { // remove universal selector when not needed (*#id, *.class etc) replace(/\*([\.#:\[])/g, '$1'); - // Restore special comments, content content, and spaces inside calc back - var specialCommentsCount = context.specialComments.length; - + // Restore spaces inside calc back replace(/calc\([^\}]+\}/g, function(match) { return match.replace(/\+/g, ' + '); }); + + // Restore urls, free text content, and special comments (in that order) + replace(/__URL__/g, function() { + return context.urlBlocks.shift(); + }); + + replace(/__CSSFREETEXT__/g, function() { + return context.freeTextBlocks.shift(); + }); + + var specialCommentsCount = context.specialComments.length; replace(/__CSSCOMMENT__/g, function() { switch (options.keepSpecialComments) { case '*': @@ -302,9 +317,6 @@ var CleanCSS = { return ''; } }); - replace(/__CSSCONTENT__/g, function() { - return 
context.contentBlocks.shift(); - }); // trim spaces at beginning and end return data.trim(); @@ -314,10 +326,10 @@ var CleanCSS = { // for further restoring. Plain comments are removed. It's done by scanning datq using // String#indexOf scanning instead of regexps to speed up the process. _stripComments: function(context, data) { - var tempData = [], - nextStart = 0, - nextEnd = 0, - cursor = 0; + var tempData = []; + var nextStart = 0; + var nextEnd = 0; + var cursor = 0; for (; nextEnd < data.length; ) { nextStart = data.indexOf('/*', nextEnd); @@ -339,75 +351,73 @@ var CleanCSS = { data; }, - // Strip content tags by replacing them by the __CSSCONTENT__ + // Strip free text (quoted strings) by replacing it with the __CSSFREETEXT__ // marker for further restoring. It's done via string scanning // instead of regexps to speed up the process. - _stripContent: function(context, data) { - var tempData = [], - nextStart = 0, - nextEnd = 0, - cursor = 0, - matchedParenthesis = null; - var allowedPrefixes = [' ', '{', ';', this.lineBreak]; - var skipBy = 'content'.length; - - // Find either first (matchedParenthesis == null) or second matching - // parenthesis so that we can determine boundaries of content block. - var nextParenthesis = function(pos) { - var min, - max = data.length; - - if (matchedParenthesis) { - min = data.indexOf(matchedParenthesis, pos); - if (min == -1) - min = max; - } else { - var next1 = data.indexOf("'", pos); - var next2 = data.indexOf('"', pos); - if (next1 == -1) - next1 = max; - if (next2 == -1) - next2 = max; - - min = next1 > next2 ? 
next2 : next1; - } + _stripFreeText: function(context, data) { + var tempData = []; + var nextStart = 0; + var nextEnd = 0; + var cursor = 0; + var matchedParenthesis = null; + var singleParenthesis = "'"; + var doubleParenthesis = '"'; + var dataLength = data.length; - if (min == max) - return -1; + for (; nextEnd < data.length; ) { + var nextStartSingle = data.indexOf(singleParenthesis, nextEnd + 1); + var nextStartDouble = data.indexOf(doubleParenthesis, nextEnd + 1); - if (matchedParenthesis) { - matchedParenthesis = null; - return min; - } else { - // check if there's anything else between pos and min - // that doesn't match ':' or whitespace - if (/[^:\s]/.test(data.substring(pos, min))) - return -1; + if (nextStartSingle == -1) + nextStartSingle = dataLength; + if (nextStartDouble == -1) + nextStartDouble = dataLength; - matchedParenthesis = data.charAt(min); - return min + 1; + if (nextStartSingle < nextStartDouble) { + nextStart = nextStartSingle; + matchedParenthesis = singleParenthesis; + } else { + nextStart = nextStartDouble; + matchedParenthesis = doubleParenthesis; } - }; - for (; nextEnd < data.length; ) { - nextStart = data.indexOf('content', nextEnd); if (nextStart == -1) break; - // skip by `skipBy` bytes if matched declaration is not a property but ID, class name or a some substring - if (allowedPrefixes.indexOf(data[nextStart - 1]) == -1) { - nextEnd += skipBy; - continue; - } - - nextStart = nextParenthesis(nextStart + skipBy); - nextEnd = nextParenthesis(nextStart); + nextEnd = data.indexOf(matchedParenthesis, nextStart + 1); if (nextStart == -1 || nextEnd == -1) break; - tempData.push(data.substring(cursor, nextStart - 1)); - tempData.push('__CSSCONTENT__'); - context.contentBlocks.push(data.substring(nextStart - 1, nextEnd + 1)); + tempData.push(data.substring(cursor, nextStart)); + tempData.push('__CSSFREETEXT__'); + context.freeTextBlocks.push(data.substring(nextStart, nextEnd + 1)); + cursor = nextEnd + 1; + } + + return tempData.length 
> 0 ? + tempData.join('') + data.substring(cursor, data.length) : + data; + }, + + // Strip urls by replacing them by the __URL__ + // marker for further restoring. It's done via string scanning + // instead of regexps to speed up the process. + _stripUrls: function(context, data) { + var nextStart = 0; + var nextEnd = 0; + var cursor = 0; + var tempData = []; + + for (; nextEnd < data.length; ) { + nextStart = data.indexOf('url(', nextEnd); + if (nextStart == -1) + break; + + nextEnd = data.indexOf(')', nextStart); + + tempData.push(data.substring(cursor, nextStart)); + tempData.push('__URL__'); + context.urlBlocks.push(data.substring(nextStart, nextEnd + 1)); cursor = nextEnd + 1; } diff --git a/test/unit-test.js b/test/unit-test.js index 995eaa39..92f54530 100644 --- a/test/unit-test.js +++ b/test/unit-test.js @@ -528,7 +528,10 @@ vows.describe('clean-units').addBatch({ 'not add a space before url\'s hash': [ "url(\"../fonts/d90b3358-e1e2-4abb-ba96-356983a54c22.svg#d90b3358-e1e2-4abb-ba96-356983a54c22\")", "url(../fonts/d90b3358-e1e2-4abb-ba96-356983a54c22.svg#d90b3358-e1e2-4abb-ba96-356983a54c22)" - ] + ], + 'keep urls from being stripped down #1': 'a{background:url(/image-1.0.png)}', + 'keep urls from being stripped down #2': "a{background:url(/image-white.png)}", + 'keep __URL__ in comments (so order is important)': '/*! 
__URL__ */a{}' }), 'fonts': cssContext({ 'keep format quotation': "@font-face{font-family:PublicVintage;src:url(./PublicVintage.otf) format('opentype')}", @@ -603,7 +606,10 @@ vows.describe('clean-units').addBatch({ 'should strip quotations if is less specific selectors': [ 'a[data-href*=\'object1\']{border-color:red}a[data-href|=\'object2\']{border-color:#0f0}', 'a[data-href*=object1]{border-color:red}a[data-href|=object2]{border-color:#0f0}' - ] + ], + 'should keep special characters inside attributes #1': "a[data-css='color:white']", + 'should keep special characters inside attributes #2': "a[data-text='a\nb\nc']", + 'should keep special characters inside attributes #3': 'a[href="/version-0.01.html"]' }), 'ie filters': cssContext({ 'short alpha': [ -- 2.34.1