Fixes #46 - special characters in URLs and attributes.

author GoalSmashers <jakub@goalsmashers.com>

Sat, 16 Mar 2013 15:39:19 +0000 (16:39 +0100)

committer GoalSmashers <jakub@goalsmashers.com>

Sat, 16 Mar 2013 15:42:37 +0000 (16:42 +0100)
author GoalSmashers <jakub@goalsmashers.com>
Sat, 16 Mar 2013 15:39:19 +0000 (16:39 +0100)
committer GoalSmashers <jakub@goalsmashers.com>
Sat, 16 Mar 2013 15:42:37 +0000 (16:42 +0100)
diff --git a/History.md b/History.md

index 1b32cde..faefb1c 100644 (file)
--- a/History.md
+++ b/History.md
@@ -1,3 +1,9 @@
+1.0 / 2013-xx-xx
+==================
+
+* Fixed issue [#46](https://github.com/GoalSmashers/clean-css/issues/46) - preserving special characters in URLs
+  and attributes.
+
  0.10.1 / 2013-02-14
  ==================
  
diff --git a/lib/clean.js b/lib/clean.js

index 2511939..51215fe 100644 (file)
--- a/lib/clean.js
+++ b/lib/clean.js
@@ -31,7 +31,8 @@ var CleanCSS = {
    process: function(data, options) {
      var context = {
        specialComments: [],
-      contentBlocks: []
+      freeTextBlocks: [],
+      urlBlocks: []
      };
      var replace = function() {
        if (typeof arguments[0] == 'function')
@@ -74,12 +75,7 @@ var CleanCSS = {
        data = CleanCSS._stripComments(context, data);
      });
  
-    // replace content: with a placeholder
-    replace(function stripContent() {
-      data = CleanCSS._stripContent(context, data);
-    });
-
-    // strip url's parentheses if possible (no spaces inside)
+    // strip quotation in urls if possible (no spaces inside)
      replace(/url\(['"]([^\)]+)['"]\)/g, function(urlMatch) {
        if (urlMatch.match(/\s/g) !== null)
          return urlMatch;
@@ -87,6 +83,47 @@ var CleanCSS = {
          return urlMatch.replace(/\(['"]/, '(').replace(/['"]\)$/, ')');
      });
  
+    // strip quotation in animation & font names
+    replace(/(animation|animation\-name|font|font\-family):([^;}]+)/g, function(match, propertyName, fontDef) {
+      return propertyName + ':' + fontDef.replace(/['"]([\w\-]+)['"]/g, '$1');
+    });
+
+    // strip quotation in @keyframes
+    replace(/@(\-moz\-|\-o\-|\-webkit\-)?keyframes ([^{]+)/g, function(match, prefix, name) {
+      prefix = prefix || '';
+      return '@' + prefix + 'keyframes ' + (name.indexOf(' ') > -1 ? name : name.replace(/['"]/g, ''));
+    });
+
+    // IE shorter filters, but only if single (IE 7 issue)
+    replace(/progid:DXImageTransform\.Microsoft\.(Alpha|Chroma)(\([^\)]+\))([;}'"])/g, function(match, filter, args, suffix) {
+      return filter.toLowerCase() + args + suffix;
+    });
+
+    // strip quotation in attribute values
+    replace(/\[([^\]]+)\]/g, function(match, content) {
+      var eqIndex = content.indexOf('=');
+      if (eqIndex < 0 && content.indexOf('\'') < 0 && content.indexOf('"') < 0)
+        return match;
+
+      var key = content.substring(0, eqIndex);
+      var value = content.substring(eqIndex + 1, content.length);
+
+      if (/^['"](?:[a-zA-Z][a-zA-Z\d\-]+)['"]$/.test(value))
+        return '[' + key + '=' + value.substring(1, value.length - 1) + ']';
+      else
+        return match;
+    });
+
+    // replace all free text content with a placeholder
+    replace(function stripFreeText() {
+      data = CleanCSS._stripFreeText(context, data);
+    });
+
+    // replace url(...) with a placeholder
+    replace(function stripUrls() {
+      data = CleanCSS._stripUrls(context, data);
+    });
+
      // line breaks
      if (!options.keepBreaks)
        replace(/[\r]?\n/g, ' ');
@@ -119,32 +156,6 @@ var CleanCSS = {
      // trailing semicolons
      replace(/;\}/g, '}');
  
-    // strip quotation in animation & font names
-    replace(/(animation|animation\-name|font|font\-family):([^;}]+)/g, function(match, propertyName, fontDef) {
-      return propertyName + ':' + fontDef.replace(/['"]([\w\-]+)['"]/g, '$1');
-    });
-
-    // strip quotation in @keyframes
-    replace(/@(\-moz\-|\-o\-|\-webkit\-)?keyframes ([^{]+)/g, function(match, prefix, name) {
-      prefix = prefix || '';
-      return '@' + prefix + 'keyframes ' + (name.indexOf(' ') > -1 ? name : name.replace(/['"]/g, ''));
-    });
-
-    // strip quotation in attribute values
-    replace(/\[([^\]]+)\]/g, function(match, content) {
-      var eqIndex = content.indexOf('=');
-      if (eqIndex < 0 && content.indexOf('\'') < 0 && content.indexOf('"') < 0)
-        return match;
-
-      var key = content.substring(0, eqIndex);
-      var value = content.substring(eqIndex + 1, content.length);
-
-      if (/^['"](?:[a-zA-Z][a-zA-Z\d\-]+)['"]$/.test(value))
-        return '[' + key + '=' + value.substring(1, value.length - 1) + ']';
-      else
-        return match;
-    });
-
      // rgb to hex colors
      replace(/rgb\s*\(([^\)]+)\)/g, function(match, color) {
        var parts = color.split(',');
@@ -184,11 +195,6 @@ var CleanCSS = {
          return match;
      });
  
-    // IE shorter filters, but only if single (IE 7 issue)
-    replace(/progid:DXImageTransform\.Microsoft\.(Alpha|Chroma)(\([^\)]+\))([;}'"])/g, function(match, filter, args, suffix) {
-      return filter.toLowerCase() + args + suffix;
-    });
-
      // zero + unit to zero
      replace(/(\s|:|,)0(?:px|em|ex|cm|mm|in|pt|pc|%)/g, '$1' + '0');
      replace(/rect\(0(?:px|em|ex|cm|mm|in|pt|pc|%)/g, 'rect(0');
@@ -284,12 +290,21 @@ var CleanCSS = {
      // remove universal selector when not needed (*#id, *.class etc)
      replace(/\*([\.#:\[])/g, '$1');
  
-    // Restore special comments, content content, and spaces inside calc back
-    var specialCommentsCount = context.specialComments.length;
-
+    // Restore spaces inside calc back
      replace(/calc\([^\}]+\}/g, function(match) {
        return match.replace(/\+/g, ' + ');
      });
+
+    // Restore urls, free text content, and special comments (in that order)
+    replace(/__URL__/g, function() {
+      return context.urlBlocks.shift();
+    });
+
+    replace(/__CSSFREETEXT__/g, function() {
+      return context.freeTextBlocks.shift();
+    });
+
+    var specialCommentsCount = context.specialComments.length;
      replace(/__CSSCOMMENT__/g, function() {
        switch (options.keepSpecialComments) {
          case '*':
@@ -302,9 +317,6 @@ var CleanCSS = {
            return '';
        }
      });
-    replace(/__CSSCONTENT__/g, function() {
-      return context.contentBlocks.shift();
-    });
  
      // trim spaces at beginning and end
      return data.trim();
@@ -314,10 +326,10 @@ var CleanCSS = {
    // for further restoring. Plain comments are removed. It's done by scanning datq using
    // String#indexOf scanning instead of regexps to speed up the process.
    _stripComments: function(context, data) {
-    var tempData = [],
-      nextStart = 0,
-      nextEnd = 0,
-      cursor = 0;
+    var tempData = [];
+    var nextStart = 0;
+    var nextEnd = 0;
+    var cursor = 0;
  
      for (; nextEnd < data.length; ) {
        nextStart = data.indexOf('/*', nextEnd);
@@ -339,75 +351,73 @@ var CleanCSS = {
        data;
    },
  
-  // Strip content tags by replacing them by the __CSSCONTENT__
+  // Strip quoted free text by replacing it with the __CSSFREETEXT__
    // marker for further restoring. It's done via string scanning
    // instead of regexps to speed up the process.
-  _stripContent: function(context, data) {
-    var tempData = [],
-      nextStart = 0,
-      nextEnd = 0,
-      cursor = 0,
-      matchedParenthesis = null;
-    var allowedPrefixes = [' ', '{', ';', this.lineBreak];
-    var skipBy = 'content'.length;
-
-    // Find either first (matchedParenthesis == null) or second matching
-    // parenthesis so that we can determine boundaries of content block.
-    var nextParenthesis = function(pos) {
-      var min,
-        max = data.length;
-
-      if (matchedParenthesis) {
-        min = data.indexOf(matchedParenthesis, pos);
-        if (min == -1)
-          min = max;
-      } else {
-        var next1 = data.indexOf("'", pos);
-        var next2 = data.indexOf('"', pos);
-        if (next1 == -1)
-          next1 = max;
-        if (next2 == -1)
-          next2 = max;
-
-        min = next1 > next2 ? next2 : next1;
-      }
+  _stripFreeText: function(context, data) {
+    var tempData = [];
+    var nextStart = 0;
+    var nextEnd = 0;
+    var cursor = 0;
+    var matchedParenthesis = null;
+    var singleParenthesis = "'";
+    var doubleParenthesis = '"';
+    var dataLength = data.length;
  
-      if (min == max)
-        return -1;
+    for (; nextEnd < data.length; ) {
+      var nextStartSingle = data.indexOf(singleParenthesis, nextEnd + 1);
+      var nextStartDouble = data.indexOf(doubleParenthesis, nextEnd + 1);
  
-      if (matchedParenthesis) {
-        matchedParenthesis = null;
-        return min;
-      } else {
-        // check if there's anything else between pos and min
-        // that doesn't match ':' or whitespace
-        if (/[^:\s]/.test(data.substring(pos, min)))
-          return -1;
+      if (nextStartSingle == -1)
+        nextStartSingle = dataLength;
+      if (nextStartDouble == -1)
+        nextStartDouble = dataLength;
  
-        matchedParenthesis = data.charAt(min);
-        return min + 1;
+      if (nextStartSingle < nextStartDouble) {
+        nextStart = nextStartSingle;
+        matchedParenthesis = singleParenthesis;
+      } else {
+        nextStart = nextStartDouble;
+        matchedParenthesis = doubleParenthesis;
        }
-    };
  
-    for (; nextEnd < data.length; ) {
-      nextStart = data.indexOf('content', nextEnd);
        if (nextStart == -1)
          break;
  
-      // skip by `skipBy` bytes if matched declaration is not a property but ID, class name or a some substring
-      if (allowedPrefixes.indexOf(data[nextStart - 1]) == -1) {
-        nextEnd += skipBy;
-        continue;
-      }
-
-      nextStart = nextParenthesis(nextStart + skipBy);
-      nextEnd = nextParenthesis(nextStart);
+      nextEnd = data.indexOf(matchedParenthesis, nextStart + 1);
        if (nextStart == -1 || nextEnd == -1)
          break;
  
-      tempData.push(data.substring(cursor, nextStart - 1));
-      tempData.push('__CSSCONTENT__');
-      context.contentBlocks.push(data.substring(nextStart - 1, nextEnd + 1));
+      tempData.push(data.substring(cursor, nextStart));
+      tempData.push('__CSSFREETEXT__');
+      context.freeTextBlocks.push(data.substring(nextStart, nextEnd + 1));
+      cursor = nextEnd + 1;
+    }
+
+    return tempData.length > 0 ?
+      tempData.join('') + data.substring(cursor, data.length) :
+      data;
+  },
+
+  // Strip urls by replacing them by the __URL__
+  // marker for further restoring. It's done via string scanning
+  // instead of regexps to speed up the process.
+  _stripUrls: function(context, data) {
+    var nextStart = 0;
+    var nextEnd = 0;
+    var cursor = 0;
+    var tempData = [];
+
+    for (; nextEnd < data.length; ) {
+      nextStart = data.indexOf('url(', nextEnd);
+      if (nextStart == -1)
+        break;
+
+      nextEnd = data.indexOf(')', nextStart);
+
+      tempData.push(data.substring(cursor, nextStart));
+      tempData.push('__URL__');
+      context.urlBlocks.push(data.substring(nextStart, nextEnd + 1));
        cursor = nextEnd + 1;
      }
  
diff --git a/test/unit-test.js b/test/unit-test.js

index 995eaa3..92f5453 100644 (file)
--- a/test/unit-test.js
+++ b/test/unit-test.js
@@ -528,7 +528,10 @@ vows.describe('clean-units').addBatch({
      'not add a space before url\'s hash': [
        "url(\"../fonts/d90b3358-e1e2-4abb-ba96-356983a54c22.svg#d90b3358-e1e2-4abb-ba96-356983a54c22\")",
        "url(../fonts/d90b3358-e1e2-4abb-ba96-356983a54c22.svg#d90b3358-e1e2-4abb-ba96-356983a54c22)"
-    ]
+    ],
+    'keep urls from being stripped down #1': 'a{background:url(/image-1.0.png)}',
+    'keep urls from being stripped down #2': "a{background:url(/image-white.png)}",
+    'keep __URL__ in comments (so order is important)': '/*! __URL__ */a{}'
    }),
    'fonts': cssContext({
      'keep format quotation': "@font-face{font-family:PublicVintage;src:url(./PublicVintage.otf) format('opentype')}",
@@ -603,7 +606,10 @@ vows.describe('clean-units').addBatch({
      'should strip quotations if is less specific selectors': [
        'a[data-href*=\'object1\']{border-color:red}a[data-href|=\'object2\']{border-color:#0f0}',
        'a[data-href*=object1]{border-color:red}a[data-href|=object2]{border-color:#0f0}'
-    ]
+    ],
+    'should keep special characters inside attributes #1': "a[data-css='color:white']",
+    'should keep special characters inside attributes #2': "a[data-text='a\nb\nc']",
+    'should keep special characters inside attributes #3': 'a[href="/version-0.01.html"]'
    }),
    'ie filters': cssContext({
      'short alpha': [
author	GoalSmashers <jakub@goalsmashers.com>
	Sat, 16 Mar 2013 15:39:19 +0000 (16:39 +0100)
committer	GoalSmashers <jakub@goalsmashers.com>
	Sat, 16 Mar 2013 15:42:37 +0000 (16:42 +0100)
History.md		patch \| blob \| history
lib/clean.js		patch \| blob \| history
test/unit-test.js		patch \| blob \| history