From: GoalSmashers <jakub@goalsmashers.com>
Date: Fri, 27 Sep 2013 14:03:08 +0000 (+0200)
Subject: Adds CSS tokenizer which will make it possible to further optimize content by reordering and/or merging selectors.
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=cd4e1f31d34ca0b37e3e032713e81ef8f31847fc;p=clean-css.git

Adds CSS tokenizer which will make it possible to further optimize content by reordering and/or merging selectors.
---

diff --git a/History.md b/History.md
index 48441c51..d07d5a96 100644
--- a/History.md
+++ b/History.md
@@ -3,11 +3,11 @@
 
 * Adds simplified and more advanced text escaping / restoring via `EscapeStore` class.
 * Adds simplified and much faster empty elements removal.
-* Adds simplified and much faster empty elements removal.
 * Adds missing `@import` processing to our benchmark (run via `npm run bench`).
 * Fixed issue [#157](https://github.com/GoalSmashers/clean-css/issues/157) - gets rid of `removeEmpty` option.
 * Fixed issue [#159](https://github.com/GoalSmashers/clean-css/issues/159) - escaped quotes inside content.
 * Fixed issue [#162](https://github.com/GoalSmashers/clean-css/issues/162) - strip quotes from base64 encoded URLs.
+* Adds CSS tokenizer which will make it possible to optimize content by reordering and/or merging selectors.
 
 1.1.7 / 2013-10-28
 ==================
diff --git a/lib/clean.js b/lib/clean.js
index 105bad83..a3e1cbc3 100644
--- a/lib/clean.js
+++ b/lib/clean.js
@@ -20,6 +20,8 @@ var ExpressionsProcessor = require('./text/expressions');
 var FreeTextProcessor = require('./text/free');
 var UrlsProcessor = require('./text/urls');
 
+var SelectorsTokenizer = require('./selectors/tokenizer');
+
 var CleanCSS = {
   process: function(data, options) {
     var replace = function() {
@@ -245,6 +247,10 @@ var CleanCSS = {
       return match.replace(/\+/g, ' + ');
     });
 
+    replace(function optimizeSelectors() {
+      data = new SelectorsTokenizer(data).process();
+    });
+
     replace(function restoreUrls() {
       data = urlsProcessor.restore(data);
     });
diff --git a/lib/selectors/tokenizer.js b/lib/selectors/tokenizer.js
new file mode 100644
index 00000000..236d4364
--- /dev/null
+++ b/lib/selectors/tokenizer.js
@@ -0,0 +1,128 @@
+module.exports = function Tokenizer(data) {
+  var whatsNext = function(context) {
+    var cursor = context.cursor;
+    var mode = context.mode;
+    var closest;
+
+    if (mode == 'body') {
+      closest = data.indexOf('}', cursor);
+      return closest > -1 ?
+        [closest, 'bodyEnd'] :
+        null;
+    }
+
+    var nextSpecial = data.indexOf('@', cursor);
+    var nextEscape = mode == 'top' ? data.indexOf('__ESCAPED_COMMENT_CLEAN_CSS', cursor) : -1;
+    var nextBodyStart = data.indexOf('{', cursor);
+    var nextBodyEnd = data.indexOf('}', cursor);
+
+    closest = nextSpecial;
+    if (closest == -1 || (nextEscape > -1 && nextEscape < closest))
+      closest = nextEscape;
+    if (closest == -1 || (nextBodyStart > -1 && nextBodyStart < closest))
+      closest = nextBodyStart;
+    if (closest == -1 || (nextBodyEnd > -1 && nextBodyEnd < closest))
+      closest = nextBodyEnd;
+
+    if (closest == -1)
+      return;
+    if (nextEscape === closest)
+      return [closest, 'escape'];
+    if (nextBodyStart === closest)
+      return [closest, 'bodyStart'];
+    if (nextBodyEnd === closest)
+      return [closest, 'bodyEnd'];
+    if (nextSpecial === closest)
+      return [closest, 'special'];
+  };
+
+  var tokenize = function(context) {
+    var tokenized = [];
+
+    context = context || { cursor: 0, mode: 'top' };
+
+    while (true) {
+      var next = whatsNext(context);
+      if (!next) {
+        var whatsLeft = data.substring(context.cursor);
+        if (whatsLeft.length > 0) {
+          tokenized.push(whatsLeft);
+          context.cursor += whatsLeft.length;
+        }
+        break;
+      }
+
+      var nextSpecial = next[0];
+      var what = next[1];
+      var nextEnd, oldMode;
+
+      if (what == 'special') {
+        var fragment = data.substring(nextSpecial, context.cursor + '@font-face'.length + 1);
+        var isSingle = fragment.indexOf('@import') === 0 || fragment.indexOf('@charset') === 0;
+        if (isSingle) {
+          nextEnd = data.indexOf(';', nextSpecial + 1);
+          tokenized.push(data.substring(context.cursor, nextEnd + 1));
+
+          context.cursor = nextEnd + 1;
+        } else {
+          nextEnd = data.indexOf('{', nextSpecial + 1);
+          var block = data.substring(context.cursor, nextEnd);
+
+          var isFlat = fragment.indexOf('@font-face') === 0;
+          oldMode = context.mode;
+          context.cursor = nextEnd + 1;
+          context.mode = isFlat ? 'body' : 'block';
+          var specialBody = tokenize(context);
+          context.mode = oldMode;
+
+          tokenized.push({ block: block, body: specialBody });
+        }
+      } else if (what == 'escape') {
+        nextEnd = data.indexOf('__', nextSpecial + 1);
+        var escaped = data.substring(context.cursor, nextEnd + 2);
+        tokenized.push(escaped);
+
+        context.cursor = nextEnd + 2;
+      } else if (what == 'bodyStart') {
+        var selector = data.substring(context.cursor, nextSpecial);
+
+        oldMode = context.mode;
+        context.cursor = nextSpecial + 1;
+        context.mode = 'body';
+        var body = tokenize(context);
+        context.mode = oldMode;
+
+        tokenized.push({ selector: selector, body: body });
+      } else if (what == 'bodyEnd') {
+        if (context.mode != 'block') {
+          tokenized = data.substring(context.cursor, nextSpecial);
+        }
+        context.cursor = nextSpecial + 1;
+
+        break;
+      }
+    }
+
+    return tokenized;
+  };
+
+  var rebuild = function(tokens) {
+    return (Array.isArray(tokens) ? tokens : [tokens])
+      .map(function(token) {
+        if (typeof token == 'string')
+          return token;
+
+        if (token.block)
+          return token.block + '{' + rebuild(token.body) + '}';
+        else
+          return token.selector + '{' + token.body + '}';
+      })
+      .join('');
+  };
+
+  return {
+    process: function() {
+      return rebuild(tokenize());
+    }
+  };
+};