Fixes #161 - adds ~30x faster tokenizer.

author GoalSmashers <jakub@goalsmashers.com>

Sun, 17 Nov 2013 09:31:04 +0000 (10:31 +0100)

committer GoalSmashers <jakub@goalsmashers.com>

Sun, 17 Nov 2013 22:05:51 +0000 (23:05 +0100)
author GoalSmashers <jakub@goalsmashers.com>
Sun, 17 Nov 2013 09:31:04 +0000 (10:31 +0100)
committer GoalSmashers <jakub@goalsmashers.com>
Sun, 17 Nov 2013 22:05:51 +0000 (23:05 +0100)
diff --git a/History.md b/History.md

index c11f333..3edec0c 100644 (file)
--- a/History.md
+++ b/History.md
@@ -1,6 +1,7 @@
  [2.1.0 / 2013-xx-xx (UNRELEASED)](https://github.com/GoalSmashers/clean-css/compare/v2.0.0...HEAD)
  ==================
  
+* Fixed issue [#161](https://github.com/GoalSmashers/clean-css/issues/161) - improves tokenizer performance.
  * Fixed issue [#163](https://github.com/GoalSmashers/clean-css/issues/163) - round pixels to 2nd decimal place.
  * Fixed issue [#165](https://github.com/GoalSmashers/clean-css/issues/165) - extra space after trailing parenthesis.
  
diff --git a/lib/selectors/tokenizer.js b/lib/selectors/tokenizer.js

index 6a7e789..8edaf75 100644 (file)
--- a/lib/selectors/tokenizer.js
+++ b/lib/selectors/tokenizer.js
@@ -1,20 +1,33 @@
+/* jshint latedef: false */
+
  module.exports = function Tokenizer(data) {
+  var chunker = new Chunker(data, 128);
+  var chunk = chunker.next();
+
    var whatsNext = function(context) {
      var cursor = context.cursor;
      var mode = context.mode;
      var closest;
  
+    if (chunk.length == context.cursor) {
+      if (chunker.isEmpty())
+        return null;
+
+      chunk = chunker.next();
+      context.cursor = 0;
+    }
+
      if (mode == 'body') {
-      closest = data.indexOf('}', cursor);
+      closest = chunk.indexOf('}', cursor);
        return closest > -1 ?
          [closest, 'bodyEnd'] :
          null;
      }
  
-    var nextSpecial = data.indexOf('@', cursor);
-    var nextEscape = mode == 'top' ? data.indexOf('__ESCAPED_COMMENT_CLEAN_CSS', cursor) : -1;
-    var nextBodyStart = data.indexOf('{', cursor);
-    var nextBodyEnd = data.indexOf('}', cursor);
+    var nextSpecial = chunk.indexOf('@', context.cursor);
+    var nextEscape = mode == 'top' ? chunk.indexOf('__ESCAPED_COMMENT_CLEAN_CSS', context.cursor) : -1;
+    var nextBodyStart = chunk.indexOf('{', context.cursor);
+    var nextBodyEnd = chunk.indexOf('}', context.cursor);
  
      closest = nextSpecial;
      if (closest == -1 || (nextEscape > -1 && nextEscape < closest))
@@ -44,7 +57,7 @@ module.exports = function Tokenizer(data) {
      while (true) {
        var next = whatsNext(context);
        if (!next) {
-        var whatsLeft = data.substring(context.cursor);
+        var whatsLeft = chunk.substring(context.cursor);
          if (whatsLeft.length > 0) {
            tokenized.push(whatsLeft);
            context.cursor += whatsLeft.length;
@@ -58,16 +71,16 @@ module.exports = function Tokenizer(data) {
        var oldMode;
  
        if (what == 'special') {
-        var fragment = data.substring(nextSpecial, context.cursor + '@font-face'.length + 1);
+        var fragment = chunk.substring(nextSpecial, context.cursor + '@font-face'.length + 1);
          var isSingle = fragment.indexOf('@import') === 0 || fragment.indexOf('@charset') === 0;
          if (isSingle) {
-          nextEnd = data.indexOf(';', nextSpecial + 1);
-          tokenized.push(data.substring(context.cursor, nextEnd + 1));
+          nextEnd = chunk.indexOf(';', nextSpecial + 1);
+          tokenized.push(chunk.substring(context.cursor, nextEnd + 1));
  
            context.cursor = nextEnd + 1;
          } else {
-          nextEnd = data.indexOf('{', nextSpecial + 1);
-          var block = data.substring(context.cursor, nextEnd).trim();
+          nextEnd = chunk.indexOf('{', nextSpecial + 1);
+          var block = chunk.substring(context.cursor, nextEnd).trim();
  
            var isFlat = fragment.indexOf('@font-face') === 0;
            oldMode = context.mode;
@@ -79,13 +92,13 @@ module.exports = function Tokenizer(data) {
            tokenized.push({ block: block, body: specialBody });
          }
        } else if (what == 'escape') {
-        nextEnd = data.indexOf('__', nextSpecial + 1);
-        var escaped = data.substring(context.cursor, nextEnd + 2);
+        nextEnd = chunk.indexOf('__', nextSpecial + 1);
+        var escaped = chunk.substring(context.cursor, nextEnd + 2);
          tokenized.push(escaped);
  
          context.cursor = nextEnd + 2;
        } else if (what == 'bodyStart') {
-        var selector = data.substring(context.cursor, nextSpecial).trim();
+        var selector = chunk.substring(context.cursor, nextSpecial).trim();
  
          oldMode = context.mode;
          context.cursor = nextSpecial + 1;
@@ -102,7 +115,7 @@ module.exports = function Tokenizer(data) {
          }
  
          if (context.mode != 'block')
-          tokenized = data.substring(context.cursor, nextSpecial);
+          tokenized = chunk.substring(context.cursor, nextSpecial);
  
          context.cursor = nextSpecial + 1;
  
@@ -119,3 +132,31 @@ module.exports = function Tokenizer(data) {
      }
    };
  };
+
// Divides `data` into chunks for faster processing.
//
// Every chunk except possibly the last spans at least `chunkSize` characters
// and is then extended to end on the next `}`. When no further `}` exists the
// chunk runs to the end of `data`. Concatenating all chunks in order yields
// `data` again.
//
// data      - string to split
// chunkSize - minimum chunk length; negative or non-numeric values are
//             treated as 0 so that the scan always moves forward
//
// Returns an object with:
//   isEmpty() - true once every chunk has been handed out
//   next()    - the next chunk, or '' when none are left
var Chunker = function(data, chunkSize) {
  var dataSize = data.length;
  var lastIndex = dataSize - 1;
  // A negative or NaN step would let the search below start before `cursor`
  // and the loop would never advance.
  var step = chunkSize > 0 ? chunkSize : 0;
  var chunks = [];
  var cursor = 0;

  while (cursor < dataSize) {
    // Tentative end of the chunk, clamped to the last valid index.
    var chunkEnd = Math.min(cursor + step, lastIndex);

    // Chunks must end on a closing brace; look ahead for the next one.
    if (data[chunkEnd] !== '}')
      chunkEnd = data.indexOf('}', chunkEnd);
    // No closing brace left - the remainder becomes the final chunk.
    if (chunkEnd === -1)
      chunkEnd = lastIndex;

    chunks.push(data.substring(cursor, chunkEnd + 1));
    cursor = chunkEnd + 1;
  }

  // Read position into `chunks`; advancing an index keeps next() O(1),
  // whereas Array#shift re-indexes the remaining elements on every call.
  var nextChunk = 0;

  return {
    isEmpty: function() {
      return nextChunk >= chunks.length;
    },

    next: function() {
      return nextChunk < chunks.length ?
        chunks[nextChunk++] :
        '';
    }
  };
};
author	GoalSmashers <jakub@goalsmashers.com>
	Sun, 17 Nov 2013 09:31:04 +0000 (10:31 +0100)
committer	GoalSmashers <jakub@goalsmashers.com>
	Sun, 17 Nov 2013 22:05:51 +0000 (23:05 +0100)
History.md		patch \| blob \| history
lib/selectors/tokenizer.js		patch \| blob \| history