From: GoalSmashers Date: Sun, 17 Nov 2013 09:31:04 +0000 (+0100) Subject: Fixes #161 - adds ~30x faster tokenizer. X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=81fa8edf72ba40c11a628a0e0d7279523b855574;p=clean-css.git Fixes #161 - adds ~30x faster tokenizer. * Splits data into 128 bytes long chunks (rounded to nearest closing brace). * Won't seek through the whole document all the time. --- diff --git a/History.md b/History.md index c11f3331..3edec0cb 100644 --- a/History.md +++ b/History.md @@ -1,6 +1,7 @@ [2.1.0 / 2013-xx-xx (UNRELEASED)](https://github.com/GoalSmashers/clean-css/compare/v2.0.0...HEAD) ================== +* Fixed issue [#161](https://github.com/GoalSmashers/clean-css/issues/161) - improves tokenizer performance. * Fixed issue [#163](https://github.com/GoalSmashers/clean-css/issues/163) - round pixels to 2nd decimal place. * Fixed issue [#165](https://github.com/GoalSmashers/clean-css/issues/165) - extra space after trailing parenthesis. diff --git a/lib/selectors/tokenizer.js b/lib/selectors/tokenizer.js index 6a7e7896..8edaf756 100644 --- a/lib/selectors/tokenizer.js +++ b/lib/selectors/tokenizer.js @@ -1,20 +1,33 @@ +/* jshint latedef: false */ + module.exports = function Tokenizer(data) { + var chunker = new Chunker(data, 128); + var chunk = chunker.next(); + var whatsNext = function(context) { var cursor = context.cursor; var mode = context.mode; var closest; + if (chunk.length == context.cursor) { + if (chunker.isEmpty()) + return null; + + chunk = chunker.next(); + context.cursor = 0; + } + if (mode == 'body') { - closest = data.indexOf('}', cursor); + closest = chunk.indexOf('}', cursor); return closest > -1 ? [closest, 'bodyEnd'] : null; } - var nextSpecial = data.indexOf('@', cursor); - var nextEscape = mode == 'top' ? 
data.indexOf('__ESCAPED_COMMENT_CLEAN_CSS', cursor) : -1; - var nextBodyStart = data.indexOf('{', cursor); - var nextBodyEnd = data.indexOf('}', cursor); + var nextSpecial = chunk.indexOf('@', context.cursor); + var nextEscape = mode == 'top' ? chunk.indexOf('__ESCAPED_COMMENT_CLEAN_CSS', context.cursor) : -1; + var nextBodyStart = chunk.indexOf('{', context.cursor); + var nextBodyEnd = chunk.indexOf('}', context.cursor); closest = nextSpecial; if (closest == -1 || (nextEscape > -1 && nextEscape < closest)) @@ -44,7 +57,7 @@ module.exports = function Tokenizer(data) { while (true) { var next = whatsNext(context); if (!next) { - var whatsLeft = data.substring(context.cursor); + var whatsLeft = chunk.substring(context.cursor); if (whatsLeft.length > 0) { tokenized.push(whatsLeft); context.cursor += whatsLeft.length; @@ -58,16 +71,16 @@ module.exports = function Tokenizer(data) { var oldMode; if (what == 'special') { - var fragment = data.substring(nextSpecial, context.cursor + '@font-face'.length + 1); + var fragment = chunk.substring(nextSpecial, context.cursor + '@font-face'.length + 1); var isSingle = fragment.indexOf('@import') === 0 || fragment.indexOf('@charset') === 0; if (isSingle) { - nextEnd = data.indexOf(';', nextSpecial + 1); - tokenized.push(data.substring(context.cursor, nextEnd + 1)); + nextEnd = chunk.indexOf(';', nextSpecial + 1); + tokenized.push(chunk.substring(context.cursor, nextEnd + 1)); context.cursor = nextEnd + 1; } else { - nextEnd = data.indexOf('{', nextSpecial + 1); - var block = data.substring(context.cursor, nextEnd).trim(); + nextEnd = chunk.indexOf('{', nextSpecial + 1); + var block = chunk.substring(context.cursor, nextEnd).trim(); var isFlat = fragment.indexOf('@font-face') === 0; oldMode = context.mode; @@ -79,13 +92,13 @@ module.exports = function Tokenizer(data) { tokenized.push({ block: block, body: specialBody }); } } else if (what == 'escape') { - nextEnd = data.indexOf('__', nextSpecial + 1); - var escaped = 
data.substring(context.cursor, nextEnd + 2); + nextEnd = chunk.indexOf('__', nextSpecial + 1); + var escaped = chunk.substring(context.cursor, nextEnd + 2); tokenized.push(escaped); context.cursor = nextEnd + 2; } else if (what == 'bodyStart') { - var selector = data.substring(context.cursor, nextSpecial).trim(); + var selector = chunk.substring(context.cursor, nextSpecial).trim(); oldMode = context.mode; context.cursor = nextSpecial + 1; @@ -102,7 +115,7 @@ module.exports = function Tokenizer(data) { } if (context.mode != 'block') - tokenized = data.substring(context.cursor, nextSpecial); + tokenized = chunk.substring(context.cursor, nextSpecial); context.cursor = nextSpecial + 1; @@ -119,3 +132,31 @@ module.exports = function Tokenizer(data) { } }; }; + +// Divides `data` into chunks of `chunkSize` for faster processing +var Chunker = function(data, chunkSize) { + var chunks = []; + for (var cursor = 0, dataSize = data.length; cursor < dataSize;) { + var nextCursor = cursor + chunkSize > dataSize ? + dataSize - 1 : + cursor + chunkSize; + + if (data[nextCursor] != '}') + nextCursor = data.indexOf('}', nextCursor); + if (nextCursor == -1) + nextCursor = data.length - 1; + + chunks.push(data.substring(cursor, nextCursor + 1)); + cursor = nextCursor + 1; + } + + return { + isEmpty: function() { + return chunks.length === 0; + }, + + next: function() { + return chunks.shift() || ''; + } + }; +};