From: Jakub Pawlowicz Date: Sun, 28 Sep 2014 21:36:27 +0000 (+0100) Subject: Improves Tokenizer. X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=ef93abfc8454bca8d98391e0ec53ee1d85235d4b;p=clean-css.git Improves Tokenizer. * Adds prototypal OO. * Adds specs. * Adds stripping some whitespace from selectors and properties. --- diff --git a/lib/selectors/optimizer.js b/lib/selectors/optimizer.js index 927e4442..c5e6b6e0 100644 --- a/lib/selectors/optimizer.js +++ b/lib/selectors/optimizer.js @@ -1,7 +1,7 @@ var Tokenizer = require('./tokenizer'); var PropertyOptimizer = require('../properties/optimizer'); -module.exports = function Optimizer(context, options) { +module.exports = function Optimizer(options, context) { var specialSelectors = { '*': /\-(moz|ms|o|webkit)\-/, 'ie8': /(\-moz\-|\-ms\-|\-o\-|\-webkit\-|:root|:nth|:first\-of|:last|:only|:empty|:target|:checked|::selection|:enabled|:disabled|:not)/, @@ -333,10 +333,9 @@ module.exports = function Optimizer(context, options) { return { process: function(data) { - return data; - // var tokenized = new Tokenizer(data, context).process(); + var tokenized = new Tokenizer(context).toTokens(data); // optimize(tokenized); - // return rebuild(tokenized); + return rebuild(tokenized); } }; }; diff --git a/lib/selectors/tokenizer.js b/lib/selectors/tokenizer.js index 3e960f9d..357d7421 100644 --- a/lib/selectors/tokenizer.js +++ b/lib/selectors/tokenizer.js @@ -1,142 +1,188 @@ var Chunker = require('../utils/chunker'); -module.exports = function Tokenizer(data, minifyContext) { - var chunker = new Chunker(data, '}', 128); - var chunk = chunker.next(); - var flatBlock = /(^@(font\-face|page|\-ms\-viewport|\-o\-viewport|viewport)|\\@.+?)/; +var flatBlock = /(^@(font\-face|page|\-ms\-viewport|\-o\-viewport|viewport)|\\@.+?)/; - var whatsNext = function(context) { - var cursor = context.cursor; - var mode = context.mode; - var closest; +function Tokenizer(minifyContext) { + this.minifyContext = minifyContext; +} - if (chunk.length == context.cursor) { - if (chunker.isEmpty()) - return null; +Tokenizer.prototype.toTokens = function (data) { + var chunker = new Chunker(data, '}', 128); + if (chunker.isEmpty()) + return []; + + var context = { + cursor: 0, + mode: 'top', + chunker: chunker, + chunk: chunker.next(), + outer: this.minifyContext + }; - chunk = chunker.next(); - context.cursor = 0; - } + return tokenize(context); +}; - if (mode == 'body') { - closest = chunk.indexOf('}', cursor); - return closest > -1 ? - [closest, 'bodyEnd'] : - null; +function extractProperties(string) { + return string + .replace(/\s{2,}/g, ' ') + .replace(/ ?: ?/g, ':') + .replace(/([\(,]) /g, '$1') + .replace(/ ([\),])/g, '$1') + .split(';') + .map(function (value) { return value.trim(); }) + .filter(function (value) { return value.length > 0; }); +} + +function extractSelectors(string) { + return string + .replace(/(\s{2,}|\s)/g, ' ') + .replace(/, /g, ',') + .split(',') + .map(function (value) { return value.trim(); }); +} + +function extractBlock(string) { + return string + .replace(/\s/g, ' ') + .replace(/\s{2,}/g, ' ') + .trim(); +} + +function whatsNext(context) { + var mode = context.mode; + var chunk = context.chunk; + var closest; + + if (chunk.length == context.cursor) { + if (context.chunker.isEmpty()) + return null; + + context.chunk = chunk = context.chunker.next(); + context.cursor = 0; + } + + if (mode == 'body') { + closest = chunk.indexOf('}', context.cursor); + return closest > -1 ? 
+ [closest, 'bodyEnd'] : + null; + } + + var nextSpecial = chunk.indexOf('@', context.cursor); + var nextEscape = chunk.indexOf('__ESCAPED_', context.cursor); + var nextBodyStart = chunk.indexOf('{', context.cursor); + var nextBodyEnd = chunk.indexOf('}', context.cursor); + + closest = nextSpecial; + if (closest == -1 || (nextEscape > -1 && nextEscape < closest)) + closest = nextEscape; + if (closest == -1 || (nextBodyStart > -1 && nextBodyStart < closest)) + closest = nextBodyStart; + if (closest == -1 || (nextBodyEnd > -1 && nextBodyEnd < closest)) + closest = nextBodyEnd; + + if (closest == -1) + return; + if (nextEscape === closest) + return [closest, 'escape']; + if (nextBodyStart === closest) + return [closest, 'bodyStart']; + if (nextBodyEnd === closest) + return [closest, 'bodyEnd']; + if (nextSpecial === closest) + return [closest, 'special']; +} + +function tokenize(context) { + var chunk = context.chunk; + var tokenized = []; + + while (true) { + var next = whatsNext(context); + if (!next) { + var whatsLeft = context.chunk.substring(context.cursor); + if (whatsLeft.length > 0) { + tokenized.push(whatsLeft); + context.cursor += whatsLeft.length; + } + break; } - var nextSpecial = chunk.indexOf('@', context.cursor); - var nextEscape = mode == 'top' ? chunk.indexOf('__ESCAPED_COMMENT_CLEAN_CSS', context.cursor) : -1; - var nextBodyStart = chunk.indexOf('{', context.cursor); - var nextBodyEnd = chunk.indexOf('}', context.cursor); - - closest = nextSpecial; - if (closest == -1 || (nextEscape > -1 && nextEscape < closest)) - closest = nextEscape; - if (closest == -1 || (nextBodyStart > -1 && nextBodyStart < closest)) - closest = nextBodyStart; - if (closest == -1 || (nextBodyEnd > -1 && nextBodyEnd < closest)) - closest = nextBodyEnd; - - if (closest == -1) - return; - if (nextEscape === closest) - return [closest, 'escape']; - if (nextBodyStart === closest) - return [closest, 'bodyStart']; - if (nextBodyEnd === closest) - return [closest, 'bodyEnd']; - if (nextSpecial === closest) - return [closest, 'special']; - }; + var nextSpecial = next[0]; + var what = next[1]; + var nextEnd; + var oldMode; - var tokenize = function(context) { - var tokenized = []; + chunk = context.chunk; - context = context || { cursor: 0, mode: 'top' }; + if (what == 'special') { + var firstOpenBraceAt = chunk.indexOf('{', nextSpecial); + var firstSemicolonAt = chunk.indexOf(';', nextSpecial); + var isSingle = firstSemicolonAt > -1 && (firstOpenBraceAt == -1 || firstSemicolonAt < firstOpenBraceAt); + if (isSingle) { + nextEnd = chunk.indexOf(';', nextSpecial + 1); - while (true) { - var next = whatsNext(context); - if (!next) { - var whatsLeft = chunk.substring(context.cursor); - if (whatsLeft.length > 0) { - tokenized.push(whatsLeft); - context.cursor += whatsLeft.length; - } - break; - } + var single = extractBlock(chunk.substring(context.cursor, nextEnd + 1)); + tokenized.push(single); - var nextSpecial = next[0]; - var what = next[1]; - var nextEnd; - var oldMode; - - if (what == 'special') { - var firstOpenBraceAt = chunk.indexOf('{', nextSpecial); - var firstSemicolonAt = chunk.indexOf(';', nextSpecial); - var isSingle = firstSemicolonAt > -1 && (firstOpenBraceAt == -1 || firstSemicolonAt < firstOpenBraceAt); - if (isSingle) { - nextEnd = chunk.indexOf(';', nextSpecial + 1); - tokenized.push(chunk.substring(context.cursor, nextEnd + 1)); - - context.cursor = nextEnd + 1; - } else { - nextEnd = chunk.indexOf('{', nextSpecial + 1); - var block = chunk.substring(context.cursor, nextEnd).trim(); - - var 
isFlat = flatBlock.test(block); - oldMode = context.mode; - context.cursor = nextEnd + 1; - context.mode = isFlat ? 'body' : 'block'; - var specialBody = tokenize(context); - context.mode = oldMode; - - tokenized.push({ block: block, body: specialBody }); - } - } else if (what == 'escape') { - nextEnd = chunk.indexOf('__', nextSpecial + 1); - var escaped = chunk.substring(context.cursor, nextEnd + 2); - tokenized.push(escaped); - - context.cursor = nextEnd + 2; - } else if (what == 'bodyStart') { - var selector = chunk.substring(context.cursor, nextSpecial).trim(); + context.cursor = nextEnd + 1; + } else { + nextEnd = chunk.indexOf('{', nextSpecial + 1); + var block = chunk.substring(context.cursor, nextEnd).trim(); + var isFlat = flatBlock.test(block); oldMode = context.mode; - context.cursor = nextSpecial + 1; - context.mode = 'body'; - var body = tokenize(context); - context.mode = oldMode; - - tokenized.push({ selector: selector, body: body }); - } else if (what == 'bodyEnd') { - // extra closing brace at the top level can be safely ignored - if (context.mode == 'top') { - var at = context.cursor; - var warning = chunk[context.cursor] == '}' ? - 'Unexpected \'}\' in \'' + chunk.substring(at - 20, at + 20) + '\'. Ignoring.' : - 'Unexpected content: \'' + chunk.substring(at, nextSpecial + 1) + '\'. Ignoring.'; + context.cursor = nextEnd + 1; + context.mode = isFlat ? 'body' : 'block'; + var specialBody = tokenize(context); - minifyContext.warnings.push(warning); - context.cursor = nextSpecial + 1; - continue; - } + if (typeof specialBody == 'string') + specialBody = extractProperties(specialBody); - if (context.mode != 'block') - tokenized = chunk.substring(context.cursor, nextSpecial); + context.mode = oldMode; + tokenized.push({ block: block, body: specialBody }); + } + } else if (what == 'escape') { + nextEnd = chunk.indexOf('__', nextSpecial + 1); + var escaped = chunk.substring(context.cursor, nextEnd + 2); + tokenized.push(escaped); + + context.cursor = nextEnd + 2; + } else if (what == 'bodyStart') { + var selector = extractSelectors(chunk.substring(context.cursor, nextSpecial)); + + oldMode = context.mode; + context.cursor = nextSpecial + 1; + context.mode = 'body'; + var body = extractProperties(tokenize(context)); + + context.mode = oldMode; + + tokenized.push({ selector: selector, body: body }); + } else if (what == 'bodyEnd') { + // extra closing brace at the top level can be safely ignored + if (context.mode == 'top') { + var at = context.cursor; + var warning = chunk[context.cursor] == '}' ? + 'Unexpected \'}\' in \'' + chunk.substring(at - 20, at + 20) + '\'. Ignoring.' : + 'Unexpected content: \'' + chunk.substring(at, nextSpecial + 1) + '\'. 
Ignoring.'; + + context.outer.warnings.push(warning); context.cursor = nextSpecial + 1; - - break; + continue; } - } - return tokenized; - }; + if (context.mode != 'block') + tokenized = chunk.substring(context.cursor, nextSpecial); - return { - process: function() { - return tokenize(); + context.cursor = nextSpecial + 1; + + break; } - }; -}; + } + + return tokenized; +} + +module.exports = Tokenizer; diff --git a/test/selectors/tokenizer-test.js b/test/selectors/tokenizer-test.js new file mode 100644 index 00000000..4c077357 --- /dev/null +++ b/test/selectors/tokenizer-test.js @@ -0,0 +1,97 @@ +var vows = require('vows'); +var assert = require('assert'); +var Tokenizer = require('../../lib/selectors/tokenizer'); + +function tokenizerContext(config) { + var ctx = {}; + + function tokenized(target) { + return function (source) { + var tokenized = new Tokenizer({}).toTokens(source); + assert.deepEqual(target, tokenized); + }; + } + + for (var test in config) { + ctx[test] = { + topic: config[test][0], + tokenized: tokenized(config[test][1]) + }; + } + + return ctx; +} + +vows.describe(Tokenizer) + .addBatch( + tokenizerContext({ + 'no content': [ + '', + [] + ], + 'an escaped content': [ + '__ESCAPED_COMMENT_CLEAN_CSS0__', + ['__ESCAPED_COMMENT_CLEAN_CSS0__'] + ], + 'an empty selector': [ + 'a{}', + [{ selector: ['a'], body: [] }] + ], + 'an empty selector with whitespace': [ + 'a{ \n }', + [{ selector: ['a'], body: [] }] + ], + 'a selector': [ + 'a{color:red}', + [{ selector: ['a'], body: ['color:red'] }] + ], + 'a selector with whitespace': [ + 'a {color:red;\n\ndisplay : block }', + [{ selector: ['a'], body: ['color:red', 'display:block'] }] + ], + 'a selector with whitespace in functions': [ + 'a{color:rgba( 255, 255, 0, 0.5 )}', + [{ selector: ['a'], body: ['color:rgba(255,255,0,0.5)'] }] + ], + 'a selector with empty properties': [ + 'a{color:red; ; ; ;}', + [{ selector: ['a'], body: ['color:red'] }] + ], + 'a double selector': [ + 'a,\n\ndiv.class > p {color:red}', + [{ selector: ['a', 'div.class > p'], body: ['color:red'] }] + ], + 'two selectors': [ + 'a{color:red}div{color:blue}', + [ + { selector: ['a'], body: ['color:red'] }, + { selector: ['div'], body: ['color:blue'] } + ] + ], + 'media query': [ + '@media (min-width:980px){}', + [{ block: '@media (min-width:980px)', body: [] }] + ], + 'media query with selectors': [ + '@media (min-width:980px){a{color:red}}', + [{ block: '@media (min-width:980px)', body: [{ selector: ['a'], body: ['color:red'] }] }] + ], + 'media query spanning more than one chunk': [ + '@media only screen and (max-width:1319px) and (min--moz-device-pixel-ratio:1.5),only screen and (max-width:1319px) and (-moz-min-device-pixel-ratio:1.5){a{color:#000}}', + [{ block: '@media only screen and (max-width:1319px) and (min--moz-device-pixel-ratio:1.5),only screen and (max-width:1319px) and (-moz-min-device-pixel-ratio:1.5)', body: [{ selector: ['a'], body: ['color:#000'] }] }] + ], + 'font-face': [ + '@font-face{font-family: fontName;font-size:12px}', + [{ block: '@font-face', body: ['font-family:fontName', 'font-size:12px'] }] + ], + 'charset': [ + '@charset \'utf-8\';a{color:red}', + ['@charset \'utf-8\';', { selector: ['a'], body: ['color:red'] }] + ], + 'charset after a line break': [ + '\n@charset \n\'utf-8\';', + ['@charset \'utf-8\';'] + ] + }) + ) + .export(module);
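
A minimal usage sketch of the reworked API (not part of the diff above): the inputs and expected results are taken verbatim from the new specs in test/selectors/tokenizer-test.js, the require path assumes the repository root, and the object passed to the constructor merely stands in for the real minify context — the tokenizer only reaches into its warnings array when it hits unexpected content, so a stub is enough for these inputs.

var Tokenizer = require('./lib/selectors/tokenizer');

// Prototypal API: construct with the minify context only,
// then hand the CSS source to toTokens() instead of the constructor.
var tokenizer = new Tokenizer({ warnings: [] });

// Whitespace is stripped from selectors and properties while tokenizing:
tokenizer.toTokens('a {color:red;\n\ndisplay : block }');
// => [{ selector: ['a'], body: ['color:red', 'display:block'] }]

// Flat @-blocks such as @font-face keep their body as a flat property list:
tokenizer.toTokens('@font-face{font-family: fontName;font-size:12px}');
// => [{ block: '@font-face', body: ['font-family:fontName', 'font-size:12px'] }]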