From 90a5357893ad60597acc8e6c96f353ea594078be Mon Sep 17 00:00:00 2001
From: Duncan Beevers
Date: Sat, 26 Jul 2014 17:39:57 -0500
Subject: [PATCH] Accept multiple assignment join regexps

---
 src/htmlparser.js | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/htmlparser.js b/src/htmlparser.js
index e33368f..ef0053b 100644
--- a/src/htmlparser.js
+++ b/src/htmlparser.js
@@ -32,18 +32,18 @@
   'use strict';
 
   // Regular Expressions for parsing tags and attributes
-  var startTagOpen = /^<([\w:-]+)/,
-      startTagAttrs = /(?:\s*[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*/,
-      startTagClose = /\s*(\/?)>/,
-      endTag = /^<\/([\w:-]+)[^>]*>/,
-      endingSlash = /\/>$/,
-      singleAttrIdentifier = /([\w:-]+)/,
+  var singleAttrIdentifier = /([\w:-]+)/,
       singleAttrAssign = /=/,
+      singleAttrAssigns = [ singleAttrAssign ],
       singleAttrValues = [
         /"((?:\\.|[^"])*)"/.source, // attr value double quotes
         /'((?:\\.|[^'])*)'/.source, // attr value, single quotes
         /([^>\s]+)/.source // attr value, no quotes
       ],
+      startTagOpen = /^<([\w:-]+)/,
+      startTagClose = /\s*(\/?)>/,
+      endTag = /^<\/([\w:-]+)[^>]*>/,
+      endingSlash = /\/>$/,
       doctype = /^<!DOCTYPE [^>]+>/i,
       startIgnore = /<(%|\?)/,
       endIgnore = /(%|\?)>/;
@@ -72,6 +72,15 @@
 
   function startTagForHandler( handler ) {
     var customStartTagAttrs;
+    var startTagAttrs = new RegExp(
+        '(?:\\s*[\\w:-]+'
+      +   '(?:\\s*'
+      +     '(?:' + joinSingleAttrAssigns(handler) + ')'
+      +     '\\s*(?:(?:"[^"]*")|(?:\'[^\']*\')|[^>\\s]+)'
+      +   ')?'
+      + ')*'
+    );
+
     if ( handler.customAttrSurround ) {
       var attrClauses = [];
 
@@ -101,9 +110,7 @@
     var singleAttr = new RegExp(
       singleAttrIdentifier.source
       + '(?:\\s*'
-      + '('
-      +   singleAttrAssign.source
-      + ')'
+      + '(' + joinSingleAttrAssigns( handler ) + ')'
       + '\\s*'
       + '(?:'
       +   singleAttrValues.join('|')
@@ -129,6 +136,13 @@
     }
   }
 
+  function joinSingleAttrAssigns( handler ) {
+    return singleAttrAssigns.concat(
+      handler.customAttrAssign || []
+    ).map(function (assign) {
+      return '(?:' + assign.source + ')';
+    }).join('|');
+  }
 
   var HTMLParser = global.HTMLParser = function( html, handler ) {
     var index, chars, match, stack = [], last = html, prevTag, nextTag;
-- 
2.34.1