Use proper characters when checking attribute value for validity when omitting quotes;
author Juriy Zaytsev <kangax@gmail.com>
Fri, 5 Feb 2010 23:01:26 +0000 (18:01 -0500)
committer Juriy Zaytsev <kangax@gmail.com>
Fri, 5 Feb 2010 23:01:26 +0000 (18:01 -0500)
Make sure lower-case doctype matches.

htmlparser.js
master.js

index 63c5123..a58e9f0 100644 (file)
@@ -29,7 +29,7 @@
        var startTag = /^<(\w+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
                  endTag = /^<\/(\w+)[^>]*>/,
                  attr = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g,
-                 doctype = /<!DOCTYPE [^>]+>/;
+                 doctype = /^<!DOCTYPE [^>]+>/i;
                
        // Empty Elements - HTML 4.01
        var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed");
                                                chars = false;
                                        }
                                }
-             else if ( html.indexOf("<!DOCTYPE") == 0 ) {
-               match = html.match( doctype );
-
-               if ( match ) {
-                 if ( handler.doctype )
-                                               handler.doctype( match[0] );
-                                       html = html.substring( match[0].length );
-                                       chars = false;
-               }
+             else if ( match = doctype.exec( html )) {
+               if ( handler.doctype )
+                                               handler.doctype( match[0] );
+                                       html = html.substring( match[0].length );
+                                       chars = false;
+                               
                                // end tag
                                } else if ( html.indexOf("</") == 0 ) {
                                        match = html.match( endTag );
index 7fd9137..64f9123 100644 (file)
--- a/master.js
+++ b/master.js
@@ -32,7 +32,9 @@
     return str.replace(/^\s+/, ' ').replace(/\s+$/, ' ');
   }
   function canRemoveAttributeQuotes(value) {
-    return /^[\w-]+$/.test(value);
+    // http://www.w3.org/TR/html4/intro/sgmltut.html#attributes
+    // avoid \w, which could match unicode in certain implementations
+    return /^[a-zA-Z0-9-._:]+$/.test(value);
   }
   function isAttributeRedundant(tag, attributeName, attributeValue) {
     return (