import {isIdentifierStart, isIdentifierChar} from "./identifier.js"
import {Parser} from "./state.js"
import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js"
+import {hasOwn} from "./util.js"
const pp = Parser.prototype
export class RegExpValidationState {
constructor(parser) {
this.parser = parser
- this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}`
+ this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}${parser.options.ecmaVersion >= 13 ? "d" : ""}`
+ this.unicodeProperties = UNICODE_PROPERTY_VALUES[parser.options.ecmaVersion >= 13 ? 13 : parser.options.ecmaVersion]
this.source = ""
this.flags = ""
this.start = 0
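- // If u flag is given, this returns the code point at the index (it combines a surrogate pair).
+ // If u flag is given or `forceU` is true, this returns the code point at the index (it combines a
+ // lead surrogate with the trail surrogate that follows it; an unpaired lead surrogate is returned as is).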
// Otherwise, this returns the code unit of the index (can be a part of a surrogate pair).
- at(i) {
+ at(i, forceU = false) {
const s = this.source
const l = s.length
if (i >= l) {
return -1
}
const c = s.charCodeAt(i)
- if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
+ if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
return c
}
- return (c << 10) + s.charCodeAt(i + 1) - 0x35FDC00
+ const next = s.charCodeAt(i + 1)
+ return next >= 0xDC00 && next <= 0xDFFF ? (c << 10) + next - 0x35FDC00 : c
}
- nextIndex(i) {
+ nextIndex(i, forceU = false) {
const s = this.source
const l = s.length
if (i >= l) {
return l
}
- const c = s.charCodeAt(i)
- if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
+ let c = s.charCodeAt(i), next
+ if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
+ (next = s.charCodeAt(i + 1)) < 0xDC00 || next > 0xDFFF) {
return i + 1
}
return i + 2
}
- current() {
- return this.at(this.pos)
+ current(forceU = false) {
+ return this.at(this.pos, forceU)
}
- lookahead() {
- return this.at(this.nextIndex(this.pos))
+ lookahead(forceU = false) {
+ return this.at(this.nextIndex(this.pos, forceU), forceU)
}
- advance() {
- this.pos = this.nextIndex(this.pos)
+ advance(forceU = false) {
+ this.pos = this.nextIndex(this.pos, forceU)
}
- eat(ch) {
- if (this.current() === ch) {
- this.advance()
+ eat(ch, forceU = false) {
+ if (this.current(forceU) === ch) {
+ this.advance(forceU)
return true
}
return false
if (state.eat(0x29 /* ) */)) {
state.raise("Unmatched ')'")
}
- if (state.eat(0x5D /* [ */) || state.eat(0x7D /* } */)) {
+ if (state.eat(0x5D /* ] */) || state.eat(0x7D /* } */)) {
state.raise("Lone quantifier brackets")
}
}
return false
}
-// GroupSpecifier[U] ::
+// GroupSpecifier ::
// [empty]
-// `?` GroupName[?U]
+// `?` GroupName
pp.regexp_groupSpecifier = function(state) {
if (state.eat(0x3F /* ? */)) {
if (this.regexp_eatGroupName(state)) {
}
}
-// GroupName[U] ::
-// `<` RegExpIdentifierName[?U] `>`
+// GroupName ::
+// `<` RegExpIdentifierName `>`
// Note: this updates `state.lastStringValue` property with the eaten name.
pp.regexp_eatGroupName = function(state) {
state.lastStringValue = ""
return false
}
-// RegExpIdentifierName[U] ::
-// RegExpIdentifierStart[?U]
-// RegExpIdentifierName[?U] RegExpIdentifierPart[?U]
+// RegExpIdentifierName ::
+// RegExpIdentifierStart
+// RegExpIdentifierName RegExpIdentifierPart
// Note: this updates `state.lastStringValue` property with the eaten name.
pp.regexp_eatRegExpIdentifierName = function(state) {
state.lastStringValue = ""
return false
}
-// RegExpIdentifierStart[U] ::
+// RegExpIdentifierStart ::
// UnicodeIDStart
// `$`
// `_`
-// `\` RegExpUnicodeEscapeSequence[?U]
+// `\` RegExpUnicodeEscapeSequence[+U]
pp.regexp_eatRegExpIdentifierStart = function(state) {
const start = state.pos
- let ch = state.current()
- state.advance()
+ const forceU = this.options.ecmaVersion >= 11
+ let ch = state.current(forceU)
+ state.advance(forceU)
- if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
+ if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
ch = state.lastIntValue
}
if (isRegExpIdentifierStart(ch)) {
return isIdentifierStart(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */
}
-// RegExpIdentifierPart[U] ::
+// RegExpIdentifierPart ::
// UnicodeIDContinue
// `$`
// `_`
-// `\` RegExpUnicodeEscapeSequence[?U]
+// `\` RegExpUnicodeEscapeSequence[+U]
// <ZWNJ>
// <ZWJ>
pp.regexp_eatRegExpIdentifierPart = function(state) {
const start = state.pos
- let ch = state.current()
- state.advance()
+ const forceU = this.options.ecmaVersion >= 11
+ let ch = state.current(forceU)
+ state.advance(forceU)
- if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
+ if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
ch = state.lastIntValue
}
if (isRegExpIdentifierPart(ch)) {
this.regexp_eatCControlLetter(state) ||
this.regexp_eatZero(state) ||
this.regexp_eatHexEscapeSequence(state) ||
- this.regexp_eatRegExpUnicodeEscapeSequence(state) ||
+ this.regexp_eatRegExpUnicodeEscapeSequence(state, false) ||
(!state.switchU && this.regexp_eatLegacyOctalEscapeSequence(state)) ||
this.regexp_eatIdentityEscape(state)
)
}
// https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence
-pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
+pp.regexp_eatRegExpUnicodeEscapeSequence = function(state, forceU = false) {
const start = state.pos
+ const switchU = forceU || state.switchU
if (state.eat(0x75 /* u */)) {
if (this.regexp_eatFixedHexDigits(state, 4)) {
const lead = state.lastIntValue
- if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) {
+ if (switchU && lead >= 0xD800 && lead <= 0xDBFF) {
const leadSurrogateEnd = state.pos
if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.regexp_eatFixedHexDigits(state, 4)) {
const trail = state.lastIntValue
return true
}
if (
- state.switchU &&
+ switchU &&
state.eat(0x7B /* { */) &&
this.regexp_eatHexDigits(state) &&
state.eat(0x7D /* } */) &&
) {
return true
}
- if (state.switchU) {
+ if (switchU) {
state.raise("Invalid unicode escape")
}
state.pos = start
return false
}
pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) {
- if (!UNICODE_PROPERTY_VALUES.hasOwnProperty(name) || UNICODE_PROPERTY_VALUES[name].indexOf(value) === -1) {
+ if (!hasOwn(state.unicodeProperties.nonBinary, name))
state.raise("Invalid property name")
- }
+ if (!state.unicodeProperties.nonBinary[name].test(value))
+ state.raise("Invalid property value")
}
pp.regexp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) {
- if (UNICODE_PROPERTY_VALUES.$LONE.indexOf(nameOrValue) === -1) {
+ if (!state.unicodeProperties.binary.test(nameOrValue))
state.raise("Invalid property name")
- }
}
// UnicodePropertyName ::
if (state.eat(0x5B /* [ */)) {
state.eat(0x5E /* ^ */)
this.regexp_classRanges(state)
- if (state.eat(0x5D /* [ */)) {
+ if (state.eat(0x5D /* ] */)) {
return true
}
// Unreachable since it threw "unterminated regular expression" error before.
}
const ch = state.current()
- if (ch !== 0x5D /* [ */) {
+ if (ch !== 0x5D /* ] */) {
state.lastIntValue = ch
state.advance()
return true