1 import {isIdentifierStart, isIdentifierChar} from "./identifier.js"
2 import {Parser} from "./state.js"
3 import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js"
4 import {hasOwn} from "./util.js"
6 const pp = Parser.prototype
// Mutable cursor and bookkeeping shared by the regexp_* validation routines.
// NOTE(review): the constructor header, several field initialisers and the
// closing lines of the methods are not visible in this view.
8 export class RegExpValidationState {
// Accepted flags grow with ecmaVersion: "gim" always, "uy" from 6, "s" from 9, "d" from 13.
11 this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}${parser.options.ecmaVersion >= 13 ? "d" : ""}`
// Unicode property table is looked up by ecmaVersion, with versions above 13 mapped to 13.
12 this.unicodeProperties = UNICODE_PROPERTY_VALUES[parser.options.ecmaVersion >= 13 ? 13 : parser.options.ecmaVersion]
20 this.lastStringValue = ""
21 this.lastAssertionIsQuantifiable = false
22 this.numCapturingParens = 0
23 this.maxBackReference = 0
25 this.backReferenceNames = []
// Prepare the state for a new literal: remember its start offset and source text
// and derive the U/N switches from the presence of the "u" flag.
28 reset(start, pattern, flags) {
29 const unicode = flags.indexOf("u") !== -1
// `| 0` and `+ ""` coerce the inputs to an integer and a string respectively.
30 this.start = start | 0
31 this.source = pattern + ""
// Both switches require the "u" flag; switchU additionally needs ecmaVersion >= 6, switchN >= 9.
33 this.switchU = unicode && this.parser.options.ecmaVersion >= 6
34 this.switchN = unicode && this.parser.options.ecmaVersion >= 9
// Reports a recoverable parser error positioned at the literal's start; the text embeds the pattern source.
38 this.parser.raiseRecoverable(this.start, `Invalid regular expression: /${this.source}/: ${message}`)
41 // If u flag is given, this returns the code point at the index (it combines a surrogate pair).
42 // Otherwise, this returns the code unit of the index (can be a part of a surrogate pair).
43 at(i, forceU = false) {
49 const c = s.charCodeAt(i)
// Taken when not in unicode mode, when c is outside 0xD800-0xDFFF, or when no unit follows c.
// NOTE(review): the body of this branch is not visible here.
50 if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
53 const next = s.charCodeAt(i + 1)
// When `next` is a trail surrogate the two units are folded into one code point; otherwise c is returned alone.
54 return next >= 0xDC00 && next <= 0xDFFF ? (c << 10) + next - 0x35FDC00 : c
// Index of the element after i; uses the same unicode-mode/surrogate tests as at().
57 nextIndex(i, forceU = false) {
63 let c = s.charCodeAt(i), next
64 if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
65 (next = s.charCodeAt(i + 1)) < 0xDC00 || next > 0xDFFF) {
// Element at the cursor.
71 current(forceU = false) {
72 return this.at(this.pos, forceU)
// Element immediately after the one at the cursor.
75 lookahead(forceU = false) {
76 return this.at(this.nextIndex(this.pos, forceU), forceU)
// Move the cursor forward by one element.
79 advance(forceU = false) {
80 this.pos = this.nextIndex(this.pos, forceU)
// Tests whether the element at the cursor equals `ch`.
// NOTE(review): the lines that consume it and return are not visible here.
83 eat(ch, forceU = false) {
84 if (this.current(forceU) === ch) {
/**
 * Convert a Unicode code point into its UTF-16 string form.
 *
 * @param {number} ch Code point in the range 0..0x10FFFF.
 * @returns {string} A single code unit for BMP values, a surrogate pair otherwise.
 */
function codePointToString(ch) {
  if (ch <= 0xFFFF) return String.fromCharCode(ch)
  // Astral code points are rebased by 0x10000 before being split into the
  // high and low 10-bit halves of a surrogate pair.
  const offset = ch - 0x10000
  return String.fromCharCode((offset >> 10) + 0xD800, (offset & 0x03FF) + 0xDC00)
}
/**
 * Validate the flags part of a given RegExpLiteral.
 *
 * Raises at the literal's start offset when a flag is not listed in
 * `state.validFlags` or when the same flag occurs more than once.
 *
 * @param {RegExpValidationState} state The state to validate RegExp.
 */
pp.validateRegExpFlags = function(state) {
  const validFlags = state.validFlags
  const flags = state.flags

  for (let i = 0; i < flags.length; i++) {
    const flag = flags.charAt(i)
    if (validFlags.indexOf(flag) === -1) {
      this.raise(state.start, "Invalid regular expression flag")
    }
    // Only look to the right of i: an earlier duplicate was already reported.
    if (flags.indexOf(flag, i + 1) > -1) {
      this.raise(state.start, "Duplicate regular expression flag")
    }
  }
}
/**
 * Validate the pattern part of a given RegExpLiteral.
 *
 * @param {RegExpValidationState} state The state to validate RegExp.
 */
pp.validateRegExpPattern = function(state) {
  this.regexp_pattern(state)

  // The goal symbol for the parse is |Pattern[~U, ~N]|. If the result of
  // parsing contains a |GroupName|, reparse with the goal symbol
  // |Pattern[~U, +N]| and use this result instead. Throw a *SyntaxError*
  // exception if _P_ did not conform to the grammar, if any elements of _P_
  // were not matched by the parse, or if any Early Error conditions exist.
  if (!state.switchN && this.options.ecmaVersion >= 9 && state.groupNames.length > 0) {
    // Turn on the N switch first; without it the second pass would simply
    // repeat the first parse instead of using the [+N] goal.
    state.switchN = true
    this.regexp_pattern(state)
  }
}
// https://www.ecma-international.org/ecma-262/8.0/#prod-Pattern
//
// Parses the whole pattern held in `state.source` and raises on leftover
// input, on numeric back references beyond the number of capturing groups,
// and on named back references to groups that were never declared.
pp.regexp_pattern = function(state) {
  // Start from a clean slate. The cursor has to be rewound as well, because
  // this routine is invoked a second time when the pattern is reparsed.
  state.pos = 0
  state.lastIntValue = 0
  state.lastStringValue = ""
  state.lastAssertionIsQuantifiable = false
  state.numCapturingParens = 0
  state.maxBackReference = 0
  state.groupNames.length = 0
  state.backReferenceNames.length = 0

  this.regexp_disjunction(state)

  if (state.pos !== state.source.length) {
    // Make the same messages as V8.
    if (state.eat(0x29 /* ) */)) {
      state.raise("Unmatched ')'")
    }
    if (state.eat(0x5D /* ] */) || state.eat(0x7D /* } */)) {
      state.raise("Lone quantifier brackets")
    }
  }
  if (state.maxBackReference > state.numCapturingParens) {
    state.raise("Invalid escape")
  }
  for (const name of state.backReferenceNames) {
    if (state.groupNames.indexOf(name) === -1) {
      state.raise("Invalid named capture referenced")
    }
  }
}
// https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction
//
// Parses one or more alternatives separated by `|`, then reports a
// quantifier or `{` that is left with nothing in front of it.
pp.regexp_disjunction = function(state) {
  this.regexp_alternative(state)
  while (state.eat(0x7C /* | */)) {
    this.regexp_alternative(state)
  }

  // Make the same message as V8.
  if (this.regexp_eatQuantifier(state, true)) {
    state.raise("Nothing to repeat")
  }
  if (state.eat(0x7B /* { */)) {
    state.raise("Lone quantifier brackets")
  }
}
// https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative
//
// Consumes terms until the input is exhausted or no further term matches.
pp.regexp_alternative = function(state) {
  // All the work happens in the loop condition; the body is intentionally empty.
  while (state.pos < state.source.length && this.regexp_eatTerm(state))
    ;
}
193 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term
// Tries to consume one Term: an assertion (followed by a quantifier only when the
// assertion is quantifiable) or an atom followed by an optional quantifier.
// NOTE(review): the return statements and the guard around the raise are not visible in this view.
194 pp.regexp_eatTerm = function(state) {
195 if (this.regexp_eatAssertion(state)) {
196 // Handle `QuantifiableAssertion Quantifier` alternative.
197 // `state.lastAssertionIsQuantifiable` is true if the last eaten Assertion
198 // is a QuantifiableAssertion.
199 if (state.lastAssertionIsQuantifiable && this.regexp_eatQuantifier(state)) {
200 // Make the same message as V8.
202 state.raise("Invalid quantifier")
// With switchU set only Atom is attempted; otherwise ExtendedAtom is used.
208 if (state.switchU ? this.regexp_eatAtom(state) : this.regexp_eatExtendedAtom(state)) {
// The quantifier is optional, so its result is ignored.
209 this.regexp_eatQuantifier(state)
216 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Assertion
// Tries to consume an assertion: `^`, `$`, `\b`, `\B`, or a lookahead/lookbehind group.
// NOTE(review): return statements and any cursor restoration are not visible in this view.
217 pp.regexp_eatAssertion = function(state) {
// Cursor position on entry (presumably for backtracking; its use is not visible here).
218 const start = state.pos
219 state.lastAssertionIsQuantifiable = false
222 if (state.eat(0x5E /* ^ */) || state.eat(0x24 /* $ */)) {
227 if (state.eat(0x5C /* \ */)) {
228 if (state.eat(0x42 /* B */) || state.eat(0x62 /* b */)) {
234 // Lookahead / Lookbehind
235 if (state.eat(0x28 /* ( */) && state.eat(0x3F /* ? */)) {
236 let lookbehind = false
// `<` (lookbehind) is only recognised from ecmaVersion 9 on.
237 if (this.options.ecmaVersion >= 9) {
238 lookbehind = state.eat(0x3C /* < */)
240 if (state.eat(0x3D /* = */) || state.eat(0x21 /* ! */)) {
241 this.regexp_disjunction(state)
242 if (!state.eat(0x29 /* ) */)) {
243 state.raise("Unterminated group")
// Lookaheads are marked quantifiable; lookbehinds are not.
245 state.lastAssertionIsQuantifiable = !lookbehind
254 // https://www.ecma-international.org/ecma-262/8.0/#prod-Quantifier
// Tries to consume a quantifier: a quantifier prefix followed by an optional `?`.
// `noError` is forwarded to the prefix parser.
// NOTE(review): the return statements are not visible in this view.
255 pp.regexp_eatQuantifier = function(state, noError = false) {
256 if (this.regexp_eatQuantifierPrefix(state, noError)) {
// The trailing `?` is optional, so the result of eat() is ignored.
257 state.eat(0x3F /* ? */)
// https://www.ecma-international.org/ecma-262/8.0/#prod-QuantifierPrefix
//
// Consumes `*`, `+`, `?` or a braced quantifier. Returns the result of the
// first alternative that succeeds; `noError` is forwarded to the braced form.
pp.regexp_eatQuantifierPrefix = function(state, noError) {
  return (
    state.eat(0x2A /* * */) ||
    state.eat(0x2B /* + */) ||
    state.eat(0x3F /* ? */) ||
    this.regexp_eatBracedQuantifier(state, noError)
  )
}
// Tries to consume a braced quantifier: `{min}`, `{min,}` or `{min,max}`.
// NOTE(review): return statements and any cursor restoration are not visible in this view.
272 pp.regexp_eatBracedQuantifier = function(state, noError) {
273 const start = state.pos
274 if (state.eat(0x7B /* { */)) {
// max stays -1 unless an upper bound is parsed after the comma.
275 let min = 0, max = -1
276 if (this.regexp_eatDecimalDigits(state)) {
277 min = state.lastIntValue
278 if (state.eat(0x2C /* , */) && this.regexp_eatDecimalDigits(state)) {
279 max = state.lastIntValue
281 if (state.eat(0x7D /* } */)) {
282 // SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-term
// An explicit upper bound below the lower bound is rejected unless errors are suppressed.
283 if (max !== -1 && max < min && !noError) {
284 state.raise("numbers out of order in {} quantifier")
// Raised only with switchU set and errors not suppressed.
289 if (state.switchU && !noError) {
290 state.raise("Incomplete quantifier")
// https://www.ecma-international.org/ecma-262/8.0/#prod-Atom
//
// Tries each Atom alternative in order and returns the result of the first
// one that succeeds.
pp.regexp_eatAtom = function(state) {
  return (
    this.regexp_eatPatternCharacters(state) ||
    state.eat(0x2E /* . */) ||
    this.regexp_eatReverseSolidusAtomEscape(state) ||
    this.regexp_eatCharacterClass(state) ||
    this.regexp_eatUncapturingGroup(state) ||
    this.regexp_eatCapturingGroup(state)
  )
}
// Tries to consume `\` followed by an AtomEscape.
// NOTE(review): return statements and any use of `start` are not visible in this view.
308 pp.regexp_eatReverseSolidusAtomEscape = function(state) {
309 const start = state.pos
310 if (state.eat(0x5C /* \ */)) {
311 if (this.regexp_eatAtomEscape(state)) {
// Tries to consume a non-capturing group `(?: Disjunction )`.
// NOTE(review): return statements and any use of `start` are not visible in this view.
318 pp.regexp_eatUncapturingGroup = function(state) {
319 const start = state.pos
320 if (state.eat(0x28 /* ( */)) {
321 if (state.eat(0x3F /* ? */) && state.eat(0x3A /* : */)) {
322 this.regexp_disjunction(state)
323 if (state.eat(0x29 /* ) */)) {
326 state.raise("Unterminated group")
// Tries to consume a capturing group `( [GroupSpecifier] Disjunction )`.
// NOTE(review): return statements are not visible in this view.
332 pp.regexp_eatCapturingGroup = function(state) {
333 if (state.eat(0x28 /* ( */)) {
// From ecmaVersion 9 a group specifier is parsed; before that a `?` right after `(` is an error.
334 if (this.options.ecmaVersion >= 9) {
335 this.regexp_groupSpecifier(state)
336 } else if (state.current() === 0x3F /* ? */) {
337 state.raise("Invalid group")
339 this.regexp_disjunction(state)
340 if (state.eat(0x29 /* ) */)) {
// The group is counted once its closing parenthesis has been consumed.
341 state.numCapturingParens += 1
344 state.raise("Unterminated group")
// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom
//
// Tries each ExtendedAtom alternative in order and returns the result of
// the first one that succeeds.
pp.regexp_eatExtendedAtom = function(state) {
  return (
    state.eat(0x2E /* . */) ||
    this.regexp_eatReverseSolidusAtomEscape(state) ||
    this.regexp_eatCharacterClass(state) ||
    this.regexp_eatUncapturingGroup(state) ||
    this.regexp_eatCapturingGroup(state) ||
    this.regexp_eatInvalidBracedQuantifier(state) ||
    this.regexp_eatExtendedPatternCharacter(state)
  )
}
362 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-InvalidBracedQuantifier
// Raises "Nothing to repeat" when a braced quantifier appears where an atom is expected.
// The quantifier is parsed with noError = true.
// NOTE(review): the return statement is not visible in this view.
363 pp.regexp_eatInvalidBracedQuantifier = function(state) {
364 if (this.regexp_eatBracedQuantifier(state, true)) {
365 state.raise("Nothing to repeat")
370 // https://www.ecma-international.org/ecma-262/8.0/#prod-SyntaxCharacter
// Checks whether the current element is a SyntaxCharacter and records it in state.lastIntValue.
// NOTE(review): the lines that advance the cursor and return are not visible in this view.
371 pp.regexp_eatSyntaxCharacter = function(state) {
372 const ch = state.current()
373 if (isSyntaxCharacter(ch)) {
374 state.lastIntValue = ch
/**
 * Tell whether a code point is one of the pattern syntax characters
 * `$ ( ) * + . ? [ \ ] ^ { | }`.
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True when `ch` is a syntax character.
 */
function isSyntaxCharacter(ch) {
  return (
    ch === 0x24 /* $ */ ||
    ch >= 0x28 /* ( */ && ch <= 0x2B /* + */ ||
    ch === 0x2E /* . */ ||
    ch === 0x3F /* ? */ ||
    ch >= 0x5B /* [ */ && ch <= 0x5E /* ^ */ ||
    ch >= 0x7B /* { */ && ch <= 0x7D /* } */
  )
}
391 // https://www.ecma-international.org/ecma-262/8.0/#prod-PatternCharacter
// Consumes a run of elements that are not syntax characters and reports whether the cursor moved.
// The loop stops when state.current() yields -1 or a syntax character.
// NOTE(review): the declaration of `ch` and the loop body are not visible in this view.
393 pp.regexp_eatPatternCharacters = function(state) {
394 const start = state.pos
396 while ((ch = state.current()) !== -1 && !isSyntaxCharacter(ch)) {
399 return state.pos !== start
402 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedPatternCharacter
// Tests the current element against a list of excluded characters
// (`$`, `(` through `+`, `.`, `?`, `[`, `^` are visible below).
// NOTE(review): the start and the remainder of the condition, and the code
// acting on it, are not visible in this view.
403 pp.regexp_eatExtendedPatternCharacter = function(state) {
404 const ch = state.current()
407 ch !== 0x24 /* $ */ &&
408 !(ch >= 0x28 /* ( */ && ch <= 0x2B /* + */) &&
409 ch !== 0x2E /* . */ &&
410 ch !== 0x3F /* ? */ &&
411 ch !== 0x5B /* [ */ &&
412 ch !== 0x5E /* ^ */ &&
// Parses an optional `?<name>` group specifier and records the name in state.groupNames.
// NOTE(review): return statements are not visible in this view.
424 pp.regexp_groupSpecifier = function(state) {
425 if (state.eat(0x3F /* ? */)) {
426 if (this.regexp_eatGroupName(state)) {
// A name already present in groupNames is reported as a duplicate.
427 if (state.groupNames.indexOf(state.lastStringValue) !== -1) {
428 state.raise("Duplicate capture group name")
430 state.groupNames.push(state.lastStringValue)
433 state.raise("Invalid group")
438 // `<` RegExpIdentifierName `>`
439 // Note: this updates `state.lastStringValue` property with the eaten name.
// Tries to consume `<` RegExpIdentifierName `>`; lastStringValue is cleared on entry.
// NOTE(review): return statements are not visible in this view.
440 pp.regexp_eatGroupName = function(state) {
441 state.lastStringValue = ""
442 if (state.eat(0x3C /* < */)) {
443 if (this.regexp_eatRegExpIdentifierName(state) && state.eat(0x3E /* > */)) {
446 state.raise("Invalid capture group name")
451 // RegExpIdentifierName ::
452 // RegExpIdentifierStart
453 // RegExpIdentifierName RegExpIdentifierPart
454 // Note: this updates `state.lastStringValue` property with the eaten name.
// Accumulates an identifier name into state.lastStringValue: one start
// character followed by any number of part characters.
// NOTE(review): return statements are not visible in this view.
455 pp.regexp_eatRegExpIdentifierName = function(state) {
456 state.lastStringValue = ""
457 if (this.regexp_eatRegExpIdentifierStart(state)) {
// Each accepted character is left in lastIntValue and appended as a string.
458 state.lastStringValue += codePointToString(state.lastIntValue)
459 while (this.regexp_eatRegExpIdentifierPart(state)) {
460 state.lastStringValue += codePointToString(state.lastIntValue)
467 // RegExpIdentifierStart ::
471 // `\` RegExpUnicodeEscapeSequence[+U]
// Tries to consume one identifier-start character, either literal or written as a `\u` escape.
// NOTE(review): return statements and any use of `start` are not visible in this view.
472 pp.regexp_eatRegExpIdentifierStart = function(state) {
473 const start = state.pos
// From ecmaVersion 11 the name is always read in code-point mode.
474 const forceU = this.options.ecmaVersion >= 11
475 let ch = state.current(forceU)
476 state.advance(forceU)
// A backslash introduces a unicode escape whose decoded value replaces ch.
478 if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
479 ch = state.lastIntValue
481 if (isRegExpIdentifierStart(ch)) {
482 state.lastIntValue = ch
/**
 * Tell whether a code point may start a RegExp identifier name.
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True when isIdentifierStart(ch, true) accepts it, or it is `$` or `_`.
 */
function isRegExpIdentifierStart(ch) {
  return isIdentifierStart(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */
}
493 // RegExpIdentifierPart ::
497 // `\` RegExpUnicodeEscapeSequence[+U]
// Tries to consume one identifier-part character, either literal or written as a `\u` escape.
// NOTE(review): return statements and any use of `start` are not visible in this view.
500 pp.regexp_eatRegExpIdentifierPart = function(state) {
501 const start = state.pos
// From ecmaVersion 11 the name is always read in code-point mode.
502 const forceU = this.options.ecmaVersion >= 11
503 let ch = state.current(forceU)
504 state.advance(forceU)
// A backslash introduces a unicode escape whose decoded value replaces ch.
506 if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
507 ch = state.lastIntValue
509 if (isRegExpIdentifierPart(ch)) {
510 state.lastIntValue = ch
/**
 * Tell whether a code point may continue a RegExp identifier name.
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True when isIdentifierChar(ch, true) accepts it, or it is `$`, `_`, ZWNJ or ZWJ.
 */
function isRegExpIdentifierPart(ch) {
  return isIdentifierChar(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */ || ch === 0x200C /* <ZWNJ> */ || ch === 0x200D /* <ZWJ> */
}
521 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-AtomEscape
// Tries the AtomEscape alternatives in order: back reference, character class
// escape, character escape and, only with switchN set, `\k<name>`.
// NOTE(review): the surrounding condition, the guard around the raises and
// the return statements are not visible in this view.
522 pp.regexp_eatAtomEscape = function(state) {
524 this.regexp_eatBackReference(state) ||
525 this.regexp_eatCharacterClassEscape(state) ||
526 this.regexp_eatCharacterEscape(state) ||
527 (state.switchN && this.regexp_eatKGroupName(state))
532 // Make the same message as V8.
533 if (state.current() === 0x63 /* c */) {
534 state.raise("Invalid unicode escape")
536 state.raise("Invalid escape")
// Tries to consume a numeric back reference and tracks the largest number seen.
// NOTE(review): the branches following the comparisons and the return
// statements are not visible in this view.
540 pp.regexp_eatBackReference = function(state) {
541 const start = state.pos
542 if (this.regexp_eatDecimalEscape(state)) {
543 const n = state.lastIntValue
545 // For SyntaxError in https://www.ecma-international.org/ecma-262/8.0/#sec-atomescape
546 if (n > state.maxBackReference) {
547 state.maxBackReference = n
// Compares the reference against the number of capturing groups counted so far.
551 if (n <= state.numCapturingParens) {
// Tries to consume `k` followed by a group name and records the name in state.backReferenceNames.
// NOTE(review): return statements are not visible in this view.
558 pp.regexp_eatKGroupName = function(state) {
559 if (state.eat(0x6B /* k */)) {
560 if (this.regexp_eatGroupName(state)) {
561 state.backReferenceNames.push(state.lastStringValue)
564 state.raise("Invalid named reference")
// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-CharacterEscape
//
// Tries each CharacterEscape alternative in order and returns the result of
// the first one that succeeds. Legacy octal escapes are attempted only when
// switchU is off.
pp.regexp_eatCharacterEscape = function(state) {
  return (
    this.regexp_eatControlEscape(state) ||
    this.regexp_eatCControlLetter(state) ||
    this.regexp_eatZero(state) ||
    this.regexp_eatHexEscapeSequence(state) ||
    this.regexp_eatRegExpUnicodeEscapeSequence(state, false) ||
    (!state.switchU && this.regexp_eatLegacyOctalEscapeSequence(state)) ||
    this.regexp_eatIdentityEscape(state)
  )
}
// Tries to consume `c` followed by a control letter.
// NOTE(review): return statements and any use of `start` are not visible in this view.
581 pp.regexp_eatCControlLetter = function(state) {
582 const start = state.pos
583 if (state.eat(0x63 /* c */)) {
584 if (this.regexp_eatControlLetter(state)) {
// Recognises a `0` that is not followed by another decimal digit and records the value 0.
// NOTE(review): the lines that advance the cursor and return are not visible in this view.
591 pp.regexp_eatZero = function(state) {
592 if (state.current() === 0x30 /* 0 */ && !isDecimalDigit(state.lookahead())) {
593 state.lastIntValue = 0
600 // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlEscape
// Maps the control escape letters t, n, v, f, r to the character codes
// 0x09, 0x0A, 0x0B, 0x0C, 0x0D and stores the code in state.lastIntValue.
// NOTE(review): the lines that advance the cursor and return are not visible in this view.
601 pp.regexp_eatControlEscape = function(state) {
602 const ch = state.current()
603 if (ch === 0x74 /* t */) {
604 state.lastIntValue = 0x09 /* \t */
608 if (ch === 0x6E /* n */) {
609 state.lastIntValue = 0x0A /* \n */
613 if (ch === 0x76 /* v */) {
614 state.lastIntValue = 0x0B /* \v */
618 if (ch === 0x66 /* f */) {
619 state.lastIntValue = 0x0C /* \f */
623 if (ch === 0x72 /* r */) {
624 state.lastIntValue = 0x0D /* \r */
631 // https://www.ecma-international.org/ecma-262/8.0/#prod-ControlLetter
// Recognises an ASCII letter after `\c` and stores its value modulo 0x20 in state.lastIntValue.
// NOTE(review): the lines that advance the cursor and return are not visible in this view.
632 pp.regexp_eatControlLetter = function(state) {
633 const ch = state.current()
634 if (isControlLetter(ch)) {
// `% 0x20` maps both `A` (0x41) and `a` (0x61) to 1, and so on through the alphabet.
635 state.lastIntValue = ch % 0x20
/**
 * Tell whether a code point is an ASCII letter (`A`-`Z` or `a`-`z`).
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True for ASCII letters only.
 */
function isControlLetter(ch) {
  return (
    (ch >= 0x41 /* A */ && ch <= 0x5A /* Z */) ||
    (ch >= 0x61 /* a */ && ch <= 0x7A /* z */)
  )
}
648 // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence
// Tries to consume a unicode escape after the backslash: `uXXXX`, a `uXXXX\uXXXX`
// surrogate pair in unicode mode, or the braced `u{...}` form.
// NOTE(review): return statements, the guard in front of the braced form and
// the guard around the final raise are not visible in this view.
649 pp.regexp_eatRegExpUnicodeEscapeSequence = function(state, forceU = false) {
650 const start = state.pos
// Unicode mode is in effect when forced by the caller or when switchU is set.
651 const switchU = forceU || state.switchU
653 if (state.eat(0x75 /* u */)) {
654 if (this.regexp_eatFixedHexDigits(state, 4)) {
655 const lead = state.lastIntValue
// In unicode mode a lead surrogate may be followed by an escaped trail surrogate.
656 if (switchU && lead >= 0xD800 && lead <= 0xDBFF) {
657 const leadSurrogateEnd = state.pos
658 if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.regexp_eatFixedHexDigits(state, 4)) {
659 const trail = state.lastIntValue
660 if (trail >= 0xDC00 && trail <= 0xDFFF) {
// Combine the surrogate pair into a single code point.
661 state.lastIntValue = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000
// Rewind to just after the lead escape and keep the lead value on its own.
665 state.pos = leadSurrogateEnd
666 state.lastIntValue = lead
// Braced form: hex digits between `{` and `}` whose value must pass isValidUnicode.
672 state.eat(0x7B /* { */) &&
673 this.regexp_eatHexDigits(state) &&
674 state.eat(0x7D /* } */) &&
675 isValidUnicode(state.lastIntValue)
680 state.raise("Invalid unicode escape")
/**
 * Tell whether a number lies in the Unicode code point range.
 *
 * @param {number} ch Value to test.
 * @returns {boolean} True when 0 <= ch <= 0x10FFFF.
 */
function isValidUnicode(ch) {
  return ch >= 0 && ch <= 0x10FFFF
}
691 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-IdentityEscape
// Tries to consume an identity escape: a syntax character, `/`, or another
// character that is not `c` (and not `k` when switchN is set).
// NOTE(review): the guards selecting between these cases and the return
// statements are not visible in this view.
692 pp.regexp_eatIdentityEscape = function(state) {
694 if (this.regexp_eatSyntaxCharacter(state)) {
697 if (state.eat(0x2F /* / */)) {
698 state.lastIntValue = 0x2F /* / */
704 const ch = state.current()
// `c` is never accepted here; `k` is rejected only when switchN is set.
705 if (ch !== 0x63 /* c */ && (!state.switchN || ch !== 0x6B /* k */)) {
706 state.lastIntValue = ch
714 // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalEscape
// Reads a decimal number whose first digit is 1-9 and accumulates its value in state.lastIntValue.
// NOTE(review): the loop opener, the cursor advance and the return statements are not visible in this view.
715 pp.regexp_eatDecimalEscape = function(state) {
716 state.lastIntValue = 0
717 let ch = state.current()
// The first digit must be non-zero.
718 if (ch >= 0x31 /* 1 */ && ch <= 0x39 /* 9 */) {
720 state.lastIntValue = 10 * state.lastIntValue + (ch - 0x30 /* 0 */)
// Subsequent digits may include 0.
722 } while ((ch = state.current()) >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */)
728 // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClassEscape
// Tries to consume a character class escape, or a `p{...}` / `P{...}` unicode
// property escape when ecmaVersion >= 9.
// NOTE(review): parts of the property-escape condition, the cursor advances
// and the return statements are not visible in this view.
729 pp.regexp_eatCharacterClassEscape = function(state) {
730 const ch = state.current()
732 if (isCharacterClassEscape(ch)) {
// lastIntValue is set to -1 for these escapes (regexp_classRanges compares range endpoints against -1).
733 state.lastIntValue = -1
740 this.options.ecmaVersion >= 9 &&
741 (ch === 0x50 /* P */ || ch === 0x70 /* p */)
743 state.lastIntValue = -1
746 state.eat(0x7B /* { */) &&
747 this.regexp_eatUnicodePropertyValueExpression(state) &&
748 state.eat(0x7D /* } */)
752 state.raise("Invalid property name")
// Tests whether ch is one of the class escape letters; d, D, s, S and w are visible below.
// NOTE(review): the last visible line ends with `||`, so at least one further
// operand and the closing lines are not visible in this view.
757 function isCharacterClassEscape(ch) {
759 ch === 0x64 /* d */ ||
760 ch === 0x44 /* D */ ||
761 ch === 0x73 /* s */ ||
762 ch === 0x53 /* S */ ||
763 ch === 0x77 /* w */ ||
768 // UnicodePropertyValueExpression ::
769 // UnicodePropertyName `=` UnicodePropertyValue
770 // LoneUnicodePropertyNameOrValue
// Parses the contents of a unicode property escape: first as `name=value`,
// then as a lone name or value, validating whichever form matched.
// NOTE(review): return statements and any use of `start` are not visible in this view.
771 pp.regexp_eatUnicodePropertyValueExpression = function(state) {
772 const start = state.pos
774 // UnicodePropertyName `=` UnicodePropertyValue
775 if (this.regexp_eatUnicodePropertyName(state) && state.eat(0x3D /* = */)) {
// Copy the name now: parsing the value overwrites lastStringValue.
776 const name = state.lastStringValue
777 if (this.regexp_eatUnicodePropertyValue(state)) {
778 const value = state.lastStringValue
779 this.regexp_validateUnicodePropertyNameAndValue(state, name, value)
785 // LoneUnicodePropertyNameOrValue
786 if (this.regexp_eatLoneUnicodePropertyNameOrValue(state)) {
787 const nameOrValue = state.lastStringValue
788 this.regexp_validateUnicodePropertyNameOrValue(state, nameOrValue)
// Validates a `name=value` unicode property pair against the non-binary
// property table of the state. Raises "Invalid property name" when `name` is
// not an own key of the table, and "Invalid property value" when the entry's
// test() rejects `value`.
pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) {
  if (!hasOwn(state.unicodeProperties.nonBinary, name))
    state.raise("Invalid property name")
  if (!state.unicodeProperties.nonBinary[name].test(value))
    state.raise("Invalid property value")
}
// Validates a lone unicode property name or value against the binary
// property matcher of the state; raises "Invalid property name" when its
// test() rejects the text.
pp.regexp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) {
  if (!state.unicodeProperties.binary.test(nameOrValue))
    state.raise("Invalid property name")
}
804 // UnicodePropertyName ::
805 // UnicodePropertyNameCharacters
// Collects property-name characters into state.lastStringValue and reports whether any were found.
// NOTE(review): the declaration of `ch` and the cursor advance inside the loop are not visible in this view.
806 pp.regexp_eatUnicodePropertyName = function(state) {
808 state.lastStringValue = ""
809 while (isUnicodePropertyNameCharacter(ch = state.current())) {
810 state.lastStringValue += codePointToString(ch)
813 return state.lastStringValue !== ""
/**
 * Tell whether a code point may appear in a unicode property name.
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True when isControlLetter accepts it or it is `_`.
 */
function isUnicodePropertyNameCharacter(ch) {
  return isControlLetter(ch) || ch === 0x5F /* _ */
}
819 // UnicodePropertyValue ::
820 // UnicodePropertyValueCharacters
// Collects property-value characters into state.lastStringValue and reports whether any were found.
// NOTE(review): the declaration of `ch` and the cursor advance inside the loop are not visible in this view.
821 pp.regexp_eatUnicodePropertyValue = function(state) {
823 state.lastStringValue = ""
824 while (isUnicodePropertyValueCharacter(ch = state.current())) {
825 state.lastStringValue += codePointToString(ch)
828 return state.lastStringValue !== ""
/**
 * Tell whether a code point may appear in a unicode property value:
 * any property-name character or a decimal digit.
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True when the character is allowed in a property value.
 */
function isUnicodePropertyValueCharacter(ch) {
  return isUnicodePropertyNameCharacter(ch) || isDecimalDigit(ch)
}
// LoneUnicodePropertyNameOrValue ::
//   UnicodePropertyValueCharacters
//
// Uses the same character set as a property value, so it delegates to the
// value parser and returns its result.
pp.regexp_eatLoneUnicodePropertyNameOrValue = function(state) {
  return this.regexp_eatUnicodePropertyValue(state)
}
840 // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass
// Tries to consume a character class: `[`, an optional `^`, class ranges, then `]`.
// NOTE(review): return statements are not visible in this view.
841 pp.regexp_eatCharacterClass = function(state) {
842 if (state.eat(0x5B /* [ */)) {
// The negation marker is optional, so the result of eat() is ignored.
843 state.eat(0x5E /* ^ */)
844 this.regexp_classRanges(state)
845 if (state.eat(0x5D /* ] */)) {
848 // Unreachable since it threw "unterminated regular expression" error before.
849 state.raise("Unterminated character class")
// https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges
// https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges
// https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash
//
// Consumes class atoms and `a-b` ranges until no further atom matches. An
// endpoint value of -1 (the value regexp_eatCharacterClassEscape stores in
// lastIntValue) is rejected in unicode mode and skipped by the ordering check.
pp.regexp_classRanges = function(state) {
  while (this.regexp_eatClassAtom(state)) {
    const left = state.lastIntValue
    if (state.eat(0x2D /* - */) && this.regexp_eatClassAtom(state)) {
      const right = state.lastIntValue
      if (state.switchU && (left === -1 || right === -1)) {
        state.raise("Invalid character class")
      }
      if (left !== -1 && right !== -1 && left > right) {
        state.raise("Range out of order in character class")
      }
    }
  }
}
872 // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtom
873 // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassAtomNoDash
// Tries to consume one class atom: a backslash escape, or any element other than `]`.
// NOTE(review): the guard around the raises, the cursor advance/restoration
// and the return statements are not visible in this view.
874 pp.regexp_eatClassAtom = function(state) {
875 const start = state.pos
877 if (state.eat(0x5C /* \ */)) {
878 if (this.regexp_eatClassEscape(state)) {
882 // Make the same message as V8.
883 const ch = state.current()
884 if (ch === 0x63 /* c */ || isOctalDigit(ch)) {
885 state.raise("Invalid class escape")
887 state.raise("Invalid escape")
892 const ch = state.current()
// Any element except the closing bracket is recorded as the atom's value.
893 if (ch !== 0x5D /* ] */) {
894 state.lastIntValue = ch
902 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassEscape
// Tries the escapes allowed inside a character class: `b` (backspace), `-`
// (only with switchU set), `c` plus a class control letter (only with switchU
// off), then the general class and character escapes.
// NOTE(review): return statements and any use of `start` are not visible in this view.
903 pp.regexp_eatClassEscape = function(state) {
904 const start = state.pos
906 if (state.eat(0x62 /* b */)) {
907 state.lastIntValue = 0x08 /* <BS> */
911 if (state.switchU && state.eat(0x2D /* - */)) {
912 state.lastIntValue = 0x2D /* - */
916 if (!state.switchU && state.eat(0x63 /* c */)) {
917 if (this.regexp_eatClassControlLetter(state)) {
924 this.regexp_eatCharacterClassEscape(state) ||
925 this.regexp_eatCharacterEscape(state)
929 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter
// Recognises a decimal digit or `_` and stores its value modulo 0x20 in state.lastIntValue.
// NOTE(review): the lines that advance the cursor and return are not visible in this view.
930 pp.regexp_eatClassControlLetter = function(state) {
931 const ch = state.current()
932 if (isDecimalDigit(ch) || ch === 0x5F /* _ */) {
933 state.lastIntValue = ch % 0x20
940 // https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence
// Tries to consume `x` followed by exactly two hex digits.
// NOTE(review): the guard around the raise, any use of `start` and the
// return statements are not visible in this view.
941 pp.regexp_eatHexEscapeSequence = function(state) {
942 const start = state.pos
943 if (state.eat(0x78 /* x */)) {
944 if (this.regexp_eatFixedHexDigits(state, 2)) {
948 state.raise("Invalid escape")
955 // https://www.ecma-international.org/ecma-262/8.0/#prod-DecimalDigits
// Reads a run of decimal digits, accumulating the base-10 value in
// state.lastIntValue, and reports whether the cursor moved.
// NOTE(review): the declaration of `ch` and the cursor advance inside the loop are not visible in this view.
956 pp.regexp_eatDecimalDigits = function(state) {
957 const start = state.pos
959 state.lastIntValue = 0
960 while (isDecimalDigit(ch = state.current())) {
961 state.lastIntValue = 10 * state.lastIntValue + (ch - 0x30 /* 0 */)
964 return state.pos !== start
/**
 * Tell whether a code point is an ASCII decimal digit (`0`-`9`).
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True for `0` through `9`.
 */
function isDecimalDigit(ch) {
  return ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */
}
970 // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigits
// Reads a run of hex digits, accumulating the base-16 value in
// state.lastIntValue, and reports whether the cursor moved.
// NOTE(review): the declaration of `ch` and the cursor advance inside the loop are not visible in this view.
971 pp.regexp_eatHexDigits = function(state) {
972 const start = state.pos
974 state.lastIntValue = 0
975 while (isHexDigit(ch = state.current())) {
976 state.lastIntValue = 16 * state.lastIntValue + hexToInt(ch)
979 return state.pos !== start
/**
 * Tell whether a code point is an ASCII hexadecimal digit
 * (`0`-`9`, `A`-`F` or `a`-`f`).
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True for hexadecimal digits.
 */
function isHexDigit(ch) {
  return (
    (ch >= 0x30 /* 0 */ && ch <= 0x39 /* 9 */) ||
    (ch >= 0x41 /* A */ && ch <= 0x46 /* F */) ||
    (ch >= 0x61 /* a */ && ch <= 0x66 /* f */)
  )
}
/**
 * Convert the code point of a hexadecimal digit to its numeric value.
 * The caller is expected to pass a hex digit; other inputs fall through to
 * the decimal-digit formula.
 *
 * @param {number} ch Code point of `0`-`9`, `A`-`F` or `a`-`f`.
 * @returns {number} Value in the range 0..15 for hex digits.
 */
function hexToInt(ch) {
  if (ch >= 0x41 /* A */ && ch <= 0x46 /* F */) {
    return 10 + (ch - 0x41 /* A */)
  }
  if (ch >= 0x61 /* a */ && ch <= 0x66 /* f */) {
    return 10 + (ch - 0x61 /* a */)
  }
  return ch - 0x30 /* 0 */
}
998 // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-LegacyOctalEscapeSequence
999 // Allows only 0-377(octal) i.e. 0-255(decimal).
// Reads up to three octal digits and stores the combined value in state.lastIntValue.
// NOTE(review): the else branches separating the three assignments and the
// return statements are not visible in this view.
1000 pp.regexp_eatLegacyOctalEscapeSequence = function(state) {
1001 if (this.regexp_eatOctalDigit(state)) {
1002 const n1 = state.lastIntValue
1003 if (this.regexp_eatOctalDigit(state)) {
1004 const n2 = state.lastIntValue
// A third digit is read only when the first is at most 3, which caps the value at octal 377.
1005 if (n1 <= 3 && this.regexp_eatOctalDigit(state)) {
1006 state.lastIntValue = n1 * 64 + n2 * 8 + state.lastIntValue
1008 state.lastIntValue = n1 * 8 + n2
1011 state.lastIntValue = n1
1018 // https://www.ecma-international.org/ecma-262/8.0/#prod-OctalDigit
// Recognises a single octal digit and stores its numeric value in state.lastIntValue.
// NOTE(review): the cursor advance and return statements are not visible; the
// final assignment of 0 presumably belongs to the no-match path. Confirm against the full file.
1019 pp.regexp_eatOctalDigit = function(state) {
1020 const ch = state.current()
1021 if (isOctalDigit(ch)) {
1022 state.lastIntValue = ch - 0x30 /* 0 */
1026 state.lastIntValue = 0
/**
 * Tell whether a code point is an ASCII octal digit (`0`-`7`).
 *
 * @param {number} ch Code point to test.
 * @returns {boolean} True for `0` through `7`.
 */
function isOctalDigit(ch) {
  return ch >= 0x30 /* 0 */ && ch <= 0x37 /* 7 */
}
1033 // https://www.ecma-international.org/ecma-262/8.0/#prod-Hex4Digits
1034 // https://www.ecma-international.org/ecma-262/8.0/#prod-HexDigit
1035 // And HexDigit HexDigit in https://www.ecma-international.org/ecma-262/8.0/#prod-HexEscapeSequence
1036 pp.regexp_eatFixedHexDigits = function(state, length) {
1037 const start = state.pos
1038 state.lastIntValue = 0
1039 for (let i = 0; i < length; ++i) {
1040 const ch = state.current()
1041 if (!isHexDigit(ch)) {
1045 state.lastIntValue = 16 * state.lastIntValue + hexToInt(ch)