Upgrade to https://github.com/acornjs/acorn.git commit 84eda6bf

[jst.git] / src / regexp.js
diff --git a/src/regexp.js b/src/regexp.js

index c18ee6d..37cc75b 100644 (file)
--- a/src/regexp.js
+++ b/src/regexp.js
@@ -1,13 +1,15 @@
  import {isIdentifierStart, isIdentifierChar} from "./identifier.js"
  import {Parser} from "./state.js"
  import UNICODE_PROPERTY_VALUES from "./unicode-property-data.js"
+import {hasOwn} from "./util.js"
  
  const pp = Parser.prototype
  
  export class RegExpValidationState {
    constructor(parser) {
      this.parser = parser
-    this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}`
+    this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}${parser.options.ecmaVersion >= 13 ? "d" : ""}`
+    this.unicodeProperties = UNICODE_PROPERTY_VALUES[parser.options.ecmaVersion >= 13 ? 13 : parser.options.ecmaVersion]
      this.source = ""
      this.flags = ""
      this.start = 0
@@ -38,47 +40,49 @@ export class RegExpValidationState {
  
    // If u flag is given, this returns the code point at the index (it combines a surrogate pair).
    // Otherwise, this returns the code unit of the index (can be a part of a surrogate pair).
-  at(i) {
+  at(i, forceU = false) {
      const s = this.source
      const l = s.length
      if (i >= l) {
        return -1
      }
      const c = s.charCodeAt(i)
-    if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
+    if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
        return c
      }
-    return (c << 10) + s.charCodeAt(i + 1) - 0x35FDC00
+    const next = s.charCodeAt(i + 1)
+    return next >= 0xDC00 && next <= 0xDFFF ? (c << 10) + next - 0x35FDC00 : c
    }
  
-  nextIndex(i) {
+  nextIndex(i, forceU = false) {
      const s = this.source
      const l = s.length
      if (i >= l) {
        return l
      }
-    const c = s.charCodeAt(i)
-    if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
+    let c = s.charCodeAt(i), next
+    if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
+        (next = s.charCodeAt(i + 1)) < 0xDC00 || next > 0xDFFF) {
        return i + 1
      }
      return i + 2
    }
  
-  current() {
-    return this.at(this.pos)
+  current(forceU = false) {
+    return this.at(this.pos, forceU)
    }
  
-  lookahead() {
-    return this.at(this.nextIndex(this.pos))
+  lookahead(forceU = false) {
+    return this.at(this.nextIndex(this.pos, forceU), forceU)
    }
  
-  advance() {
-    this.pos = this.nextIndex(this.pos)
+  advance(forceU = false) {
+    this.pos = this.nextIndex(this.pos, forceU)
    }
  
-  eat(ch) {
-    if (this.current() === ch) {
-      this.advance()
+  eat(ch, forceU = false) {
+    if (this.current(forceU) === ch) {
+      this.advance(forceU)
        return true
      }
      return false
@@ -150,7 +154,7 @@ pp.regexp_pattern = function(state) {
      if (state.eat(0x29 /* ) */)) {
        state.raise("Unmatched ')'")
      }
-    if (state.eat(0x5D /* [ */) || state.eat(0x7D /* } */)) {
+    if (state.eat(0x5D /* ] */) || state.eat(0x7D /* } */)) {
        state.raise("Lone quantifier brackets")
      }
    }
@@ -414,9 +418,9 @@ pp.regexp_eatExtendedPatternCharacter = function(state) {
    return false
  }
  
-// GroupSpecifier[U] ::
+// GroupSpecifier ::
  //   [empty]
-//   `?` GroupName[?U]
+//   `?` GroupName
  pp.regexp_groupSpecifier = function(state) {
    if (state.eat(0x3F /* ? */)) {
      if (this.regexp_eatGroupName(state)) {
@@ -430,8 +434,8 @@ pp.regexp_groupSpecifier = function(state) {
    }
  }
  
-// GroupName[U] ::
-//   `<` RegExpIdentifierName[?U] `>`
+// GroupName ::
+//   `<` RegExpIdentifierName `>`
  // Note: this updates `state.lastStringValue` property with the eaten name.
  pp.regexp_eatGroupName = function(state) {
    state.lastStringValue = ""
@@ -444,9 +448,9 @@ pp.regexp_eatGroupName = function(state) {
    return false
  }
  
-// RegExpIdentifierName[U] ::
-//   RegExpIdentifierStart[?U]
-//   RegExpIdentifierName[?U] RegExpIdentifierPart[?U]
+// RegExpIdentifierName ::
+//   RegExpIdentifierStart
+//   RegExpIdentifierName RegExpIdentifierPart
  // Note: this updates `state.lastStringValue` property with the eaten name.
  pp.regexp_eatRegExpIdentifierName = function(state) {
    state.lastStringValue = ""
@@ -460,17 +464,18 @@ pp.regexp_eatRegExpIdentifierName = function(state) {
    return false
  }
  
-// RegExpIdentifierStart[U] ::
+// RegExpIdentifierStart ::
  //   UnicodeIDStart
  //   `$`
  //   `_`
-//   `\` RegExpUnicodeEscapeSequence[?U]
+//   `\` RegExpUnicodeEscapeSequence[+U]
  pp.regexp_eatRegExpIdentifierStart = function(state) {
    const start = state.pos
-  let ch = state.current()
-  state.advance()
+  const forceU = this.options.ecmaVersion >= 11
+  let ch = state.current(forceU)
+  state.advance(forceU)
  
-  if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
+  if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
      ch = state.lastIntValue
    }
    if (isRegExpIdentifierStart(ch)) {
@@ -485,19 +490,20 @@ function isRegExpIdentifierStart(ch) {
    return isIdentifierStart(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */
  }
  
-// RegExpIdentifierPart[U] ::
+// RegExpIdentifierPart ::
  //   UnicodeIDContinue
  //   `$`
  //   `_`
-//   `\` RegExpUnicodeEscapeSequence[?U]
+//   `\` RegExpUnicodeEscapeSequence[+U]
  //   <ZWNJ>
  //   <ZWJ>
  pp.regexp_eatRegExpIdentifierPart = function(state) {
    const start = state.pos
-  let ch = state.current()
-  state.advance()
+  const forceU = this.options.ecmaVersion >= 11
+  let ch = state.current(forceU)
+  state.advance(forceU)
  
-  if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
+  if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
      ch = state.lastIntValue
    }
    if (isRegExpIdentifierPart(ch)) {
@@ -567,7 +573,7 @@ pp.regexp_eatCharacterEscape = function(state) {
      this.regexp_eatCControlLetter(state) ||
      this.regexp_eatZero(state) ||
      this.regexp_eatHexEscapeSequence(state) ||
-    this.regexp_eatRegExpUnicodeEscapeSequence(state) ||
+    this.regexp_eatRegExpUnicodeEscapeSequence(state, false) ||
      (!state.switchU && this.regexp_eatLegacyOctalEscapeSequence(state)) ||
      this.regexp_eatIdentityEscape(state)
    )
@@ -640,13 +646,14 @@ function isControlLetter(ch) {
  }
  
  // https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence
-pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
+pp.regexp_eatRegExpUnicodeEscapeSequence = function(state, forceU = false) {
    const start = state.pos
+  const switchU = forceU || state.switchU
  
    if (state.eat(0x75 /* u */)) {
      if (this.regexp_eatFixedHexDigits(state, 4)) {
        const lead = state.lastIntValue
-      if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) {
+      if (switchU && lead >= 0xD800 && lead <= 0xDBFF) {
          const leadSurrogateEnd = state.pos
          if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.regexp_eatFixedHexDigits(state, 4)) {
            const trail = state.lastIntValue
@@ -661,7 +668,7 @@ pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
        return true
      }
      if (
-      state.switchU &&
+      switchU &&
        state.eat(0x7B /* { */) &&
        this.regexp_eatHexDigits(state) &&
        state.eat(0x7D /* } */) &&
@@ -669,7 +676,7 @@ pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
      ) {
        return true
      }
-    if (state.switchU) {
+    if (switchU) {
        state.raise("Invalid unicode escape")
      }
      state.pos = start
@@ -784,14 +791,14 @@ pp.regexp_eatUnicodePropertyValueExpression = function(state) {
    return false
  }
  pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) {
-  if (!UNICODE_PROPERTY_VALUES.hasOwnProperty(name) || UNICODE_PROPERTY_VALUES[name].indexOf(value) === -1) {
+  if (!hasOwn(state.unicodeProperties.nonBinary, name))
      state.raise("Invalid property name")
-  }
+  if (!state.unicodeProperties.nonBinary[name].test(value))
+    state.raise("Invalid property value")
  }
  pp.regexp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) {
-  if (UNICODE_PROPERTY_VALUES.$LONE.indexOf(nameOrValue) === -1) {
+  if (!state.unicodeProperties.binary.test(nameOrValue))
      state.raise("Invalid property name")
-  }
  }
  
  // UnicodePropertyName ::
@@ -835,7 +842,7 @@ pp.regexp_eatCharacterClass = function(state) {
    if (state.eat(0x5B /* [ */)) {
      state.eat(0x5E /* ^ */)
      this.regexp_classRanges(state)
-    if (state.eat(0x5D /* [ */)) {
+    if (state.eat(0x5D /* ] */)) {
        return true
      }
      // Unreachable since it threw "unterminated regular expression" error before.
@@ -883,7 +890,7 @@ pp.regexp_eatClassAtom = function(state) {
    }
  
    const ch = state.current()
-  if (ch !== 0x5D /* [ */) {
+  if (ch !== 0x5D /* ] */) {
      state.lastIntValue = ch
      state.advance()
      return true