| | |
| | | // one of & - ! # % , : ; < = > @ _ ` ~ |
| | | // |
| | | |
| | | "use strict"; |
| | | (function() { |
| | | |
| | | var fromCodePoint = String.fromCodePoint || (function() { |
| | |
| | | function createCharacter(matches) { |
| | | var _char = matches[0]; |
| | | var first = _char.charCodeAt(0); |
| | | if (hasUnicodeFlag) { |
| | | if (isUnicodeMode) { |
| | | var second; |
| | | if (_char.length === 1 && first >= 0xD800 && first <= 0xDBFF) { |
| | | second = lookahead().charCodeAt(0); |
| | |
| | | }); |
| | | } |
| | | |
| | | function createQuantifier(min, max, from, to) { |
| | | function createQuantifier(min, max, from, to, symbol) { |
| | | if (to == null) { |
| | | from = pos - 1; |
| | | to = pos; |
| | |
| | | max: max, |
| | | greedy: true, |
| | | body: null, // set later on |
| | | symbol: symbol, |
| | | range: [ |
| | | from, |
| | | to |
| | |
| | | } |
| | | |
| | | var atom = parseAtomAndExtendedAtom(); |
| | | var quantifier; |
| | | if (!atom) { |
| | | // Check if a quantifier is following. A quantifier without an atom |
| | | // is an error. |
| | | pos_backup = pos |
| | | var quantifier = parseQuantifier() || false; |
| | | var pos_backup = pos |
| | | quantifier = parseQuantifier() || false; |
| | | if (quantifier) { |
| | | pos = pos_backup |
| | | bail('Expected atom'); |
| | |
| | | |
| | | // If no unicode flag, then try to parse ExtendedAtom -> ExtendedPatternCharacter. |
| | | // ExtendedPatternCharacter |
| | | if (!hasUnicodeFlag && (res = matchReg(/^{/))) { |
| | | var res; |
| | | if (!isUnicodeMode && (res = matchReg(/^{/))) { |
| | | atom = createCharacter(res); |
| | | } else { |
| | | bail('Expected atom'); |
| | | } |
| | | } |
| | | var quantifier = parseQuantifier() || false; |
| | | quantifier = parseQuantifier() || false; |
| | | if (quantifier) { |
| | | quantifier.body = flattenBody(atom); |
| | | // The quantifier contains the atom. Therefore, the beginning of the |
| | |
| | | // \ B |
| | | // ( ? = Disjunction ) |
| | | // ( ? ! Disjunction ) |
| | | var res, from = pos; |
| | | |
| | | if (match('^')) { |
| | | return createAnchor('start', 1 /* rawLength */); |
| | |
| | | var min, max; |
| | | |
| | | if (match('*')) { |
| | | quantifier = createQuantifier(0); |
| | | quantifier = createQuantifier(0, undefined, undefined, undefined, '*'); |
| | | } |
| | | else if (match('+')) { |
| | | quantifier = createQuantifier(1); |
| | | quantifier = createQuantifier(1, undefined, undefined, undefined, "+"); |
| | | } |
| | | else if (match('?')) { |
| | | quantifier = createQuantifier(0, 1); |
| | | quantifier = createQuantifier(0, 1, undefined, undefined, "?"); |
| | | } |
| | | else if (res = matchReg(/^\{([0-9]+)\}/)) { |
| | | min = parseInt(res[1], 10); |
| | |
| | | bail('numbers out of order in {} quantifier', '', from, pos); |
| | | } |
| | | quantifier = createQuantifier(min, max, res.range[0], res.range[1]); |
| | | } |
| | | |
| | | if ((min && !Number.isSafeInteger(min)) || (max && !Number.isSafeInteger(max))) { |
| | | bail("iterations outside JS safe integer range in quantifier", "", from, pos); |
| | | } |
| | | |
| | | if (quantifier) { |
| | |
| | | // PatternCharacter |
| | | return createCharacter(res); |
| | | } |
| | | else if (!hasUnicodeFlag && (res = matchReg(/^(?:]|})/))) { |
| | | else if (!isUnicodeMode && (res = matchReg(/^(?:]|})/))) { |
| | | // ExtendedPatternCharacter, first part. See parseTerm. |
| | | return createCharacter(res); |
| | | } |
| | |
| | | // \ AtomEscape |
| | | res = parseAtomEscape(); |
| | | if (!res) { |
| | | if (!hasUnicodeFlag && lookahead() == 'c') { |
| | | if (!isUnicodeMode && lookahead() == 'c') { |
| | | // B.1.4 ExtendedAtom |
| | | // \[lookahead = c] |
| | | return createValue('symbol', 92, pos - 1, pos); |
| | |
| | | } |
| | | |
| | | function parseUnicodeSurrogatePairEscape(firstEscape) { |
| | | if (hasUnicodeFlag) { |
| | | if (isUnicodeMode) { |
| | | var first, second; |
| | | if (firstEscape.kind == 'unicodeEscape' && |
| | | (first = firstEscape.codePoint) >= 0xD800 && first <= 0xDBFF && |
| | |
| | | |
| | | var res, from = pos; |
| | | |
| | | res = parseDecimalEscape() || parseNamedReference(); |
| | | res = parseDecimalEscape(insideCharacterClass) || parseNamedReference(); |
| | | if (res) { |
| | | return res; |
| | | } |
| | |
| | | return createEscaped('singleEscape', 0x0008, '\\b'); |
| | | } else if (match('B')) { |
| | | bail('\\B not possible inside of CharacterClass', '', from); |
| | | } else if (!hasUnicodeFlag && (res = matchReg(/^c([0-9])/))) { |
| | | } else if (!isUnicodeMode && (res = matchReg(/^c([0-9])/))) { |
| | | // B.1.4 |
| | | // c ClassControlLetter, ClassControlLetter = DecimalDigit |
| | | return createEscaped('controlLetter', res[1] + 16, res[1], 2); |
| | | } else if (!hasUnicodeFlag && (res = matchReg(/^c_/))) { |
| | | } else if (!isUnicodeMode && (res = matchReg(/^c_/))) { |
| | | // B.1.4 |
| | | // c ClassControlLetter, ClassControlLetter = _ |
| | | return createEscaped('controlLetter', 31, '_', 2); |
| | | } |
| | | // [+U] - |
| | | if (hasUnicodeFlag && match('-')) { |
| | | if (isUnicodeMode && match('-')) { |
| | | return createEscaped('singleEscape', 0x002d, '\\-'); |
| | | } |
| | | } |
| | |
| | | } |
| | | |
| | | |
| | | function parseDecimalEscape() { |
| | | function parseDecimalEscape(insideCharacterClass) { |
| | | // DecimalEscape :: |
| | | // DecimalIntegerLiteral [lookahead ∉ DecimalDigit] |
| | | |
| | | var res, match; |
| | | var res, match, from = pos; |
| | | |
| | | if (res = matchReg(/^(?!0)\d+/)) { |
| | | match = res[0]; |
| | | var refIdx = parseInt(res[0], 10); |
| | | if (refIdx <= closedCaptureCounter) { |
| | | if (refIdx <= closedCaptureCounter && !insideCharacterClass) { |
| | | // If the number is smaller than the normal-groups found so |
| | | // far, then it is a reference... |
| | | return createReference(res[0]); |
| | |
| | | // Recall the negative decision to decide if the input must be parsed |
| | | // a second time with the total normal-groups. |
| | | backrefDenied.push(refIdx); |
| | | |
| | | // \1 octal escapes are disallowed in unicode mode, but they might |
| | | // be references to groups which haven't been parsed yet. |
| | | // We must parse a second time to determine if \1 is a reference |
| | | // or an octal escape, and then we can report the error. |
| | | if (firstIteration) { |
| | | shouldReparse = true; |
| | | } else { |
| | | bailOctalEscapeIfUnicode(from, pos); |
| | | } |
| | | |
| | | // Reset the position again, as maybe only parts of the previous |
| | | // matched numbers are actual octal numbers. E.g. in '019' only |
| | |
| | | // /\091/.exec('\091')[0].length === 3 |
| | | else if (res = matchReg(/^[0-7]{1,3}/)) { |
| | | match = res[0]; |
| | | if (match !== '0') { |
| | | bailOctalEscapeIfUnicode(from, pos); |
| | | } |
| | | if (/^0{1,3}$/.test(match)) { |
| | | // If they are all zeros, then only take the first one. |
| | | return createEscaped('null', 0x0000, '0', match.length); |
| | |
| | | return false; |
| | | } |
| | | |
| | | function bailOctalEscapeIfUnicode(from, pos) { |
| | | if (isUnicodeMode) { |
| | | bail("Invalid decimal escape in unicode mode", null, from, pos); |
| | | } |
| | | } |
| | | |
| | | function parseCharacterClassEscape() { |
| | | // CharacterClassEscape :: one of d D s S w W |
| | | var res; |
| | | if (res = matchReg(/^[dDsSwW]/)) { |
| | | return createCharacterClassEscape(res[0]); |
| | | } else if (features.unicodePropertyEscape && (hasUnicodeFlag || hasUnicodeSetFlag) && (res = matchReg(/^([pP])\{([^\}]+)\}/))) { |
| | | } else if (features.unicodePropertyEscape && isUnicodeMode && (res = matchReg(/^([pP])\{([^\}]+)\}/))) { |
| | | // https://github.com/jviereck/regjsparser/issues/77 |
| | | return addRaw({ |
| | | type: 'unicodePropertyEscape', |
| | |
| | | range: [res.range[0] - 1, res.range[1]], |
| | | raw: res[0] |
| | | }); |
| | | } else if (features.unicodeSet && hasUnicodeSetFlag && match('q{')) { |
| | | return parseClassStrings(); |
| | | } |
| | | return false; |
| | | } |
| | |
| | | return parseUnicodeSurrogatePairEscape( |
| | | createEscaped('unicodeEscape', parseInt(res[1], 16), res[1], 2) |
| | | ); |
| | | } else if (hasUnicodeFlag && (res = matchReg(/^u\{([0-9a-fA-F]+)\}/))) { |
| | | } else if (isUnicodeMode && (res = matchReg(/^u\{([0-9a-fA-F]+)\}/))) { |
| | | // RegExpUnicodeEscapeSequence (ES6 Unicode code point escape) |
| | | return createEscaped('unicodeCodePointEscape', parseInt(res[1], 16), res[1], 4); |
| | | } |
| | |
| | | // Taken from the Esprima parser. |
| | | function isIdentifierPart(ch) { |
| | | // Generated by `tools/generate-identifier-regex.js`. |
| | | // eslint-disable-next-line no-misleading-character-class |
| | | var NonAsciiIdentifierPartOnly = /[0-9_\xB7\u0300-\u036F\u0387\u0483-\u0487\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u0669\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u06F0-\u06F9\u0711\u0730-\u074A\u07A6-\u07B0\u07C0-\u07C9\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D3-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0966-\u096F\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u09E6-\u09EF\u09FE\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A66-\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0AE6-\u0AEF\u0AFA-\u0AFF\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B66-\u0B6F\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE6-\u0BEF\u0C00-\u0C04\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C66-\u0C6F\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0CE6-\u0CEF\u0D00-\u0D03\u0D3B\u0D3C\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D66-\u0D6F\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0E50-\u0E59\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0ED0-\u0ED9\u0F18\u0F19\u0F20-\u0F29\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1040-\u1049\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F-\u109D\u135D-\u135F\u1369-\u1371\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u17B4-\u17D3\u17DD\u17E0-\u17E9\u180B-\u180D\u1810-\u1819\u18A9\u1920-\u192B\u1930-\u193B\u1946-\u194F\u19D0-\u19DA\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F-\u1A89\u1A90-\u1A99\u1AB0-\u1ABD\u1B00-\u1B04\u1B34-\u1B44\u1B50-\u1B59\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BB0-\u1BB9\u1BE6-\u1BF3\u1C24-\u1C37\u1C40-\u1C49\u1C50-\u1C59\u1CD0-\u1CD2\u1CD4-\u1CE8\
u1CED\u1CF2-\u1CF4\u1CF7-\u1CF9\u1DC0-\u1DF9\u1DFB-\u1DFF\u200C\u200D\u203F\u2040\u2054\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA620-\uA629\uA66F\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C5\uA8D0-\uA8D9\uA8E0-\uA8F1\uA8FF-\uA909\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uA9D0-\uA9D9\uA9E5\uA9F0-\uA9F9\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAA50-\uAA59\uAA7B-\uAA7D\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uABF0-\uABF9\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\uFE33\uFE34\uFE4D-\uFE4F\uFF10-\uFF19\uFF3F]|\uD800[\uDDFD\uDEE0\uDF76-\uDF7A]|\uD801[\uDCA0-\uDCA9]|\uD802[\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F\uDEE5\uDEE6]|\uD803[\uDD24-\uDD27\uDD30-\uDD39\uDF46-\uDF50]|\uD804[\uDC00-\uDC02\uDC38-\uDC46\uDC66-\uDC6F\uDC7F-\uDC82\uDCB0-\uDCBA\uDCF0-\uDCF9\uDD00-\uDD02\uDD27-\uDD34\uDD36-\uDD3F\uDD45\uDD46\uDD73\uDD80-\uDD82\uDDB3-\uDDC0\uDDC9-\uDDCC\uDDD0-\uDDD9\uDE2C-\uDE37\uDE3E\uDEDF-\uDEEA\uDEF0-\uDEF9\uDF00-\uDF03\uDF3B\uDF3C\uDF3E-\uDF44\uDF47\uDF48\uDF4B-\uDF4D\uDF57\uDF62\uDF63\uDF66-\uDF6C\uDF70-\uDF74]|\uD805[\uDC35-\uDC46\uDC50-\uDC59\uDC5E\uDCB0-\uDCC3\uDCD0-\uDCD9\uDDAF-\uDDB5\uDDB8-\uDDC0\uDDDC\uDDDD\uDE30-\uDE40\uDE50-\uDE59\uDEAB-\uDEB7\uDEC0-\uDEC9\uDF1D-\uDF2B\uDF30-\uDF39]|\uD806[\uDC2C-\uDC3A\uDCE0-\uDCE9\uDE01-\uDE0A\uDE33-\uDE39\uDE3B-\uDE3E\uDE47\uDE51-\uDE5B\uDE8A-\uDE99]|\uD807[\uDC2F-\uDC36\uDC38-\uDC3F\uDC50-\uDC59\uDC92-\uDCA7\uDCA9-\uDCB6\uDD31-\uDD36\uDD3A\uDD3C\uDD3D\uDD3F-\uDD45\uDD47\uDD50-\uDD59\uDD8A-\uDD8E\uDD90\uDD91\uDD93-\uDD97\uDDA0-\uDDA9\uDEF3-\uDEF6]|\uD81A[\uDE60-\uDE69\uDEF0-\uDEF4\uDF30-\uDF36\uDF50-\uDF59]|\uD81B[\uDF51-\uDF7E\uDF8F-\uDF92]|\uD82F[\uDC9D\uDC9E]|\uD834[\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]|\uD835[\uDFCE-\uDFFF]|\uD836[\uDE00-\uDE36\uDE3B-\uDE6C\uDE75\uDE84\uDE9B-\uDE9F\uDEA1-\uDEAF]|\uD838[\uDC00-\uDC06\uD
C08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A]|\uD83A[\uDCD0-\uDCD6\uDD44-\uDD4A\uDD50-\uDD59]|\uDB40[\uDD00-\uDDEF]/; |
| | | |
| | | return isIdentifierStart(ch) || |
| | |
| | | var tmp; |
| | | var l = lookahead(); |
| | | if ( |
| | | (hasUnicodeFlag && /[\^\$\.\*\+\?\(\)\\\[\]\{\}\|\/]/.test(l)) || |
| | | (!hasUnicodeFlag && l !== "c") |
| | | (isUnicodeMode && /[\^\$\.\*\+\?\(\)\\\[\]\{\}\|\/]/.test(l)) || |
| | | (!isUnicodeMode && l !== "c") |
| | | ) { |
| | | if (l === "k" && features.lookbehind) { |
| | | return null; |
| | |
| | | |
| | | // Check if both the from and atomTo have codePoints. |
| | | if (!('codePoint' in atom) || !('codePoint' in atomTo)) { |
| | | if (!hasUnicodeFlag) { |
| | | if (!isUnicodeMode) { |
| | | // If not, don't create a range but treat them as |
| | | // `atom` `-` `atom` instead. |
| | | // |
| | |
| | | |
| | | var body = []; |
| | | var kind; |
| | | var from = pos; |
| | | |
| | | var operand = parseClassOperand(/* allowRanges*/ true); |
| | | body.push(operand); |
| | |
| | | skip('&'); |
| | | skip('&'); |
| | | if (current('&')) { |
| | | bail('&& cannot be followed by &. Wrap it in parentheses: &&(&).'); |
| | | bail('&& cannot be followed by &. Wrap it in brackets: &&[&].'); |
| | | } |
| | | } else if (kind === 'subtraction') { |
| | | skip('-'); |
| | |
| | | var start, res; |
| | | |
| | | if (match('\\')) { |
| | | if (res = parseCharacterClassEscape()) { |
| | | // ClassOperand :: |
| | | // ... |
| | | // ClassStrings |
| | | // NestedClass |
| | | // |
| | | // NestedClass :: |
| | | // ... |
| | | // \ CharacterClassEscape[+U, +V] |
| | | if (res = parseClassEscape()) { |
| | | start = res; |
| | | } else if (res = parseClassCharacterEscapedHelper()) { |
| | | // ClassOperand :: |
| | | // ... |
| | | // NestedClass |
| | | // |
| | | // NestedClass :: |
| | | // ... |
| | | // \ CharacterClassEscape[+U, +V] |
| | | return res; |
| | | } else { |
| | | bail('Invalid escape', '\\' + lookahead(), from); |
| | | } |
| | | } else if (res = parseClassCharacterUnescapedHelper()) { |
| | | start = res; |
| | | } else if (res = parseClassStrings() || parseCharacterClass()) { |
| | | } else if (res = parseCharacterClass()) { |
| | | // ClassOperand :: |
| | | // ... |
| | | // ClassStrings |
| | | // NestedClass |
| | | // |
| | | // NestedClass :: |
| | |
| | | // \ b |
| | | |
| | | if (match('\\')) { |
| | | var res, from = pos; |
| | | if (res = parseClassCharacterEscapedHelper()) { |
| | | return res; |
| | | } else { |
| | |
| | | var res; |
| | | if (res = matchReg(/^[^()[\]{}/\-\\|]/)) { |
| | | return createCharacter(res); |
| | | }; |
| | | } |
| | | } |
| | | |
| | | function parseClassCharacterEscapedHelper() { |
| | |
| | | // \ ClassHalfOfDouble |
| | | // \ b |
| | | |
| | | var res; |
| | | if (match('b')) { |
| | | return createEscaped('singleEscape', 0x0008, '\\b'); |
| | | } else if (match('B')) { |
| | |
| | | |
| | | function parseClassStrings() { |
| | | // ClassStrings :: |
| | | // ( ClassString MoreClassStrings? ) |
| | | // \q{ ClassString MoreClassStrings? } |
| | | |
| | | // When calling this function, \q{ has already been consumed. |
| | | var from = pos - 3; |
| | | |
| | | var res = []; |
| | | var from = pos; |
| | | |
| | | if (!match('(')) { |
| | | return null; |
| | | } |
| | | |
| | | do { |
| | | res.push(parseClassString()); |
| | | } while (match('|')); |
| | | |
| | | skip(')'); |
| | | skip('}'); |
| | | |
| | | return createClassStrings(res, from, pos); |
| | | } |
| | |
| | | var backrefDenied = []; |
| | | var closedCaptureCounter = 0; |
| | | var firstIteration = true; |
| | | var shouldReparse = false; |
| | | var hasUnicodeFlag = (flags || "").indexOf("u") !== -1; |
| | | var hasUnicodeSetFlag = (flags || "").indexOf("v") !== -1; |
| | | var isUnicodeMode = hasUnicodeFlag || hasUnicodeSetFlag; |
| | | var pos = 0; |
| | | |
| | | if (hasUnicodeSetFlag && !features.unicodeSet) { |
| | |
| | | // the total number of capture groups set. |
| | | // |
| | | // SEE: https://github.com/jviereck/regjsparser/issues/70 |
| | | for (var i = 0; i < backrefDenied.length; i++) { |
| | | if (backrefDenied[i] <= closedCaptureCounter) { |
| | | // Parse the input a second time. |
| | | pos = 0; |
| | | firstIteration = false; |
| | | return parseDisjunction(); |
| | | } |
| | | shouldReparse = shouldReparse || backrefDenied.some(function (ref) { |
| | | return ref <= closedCaptureCounter; |
| | | }); |
| | | if (shouldReparse) { |
| | | // Parse the input a second time. |
| | | pos = 0; |
| | | firstIteration = false; |
| | | return parseDisjunction(); |
| | | } |
| | | |
| | | return result; |