From 26fa49f4b0aa658d65a21fffe828f39e78302573 Mon Sep 17 00:00:00 2001 From: HelenHuang <LinHuang@pollex.com.tw> Date: 星期四, 09 六月 2022 17:46:58 +0800 Subject: [PATCH] Revert "Update#139889 [ 快速篩選 ] 年資文案調整" --- PAMapp/node_modules/regexpu-core/rewrite-pattern.js | 480 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 files changed, 410 insertions(+), 70 deletions(-) diff --git a/PAMapp/node_modules/regexpu-core/rewrite-pattern.js b/PAMapp/node_modules/regexpu-core/rewrite-pattern.js index 5ae76b6..c54618e 100644 --- a/PAMapp/node_modules/regexpu-core/rewrite-pattern.js +++ b/PAMapp/node_modules/regexpu-core/rewrite-pattern.js @@ -11,9 +11,6 @@ // Prepare a Regenerate set containing all code points, used for negative // character classes (if any). const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF); -// Without the `u` flag, the range stops at 0xFFFF. -// https://mths.be/es6#sec-pattern-semantics -const BMP_SET = regenerate().addRange(0x0, 0xFFFF); // Prepare a Regenerate set containing all code points that are supposed to be // matched by `/./u`. https://mths.be/es6#sec-atom @@ -62,8 +59,13 @@ const category = unicodeMatchPropertyValue(property, value); return getUnicodePropertyValueSet(property, category); } catch (exception) {} - // It’s not a `General_Category` value, so check if it’s a binary - // property. Note: `unicodeMatchProperty` throws on invalid properties. + // It’s not a `General_Category` value, so check if it’s a property + // of strings. + try { + return getUnicodePropertyValueSet('Property_of_Strings', value); + } catch (exception) {} + // Lastly, check if it’s a binary property of single code points. + // Note: `unicodeMatchProperty` throws on invalid properties. 
const property = unicodeMatchProperty(value); return getUnicodePropertyValueSet(property); }; @@ -81,9 +83,29 @@ set = getUnicodePropertyValueSet(property, value); } if (isNegative) { - return UNICODE_SET.clone().remove(set); + if (set.strings) { + throw new Error('Cannot negate Unicode property of strings'); + } + return { + characters: UNICODE_SET.clone().remove(set.characters), + strings: new Set() + }; } - return set.clone(); + return { + characters: set.characters.clone(), + strings: new Set(set.strings || []) + }; +}; + +const getUnicodePropertyEscapeCharacterClassData = (property, isNegative) => { + const set = getUnicodePropertyEscapeSet(property, isNegative); + const data = getCharacterClassEmptyData(); + data.singleChars = set.characters; + if (set.strings.size > 0) { + data.longStrings = set.strings; + data.maybeIncludesStrings = true; + } + return data; }; // Given a range of code points, add any case-folded code points in that range @@ -94,6 +116,16 @@ const folded = caseFold(min); if (folded) { $this.add(folded); + } + } while (++min <= max); + return $this; +}; +regenerate.prototype.iuRemoveRange = function(min, max) { + const $this = this; + do { + const folded = caseFold(min); + if (folded) { + $this.remove(folded); } } while (++min <= max); return $this; @@ -128,36 +160,248 @@ return iuMappings.get(codePoint) || false; }; -const processCharacterClass = (characterClassItem, regenerateOptions) => { - const set = regenerate(); +const buildHandler = (action) => { + switch (action) { + case 'union': + return { + single: (data, cp) => { + data.singleChars.add(cp); + }, + regSet: (data, set2) => { + data.singleChars.add(set2); + }, + range: (data, start, end) => { + data.singleChars.addRange(start, end); + }, + iuRange: (data, start, end) => { + data.singleChars.iuAddRange(start, end); + }, + nested: (data, nestedData) => { + data.singleChars.add(nestedData.singleChars); + for (const str of nestedData.longStrings) data.longStrings.add(str); + if 
(nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true; + } + }; + case 'union-negative': { + const regSet = (data, set2) => { + data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars); + }; + return { + single: (data, cp) => { + const unicode = UNICODE_SET.clone(); + data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp); + }, + regSet: regSet, + range: (data, start, end) => { + data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars); + }, + iuRange: (data, start, end) => { + data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end).add(data.singleChars); + }, + nested: (data, nestedData) => { + regSet(data, nestedData.singleChars); + if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR'); + } + }; + } + case 'intersection': { + const regSet = (data, set2) => { + if (data.first) data.singleChars = set2; + else data.singleChars.intersection(set2); + }; + return { + single: (data, cp) => { + data.singleChars = data.first || data.singleChars.contains(cp) ? 
regenerate(cp) : regenerate(); + data.longStrings.clear(); + data.maybeIncludesStrings = false; + }, + regSet: (data, set) => { + regSet(data, set); + data.longStrings.clear(); + data.maybeIncludesStrings = false; + }, + range: (data, start, end) => { + if (data.first) data.singleChars.addRange(start, end); + else data.singleChars.intersection(regenerate().addRange(start, end)); + data.longStrings.clear(); + data.maybeIncludesStrings = false; + }, + iuRange: (data, start, end) => { + if (data.first) data.singleChars.iuAddRange(start, end); + else data.singleChars.intersection(regenerate().iuAddRange(start, end)); + data.longStrings.clear(); + data.maybeIncludesStrings = false; + }, + nested: (data, nestedData) => { + regSet(data, nestedData.singleChars); + + if (data.first) { + data.longStrings = nestedData.longStrings; + data.maybeIncludesStrings = nestedData.maybeIncludesStrings; + } else { + for (const str of data.longStrings) { + if (!nestedData.longStrings.has(str)) data.longStrings.delete(str); + } + if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false; + } + } + }; + } + case 'subtraction': { + const regSet = (data, set2) => { + if (data.first) data.singleChars.add(set2); + else data.singleChars.remove(set2); + }; + return { + single: (data, cp) => { + if (data.first) data.singleChars.add(cp); + else data.singleChars.remove(cp); + }, + regSet: regSet, + range: (data, start, end) => { + if (data.first) data.singleChars.addRange(start, end); + else data.singleChars.removeRange(start, end); + }, + iuRange: (data, start, end) => { + if (data.first) data.singleChars.iuAddRange(start, end); + else data.singleChars.iuRemoveRange(start, end); + }, + nested: (data, nestedData) => { + regSet(data, nestedData.singleChars); + + if (data.first) { + data.longStrings = nestedData.longStrings; + data.maybeIncludesStrings = nestedData.maybeIncludesStrings; + } else { + for (const str of data.longStrings) { + if (nestedData.longStrings.has(str)) 
data.longStrings.delete(str); + } + } + } + }; + } + // The `default` clause is only here as a safeguard; it should never be + // reached. Code coverage tools should ignore it. + /* istanbul ignore next */ + default: + throw new Error(`Unknown set action: ${ characterClassItem.kind }`); + } +}; + +const getCharacterClassEmptyData = () => ({ + transformed: config.transform.unicodeFlag, + singleChars: regenerate(), + longStrings: new Set(), + hasEmptyString: false, + first: true, + maybeIncludesStrings: false +}); + +const maybeFold = (codePoint) => { + if (config.flags.ignoreCase && config.transform.unicodeFlag) { + const folded = caseFold(codePoint); + if (folded) { + return [codePoint, folded]; + } + } + return [codePoint]; +}; + +const computeClassStrings = (classStrings, regenerateOptions) => { + let data = getCharacterClassEmptyData(); + + for (const string of classStrings.strings) { + if (string.characters.length === 1) { + maybeFold(string.characters[0].codePoint).forEach((cp) => { + data.singleChars.add(cp); + }); + } else { + let stringifiedString; + if (config.flags.ignoreCase && config.transform.unicodeFlag) { + stringifiedString = ''; + for (const ch of string.characters) { + let set = regenerate(ch.codePoint); + const folded = caseFold(ch.codePoint); + if (folded) set.add(folded); + stringifiedString += set.toString(regenerateOptions); + } + } else { + stringifiedString = string.characters.map(ch => generate(ch)).join('') + } + + data.longStrings.add(stringifiedString); + data.maybeIncludesStrings = true; + } + } + + return data; +} + +const computeCharacterClass = (characterClassItem, regenerateOptions) => { + let data = getCharacterClassEmptyData(); + + let handlePositive; + let handleNegative; + + switch (characterClassItem.kind) { + case 'union': + handlePositive = buildHandler('union'); + handleNegative = buildHandler('union-negative'); + break; + case 'intersection': + handlePositive = buildHandler('intersection'); + handleNegative = 
buildHandler('subtraction'); + break; + case 'subtraction': + handlePositive = buildHandler('subtraction'); + handleNegative = buildHandler('intersection'); + break; + // The `default` clause is only here as a safeguard; it should never be + // reached. Code coverage tools should ignore it. + /* istanbul ignore next */ + default: + throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`); + } + for (const item of characterClassItem.body) { switch (item.type) { case 'value': - set.add(item.codePoint); - if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) { - const folded = caseFold(item.codePoint); - if (folded) { - set.add(folded); - } - } + maybeFold(item.codePoint).forEach((cp) => { + handlePositive.single(data, cp); + }); break; case 'characterClassRange': const min = item.min.codePoint; const max = item.max.codePoint; - set.addRange(min, max); - if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) { - set.iuAddRange(min, max); + handlePositive.range(data, min, max); + if (config.flags.ignoreCase && config.transform.unicodeFlag) { + handlePositive.iuRange(data, min, max); } break; case 'characterClassEscape': - set.add(getCharacterClassEscapeSet( + handlePositive.regSet(data, getCharacterClassEscapeSet( item.value, - config.unicode, - config.ignoreCase + config.flags.unicode, + config.flags.ignoreCase )); break; case 'unicodePropertyEscape': - set.add(getUnicodePropertyEscapeSet(item.value, item.negative)); + const nestedData = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative); + handlePositive.nested(data, nestedData); + data.transformed = + data.transformed || + config.transform.unicodePropertyEscapes || + (config.transform.unicodeSetsFlag && nestedData.maybeIncludesStrings); + break; + case 'characterClass': + const handler = item.negative ? 
handleNegative : handlePositive; + const res = computeCharacterClass(item, regenerateOptions); + handler.nested(data, res); + data.transformed = true; + break; + case 'classStrings': + handlePositive.nested(data, computeClassStrings(item, regenerateOptions)); + data.transformed = true; break; // The `default` clause is only here as a safeguard; it should never be // reached. Code coverage tools should ignore it. @@ -165,11 +409,42 @@ default: throw new Error(`Unknown term type: ${ item.type }`); } + + data.first = false; } - if (characterClassItem.negative) { - update(characterClassItem, `(?!${set.toString(regenerateOptions)})[\\s\\S]`) - } else { - update(characterClassItem, set.toString(regenerateOptions)); + + if (characterClassItem.negative && data.maybeIncludesStrings) { + throw new SyntaxError('Cannot negate set containing strings'); + } + + return data; +} + +const processCharacterClass = ( + characterClassItem, + regenerateOptions, + computed = computeCharacterClass(characterClassItem, regenerateOptions) +) => { + const negative = characterClassItem.negative; + const { singleChars, transformed, longStrings } = computed; + if (transformed) { + const setStr = singleChars.toString(regenerateOptions); + + if (negative) { + if (config.useUnicodeFlag) { + update(characterClassItem, `[^${setStr[0] === '[' ? setStr.slice(1, -1) : setStr}]`) + } else { + update(characterClassItem, `(?!${setStr})[\\s\\S]`) + } + } else { + const hasEmptyString = longStrings.has(''); + const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length); + if (setStr !== '[]' || longStrings.size === 0) { + pieces.splice(pieces.length - (hasEmptyString ? 
1 : 0), 0, setStr); + } + + update(characterClassItem, pieces.join('|')); + } } return characterClassItem; }; @@ -189,14 +464,12 @@ const processTerm = (item, regenerateOptions, groups) => { switch (item.type) { case 'dot': - if (config.useDotAllFlag) { - break; - } else if (config.unicode) { + if (config.transform.unicodeFlag) { update( item, - getUnicodeDotSet(config.dotAll).toString(regenerateOptions) + getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions) ); - } else if (config.dotAll) { + } else if (config.transform.dotAllFlag) { // TODO: consider changing this at the regenerate level. update(item, '[\\s\\S]'); } @@ -205,29 +478,41 @@ item = processCharacterClass(item, regenerateOptions); break; case 'unicodePropertyEscape': - if (config.unicodePropertyEscape) { + const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative); + if (data.maybeIncludesStrings) { + if (!config.flags.unicodeSets) { + throw new Error( + 'Properties of strings are only supported when using the unicodeSets (v) flag.' 
+ ); + } + if (config.transform.unicodeSetsFlag) { + data.transformed = true; + item = processCharacterClass(item, regenerateOptions, data); + } + } else if (config.transform.unicodePropertyEscapes) { update( item, - getUnicodePropertyEscapeSet(item.value, item.negative) - .toString(regenerateOptions) + data.singleChars.toString(regenerateOptions) ); } break; case 'characterClassEscape': - update( - item, - getCharacterClassEscapeSet( - item.value, - config.unicode, - config.ignoreCase - ).toString(regenerateOptions) - ); + if (config.transform.unicodeFlag) { + update( + item, + getCharacterClassEscapeSet( + item.value, + /* config.transform.unicodeFlag implies config.flags.unicode */ true, + config.flags.ignoreCase + ).toString(regenerateOptions) + ); + } break; case 'group': if (item.behavior == 'normal') { groups.lastIndex++; } - if (item.name && config.namedGroup) { + if (item.name && config.transform.namedGroups) { const name = item.name.value; if (groups.names[name]) { @@ -262,7 +547,7 @@ case 'value': const codePoint = item.codePoint; const set = regenerate(codePoint); - if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) { + if (config.flags.ignoreCase && config.transform.unicodeFlag) { const folded = caseFold(codePoint); if (folded) { set.add(folded); @@ -301,41 +586,96 @@ }; const config = { - 'ignoreCase': false, - 'unicode': false, - 'dotAll': false, - 'useDotAllFlag': false, - 'useUnicodeFlag': false, - 'unicodePropertyEscape': false, - 'namedGroup': false -}; -const rewritePattern = (pattern, flags, options) => { - config.unicode = flags && flags.includes('u'); - const regjsparserFeatures = { - 'unicodePropertyEscape': config.unicode, - 'namedGroups': true, - 'lookbehind': options && options.lookbehind - }; - config.ignoreCase = flags && flags.includes('i'); - const supportDotAllFlag = options && options.dotAllFlag; - config.dotAll = supportDotAllFlag && flags && flags.includes('s'); - config.namedGroup = options && options.namedGroup; 
- config.useDotAllFlag = options && options.useDotAllFlag; - config.useUnicodeFlag = options && options.useUnicodeFlag; - config.unicodePropertyEscape = options && options.unicodePropertyEscape; - if (supportDotAllFlag && config.useDotAllFlag) { - throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!'); + 'flags': { + 'ignoreCase': false, + 'unicode': false, + 'unicodeSets': false, + 'dotAll': false, + }, + 'transform': { + 'dotAllFlag': false, + 'unicodeFlag': false, + 'unicodeSetsFlag': false, + 'unicodePropertyEscapes': false, + 'namedGroups': false, + }, + get useUnicodeFlag() { + return (this.flags.unicode || this.flags.unicodeSets) && !this.transform.unicodeFlag; } +}; + +const validateOptions = (options) => { + if (!options) return; + + for (const key of Object.keys(options)) { + const value = options[key]; + switch (key) { + case 'dotAllFlag': + case 'unicodeFlag': + case 'unicodePropertyEscapes': + case 'namedGroups': + if (value != null && value !== false && value !== 'transform') { + throw new Error(`.${key} must be false (default) or 'transform'.`); + } + break; + case 'unicodeSetsFlag': + if (value != null && value !== false && value !== 'parse' && value !== 'transform') { + throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`); + } + break; + case 'onNamedGroup': + if (value != null && typeof value !== 'function') { + throw new Error('.onNamedGroup must be a function.'); + } + break; + default: + throw new Error(`.${key} is not a valid regexpu-core option.`); + } + } +}; + +const hasFlag = (flags, flag) => flags ? flags.includes(flag) : false; +const transform = (options, name) => options ? 
options[name] === 'transform' : false; + +const rewritePattern = (pattern, flags, options) => { + validateOptions(options); + + config.flags.unicode = hasFlag(flags, 'u'); + config.flags.unicodeSets = hasFlag(flags, 'v'); + config.flags.ignoreCase = hasFlag(flags, 'i'); + config.flags.dotAll = hasFlag(flags, 's'); + + config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag'); + config.transform.unicodeFlag = (config.flags.unicode || config.flags.unicodeSets) && transform(options, 'unicodeFlag'); + config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag'); + + // unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform' + config.transform.unicodePropertyEscapes = config.flags.unicode && ( + transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes') + ); + config.transform.namedGroups = transform(options, 'namedGroups'); + + const regjsparserFeatures = { + 'unicodeSet': Boolean(options && options.unicodeSetsFlag), + + // Enable every stable RegExp feature by default + 'unicodePropertyEscape': true, + 'namedGroups': true, + 'lookbehind': true, + }; + const regenerateOptions = { 'hasUnicodeFlag': config.useUnicodeFlag, - 'bmpOnly': !config.unicode + 'bmpOnly': !config.flags.unicode }; + const groups = { 'onNamedGroup': options && options.onNamedGroup, 'lastIndex': 0, 'names': Object.create(null), // { [name]: index } 'unmatchedReferences': Object.create(null) // { [name]: Array<reference> } }; + const tree = parse(pattern, flags, regjsparserFeatures); // Note: `processTerm` mutates `tree` and `groups`. processTerm(tree, regenerateOptions, groups); -- Gitblit v1.8.0