|
|
- /**
- * @author Toru Nagashima <https://github.com/mysticatea>
- */
- "use strict";
-
- const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
- const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
- const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
- const astUtils = require("./utils/ast-utils.js");
- const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
-
- //------------------------------------------------------------------------------
- // Helpers
- //------------------------------------------------------------------------------
-
- /**
- * @typedef {import('@eslint-community/regexpp').AST.Character} Character
- * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
- */
-
/**
 * Element types that terminate the run of characters collected so far
 * without contributing any character of their own.
 */
const sequenceBoundaryTypes = new Set([
    "CharacterSet",
    "CharacterClass", // [[]] nesting character class
    "ClassStringDisjunction", // \q{...}
    "ExpressionCharacterClass" // [A--B]
]);

/**
 * Walks the elements of a character class and yields each run of
 * consecutive `Character` nodes.
 *
 * A `CharacterClassRange` can swallow a part of a character sequence (its
 * `min` closes the run that precedes it and its `max` opens the run that
 * follows it), so ranges are taken apart here to restore the sequences.
 * Element types that are not recognized are skipped without ending the
 * current run.
 * @param {CharacterClassElement[]} nodes The character class elements to walk.
 * @returns {IterableIterator<Character[]>} The non-empty character sequences, in source order.
 */
function *iterateCharacterSequence(nodes) {

    /** @type {Character[]} */
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element);
        } else if (type === "CharacterClassRange") {

            // The lower bound belongs to the run before the range ...
            pending.push(element.min);
            yield pending;

            // ... and the upper bound starts the run after it.
            pending = [element.max];
        } else if (sequenceBoundaryTypes.has(type) && pending.length > 0) {
            yield pending;
            pending = [];
        }
    }

    // Flush whatever is left once the elements are exhausted.
    if (pending.length > 0) {
        yield pending;
    }
}
-
-
/**
 * Matches source text that consists solely of a `\u{...}` escape with one
 * or more hexadecimal digits (matching is case-insensitive).
 */
const unicodeCodePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;

/**
 * Tells whether a character node was written as a Unicode code point escape.
 * The decision is based on the node's raw source text only.
 * @param {Character} char the character node to check.
 * @returns {boolean} `true` if the raw text of the node is a `\u{...}` escape.
 */
function isUnicodeCodePointEscape(char) {
    const { raw } = char;

    return unicodeCodePointEscapePattern.test(raw);
}
-
/** Code point of ZERO WIDTH JOINER (U+200D). */
const ZERO_WIDTH_JOINER = 0x200d;

/**
 * Checks whether any two neighbouring characters satisfy a predicate.
 * @param {Character[]} chars The character sequence to scan.
 * @param {(previous: Character, current: Character) => boolean} predicate Test applied to each adjacent pair.
 * @returns {boolean} `true` as soon as one adjacent pair satisfies the predicate.
 */
function someAdjacentPair(chars, predicate) {
    for (let index = 1; index < chars.length; index++) {
        if (predicate(chars[index - 1], chars[index])) {
            return true;
        }
    }

    return false;
}

/**
 * Detectors for each kind of misleading character sequence.
 * Every detector receives one character sequence and returns `true` when the
 * sequence contains the problem it is named after. The property names double
 * as the message ids used when reporting.
 * @type {Record<string, (chars: Character[]) => boolean>}
 */
const hasCharacterSequence = {

    // Two neighbours form a surrogate pair and neither is written as `\u{...}`.
    surrogatePairWithoutUFlag(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isSurrogatePair(previous.value, current.value) &&
            !(isUnicodeCodePointEscape(previous) || isUnicodeCodePointEscape(current))
        ));
    },

    // Two neighbours form a surrogate pair and at least one is written as `\u{...}`.
    surrogatePair(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isSurrogatePair(previous.value, current.value) &&
            (isUnicodeCodePointEscape(previous) || isUnicodeCodePointEscape(current))
        ));
    },

    // A combining character directly follows a character that is not combining.
    combiningClass(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isCombiningCharacter(current.value) &&
            !isCombiningCharacter(previous.value)
        ));
    },

    // An emoji modifier directly follows a character that is not an emoji modifier.
    emojiModifier(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isEmojiModifier(current.value) &&
            !isEmojiModifier(previous.value)
        ));
    },

    // Two regional indicator symbols stand next to each other.
    regionalIndicatorSymbol(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isRegionalIndicatorSymbol(current.value) &&
            isRegionalIndicatorSymbol(previous.value)
        ));
    },

    // A zero width joiner sits strictly inside the sequence, between two characters that are not joiners.
    zwj(chars) {

        // The first and the last positions are excluded: a joiner there has only one neighbour.
        for (let index = 1; index < chars.length - 1; index++) {
            const isIsolatedJoiner =
                chars[index].value === ZERO_WIDTH_JOINER &&
                chars[index - 1].value !== ZERO_WIDTH_JOINER &&
                chars[index + 1].value !== ZERO_WIDTH_JOINER;

            if (isIsolatedJoiner) {
                return true;
            }
        }

        return false;
    }
};

// Detector names in declaration order.
const kinds = Object.keys(hasCharacterSequence);
-
- //------------------------------------------------------------------------------
- // Rule Definition
- //------------------------------------------------------------------------------
-
- /** @type {import('../shared/types').Rule} */
- module.exports = {
- meta: {
- type: "problem",
-
- docs: {
- description: "Disallow characters which are made with multiple code points in character class syntax",
- recommended: true,
- url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
- },
-
- hasSuggestions: true,
-
- schema: [],
-
- messages: {
- surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
- surrogatePair: "Unexpected surrogate pair in character class.",
- combiningClass: "Unexpected combined character in character class.",
- emojiModifier: "Unexpected modified Emoji in character class.",
- regionalIndicatorSymbol: "Unexpected national flag in character class.",
- zwj: "Unexpected joined character sequence in character class.",
- suggestUnicodeFlag: "Add unicode 'u' flag to regex."
- }
- },
- create(context) {
- const sourceCode = context.sourceCode;
- const parser = new RegExpParser();
-
- /**
- * Verify a given regular expression.
- * @param {Node} node The node to report.
- * @param {string} pattern The regular expression pattern to verify.
- * @param {string} flags The flags of the regular expression.
- * @param {Function} unicodeFixer Fixer for missing "u" flag.
- * @returns {void}
- */
- function verify(node, pattern, flags, unicodeFixer) {
- let patternNode;
-
- try {
- patternNode = parser.parsePattern(
- pattern,
- 0,
- pattern.length,
- {
- unicode: flags.includes("u"),
- unicodeSets: flags.includes("v")
- }
- );
- } catch {
-
- // Ignore regular expressions with syntax errors
- return;
- }
-
- const foundKinds = new Set();
-
- visitRegExpAST(patternNode, {
- onCharacterClassEnter(ccNode) {
- for (const chars of iterateCharacterSequence(ccNode.elements)) {
- for (const kind of kinds) {
- if (hasCharacterSequence[kind](chars)) {
- foundKinds.add(kind);
- }
- }
- }
- }
- });
-
- for (const kind of foundKinds) {
- let suggest;
-
- if (kind === "surrogatePairWithoutUFlag") {
- suggest = [{
- messageId: "suggestUnicodeFlag",
- fix: unicodeFixer
- }];
- }
-
- context.report({
- node,
- messageId: kind,
- suggest
- });
- }
- }
-
- return {
- "Literal[regex]"(node) {
- verify(node, node.regex.pattern, node.regex.flags, fixer => {
- if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
- return null;
- }
-
- return fixer.insertTextAfter(node, "u");
- });
- },
- "Program"(node) {
- const scope = sourceCode.getScope(node);
- const tracker = new ReferenceTracker(scope);
-
- /*
- * Iterate calls of RegExp.
- * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
- * `const {RegExp: a} = window; new a()`, etc...
- */
- for (const { node: refNode } of tracker.iterateGlobalReferences({
- RegExp: { [CALL]: true, [CONSTRUCT]: true }
- })) {
- const [patternNode, flagsNode] = refNode.arguments;
- const pattern = getStringIfConstant(patternNode, scope);
- const flags = getStringIfConstant(flagsNode, scope);
-
- if (typeof pattern === "string") {
- verify(refNode, pattern, flags || "", fixer => {
-
- if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
- return null;
- }
-
- if (refNode.arguments.length === 1) {
- const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis
-
- return fixer.insertTextAfter(
- penultimateToken,
- astUtils.isCommaToken(penultimateToken)
- ? ' "u",'
- : ', "u"'
- );
- }
-
- if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
- const range = [flagsNode.range[0], flagsNode.range[1] - 1];
-
- return fixer.insertTextAfterRange(range, "u");
- }
-
- return null;
- });
- }
- }
- }
- };
- }
- };
|