/*
 * wikiparser-node
 * Version:
 * A Node.js parser for MediaWiki markup with AST
 * 467 lines (466 loc) • 17.6 kB
 * JavaScript
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.getCondition = void 0;
/* NOT FOR BROWSER */
const constants_1 = require("../util/constants");
const ranges_1 = require("../lib/ranges");
const title_1 = require("../lib/title");
const attributes_1 = require("../lib/attributes");
/* NOT FOR BROWSER END */
/**
 * Match a plain `type`, `#name` or `type#name` selector against a node.
 * @param selector selector text without attributes or pseudo-selectors
 * @param type type of the node being tested
 * @param name name of the node being tested
 */
const basic = (selector, type, name) => {
    const hash = selector.indexOf('#');
    if (hash === -1) {
        // No name part: an empty selector matches anything, otherwise the type must be equal.
        return !selector || selector === type;
    }
    const typePart = selector.slice(0, hash);
    const namePart = selector.slice(hash + 1);
    // A leading `#` means "any type"; the name must always be equal.
    return (hash === 0 || typePart === type) && namePart === name;
};
/* NOT FOR BROWSER */
/** Pseudo-selectors that take no argument. */
const simplePseudos = new Set([
    'root',
    'first-child',
    'first-of-type',
    'last-child',
    'last-of-type',
    'only-child',
    'only-of-type',
    'empty',
    'parent',
    'header',
    'hidden',
    'visible',
    'only-whitespace',
    'any-link',
    'local-link',
    'invalid',
    'valid',
    'required',
    'optional',
    'scope',
]);
/** Pseudo-selectors that take a parenthesised argument. */
const complexPseudos = new Set([
    'is',
    'not',
    'nth-child',
    'nth-of-type',
    'nth-last-child',
    'nth-last-of-type',
    'contains',
    'has',
    'lang',
    'regex',
]);
/**
 * Pairs of `[character, placeholder]`.
 * A backslash-escaped character in a selector is replaced by its placeholder while parsing
 * and the placeholder is turned back into the character afterwards, so every placeholder
 * must be a string that differs from the character itself.
 * The replacements run sequentially in this order, hence `&` comes last.
 */
const specialChars = [
    ['[', '&lbrack;'],
    [']', '&rbrack;'],
    ['(', '&lpar;'],
    [')', '&rpar;'],
    ['"', '&quot;'],
    [`'`, '&apos;'],
    [':', '&colon;'],
    ['\\', '&bsol;'],
    ['&', '&amp;'],
];
/** Next piece of syntax outside of brackets: `[`, `(`, `,`, a combinator or whitespace. */
const regularRegex = /[[(,>+~]|\s+/u;
/** Body of an attribute selector up to and including `]`: key, operator, value, `i` flag. */
const attributeRegex = /^\s*(\w+)\s*(?:([~|^$*!]?=)\s*("[^"]*"|'[^']*'|[^\s[\]]+)(?:\s+(i))?\s*)?\]/u;
/** Argument of a functional pseudo-selector up to and including `)`. */
const functionRegex = /^(\s*"[^"]*"\s*|\s*'[^']*'\s*|[^()]*)\)/u;
/** Characters that may follow whitespace and then take precedence over it. */
const grouping = new Set([',', '>', '+', '~']);
/** Combinators; the empty string stands for the descendant combinator. */
const combinator = new Set(['>', '+', '~', '']);
/** `typeof` results accepted as attribute values by the string-comparison operators. */
const primitives = new Set(['string', 'number', 'boolean', 'undefined']);
/**
 * Lower-case a value when case-insensitive comparison is requested.
 * @param val attribute value
 * @param i truthy when the comparison is case-insensitive
 */
const toCase = (val, i) => {
    if (i) {
        return val.toLowerCase();
    }
    return val;
};
/**
 * Test whether an index satisfies a range expression.
 * @param str range expression
 * @param i index to test
 */
const nth = (str, i) => {
    const ranges = new ranges_1.Ranges(str);
    return ranges.has(i, i + 1);
};
/**
 * Tell whether a node is protected inside its parent.
 * The protection rule belongs to the parent node; it is evaluated here only for the
 * `:required` and `:optional` pseudo-selectors.
 * @param token node
 * @returns `undefined` when the node has no parent
 */
const isProtected = (token) => {
    const parent = token.parentNode;
    if (!parent) {
        return undefined;
    }
    const siblings = parent.childNodes;
    const isFixed = parent.fixed;
    if (isFixed) {
        return isFixed;
    }
    const protectedRanges = parent.getAttribute('protectedChildren');
    return protectedRanges.has(siblings.indexOf(token), siblings.length);
};
/**
 * Read an attribute of a node for use in a selector.
 * The node's own `getAttr()` method is asked first; if it is missing or yields `undefined`,
 * the property of the same name is used, with regular expressions reduced to their source.
 * @param token node
 * @param key attribute key
 */
const getAttr = (token, key) => {
    const viaMethod = typeof token.getAttr === 'function' ? token.getAttr(key) : undefined;
    if (viaMethod !== undefined) {
        return viaMethod;
    }
    const property = token[key];
    if (property instanceof RegExp) {
        return property.source;
    }
    return property;
};
/**
 * Check whether a node satisfies every entry of one parsed selector step.
 * Relations between nodes (combinators) are not evaluated here.
 *
 * Each entry of `step` is one of:
 * - a string: a simple pseudo-selector, `*`, the empty string, or a type/name selector;
 * - an array of length 4: an attribute selector `[key, operator, value, i]`;
 * - any other array: a functional pseudo-selector `[argument, pseudo]`.
 * @param token node under test
 * @param step parsed selector step
 * @param scope node that `:scope` refers to
 * @param has node owning the enclosing `:has()` pseudo-selector, if any
 * @throws `SyntaxError` `:scope` used without a scope node
 * @throws `SyntaxError` nested `:has()`
 * @throws `SyntaxError` wrong usage of the regex pseudo-selector
 * @throws `SyntaxError` undefined pseudo-selector
 * @throws `RangeError` attribute value that is neither a primitive nor a `Title`, used with an operator other than `~=`
 */
const matches = (token, step, scope, has) => {
const { parentNode, type, name, childNodes } = token, attributes = new attributes_1.Attributes(token);
return step.every(selector => {
if (typeof selector === 'string') {
switch (selector) { // Case 1: simple pseudo-selectors, type and name
// The parser pushes '' when a `:has()` argument starts with a combinator; it stands for the `:has()` owner.
case '':
return token === has;
case '*':
return true;
case ':root':
return !parentNode;
case ':first-child':
return attributes.index === 1;
case ':first-of-type':
return attributes.indexOfType === 1;
case ':last-child':
return attributes.lastIndex === 1;
case ':last-of-type':
return attributes.lastIndexOfType === 1;
case ':only-child':
return attributes.siblingsCount === 1;
case ':only-of-type':
return attributes.siblingsCountOfType === 1;
// Empty: no child that is either a non-text node or a text node with non-empty data.
case ':empty':
return !childNodes.some(({ type: t, data }) => t !== 'text' || data);
// Exact opposite of `:empty`.
case ':parent':
return childNodes.some(({ type: t, data }) => t !== 'text' || data);
case ':header':
return type === 'heading';
// Visibility is decided by whether `token.text()` is an empty string.
case ':hidden':
return !token.text();
case ':visible':
return Boolean(token.text());
case ':only-whitespace':
return !token.text().trim();
// Link-like node types; files and gallery images count only when `attributes.link` is truthy.
case ':any-link':
return type === 'link'
|| type === 'redirect-target'
|| type === 'free-ext-link'
|| type === 'magic-link'
|| type === 'ext-link'
|| (type === 'file' || type === 'gallery-image') && attributes.link;
// A link whose target is a `Title` with an empty `title` property.
case ':local-link':
return (type === 'link' || type === 'file' || type === 'gallery-image')
&& attributes.link instanceof title_1.Title
&& !attributes.link.title;
case ':invalid':
return attributes.invalid;
case ':valid':
return !attributes.invalid;
// `isProtected()` yields `undefined` for a node without a parent, which matches neither pseudo-selector.
case ':required':
return isProtected(token) === true;
case ':optional':
return isProtected(token) === false;
case ':scope':
if (!scope) {
throw new SyntaxError('The :scope pseudo-selector must be used with an element node.');
}
return token === scope;
// Anything else is treated as a `type`, `#name` or `type#name` selector.
default:
return basic(selector, type, name);
}
}
else if (selector.length === 4) { // Case 2: attribute selector
const [key, equal, val = '', i] = selector, isAttr = typeof token.hasAttr === 'function' && typeof token.getAttr === 'function';
// A missing attribute satisfies only the `!=` operator.
if (!(key in token || isAttr && token.hasAttr(key))) {
return equal === '!=';
}
const v = toCase(val, i), thisVal = getAttr(token, key);
// `[key]` without an operator: the attribute must be neither `undefined` nor `false`.
if (!equal) {
return thisVal !== undefined && thisVal !== false;
}
/* istanbul ignore else */
if (equal === '~=') {
// A string is split on whitespace; any other value is used as a word list only if it is iterable.
const thisVals = typeof thisVal === 'string' ? thisVal.split(/\s/u) : thisVal;
return Boolean(thisVals?.[Symbol.iterator])
&& [...thisVals].some(w => typeof w === 'string' && toCase(w, i) === v);
}
else if (!(primitives.has(typeof thisVal) || thisVal instanceof title_1.Title)) {
throw new RangeError(`The complex attribute ${key} cannot be used in a selector!`);
}
// The remaining operators compare the string form of the attribute value.
const stringVal = toCase(String(thisVal), i);
switch (equal) {
case '|=':
return stringVal === v || stringVal.startsWith(`${v}-`);
case '^=':
return stringVal.startsWith(v);
case '$=':
return stringVal.endsWith(v);
case '*=':
return stringVal.includes(v);
case '!=':
return stringVal !== v;
default: // `=`
return stringVal === v;
}
}
const [s, pseudo] = selector; // Case 3: functional pseudo-selector
switch (pseudo) {
// `:is()` and `:not()` evaluate their argument as a complete selector against the same node.
case 'is':
return (0, exports.getCondition)(s, scope)(token);
case 'not':
return !(0, exports.getCondition)(s, scope)(token);
case 'nth-child':
return nth(s, attributes.index);
case 'nth-of-type':
return nth(s, attributes.indexOfType);
case 'nth-last-child':
return nth(s, attributes.lastIndex);
case 'nth-last-of-type':
return nth(s, attributes.lastIndexOfType);
case 'contains':
return token.text().includes(s);
case 'has': {
/* istanbul ignore if */
if (has) {
throw new SyntaxError('The :has() pseudo-selector cannot be nested.');
}
// The argument is compiled with this node as the `:has()` owner.
// Candidates are the node's children; when sibling data is available and an alternative
// of the argument starts with `+` or `~`, the entries of `attributes.siblings` from this
// node's own position onward are searched as well.
const condition = (0, exports.getCondition)(s, scope, token), childOrSibling = attributes.siblings && /(?:^|,)\s*[+~]/u.test(s)
? [...token.childNodes, ...attributes.siblings.slice(attributes.siblings.indexOf(token))]
: token.childNodes;
/**
 * Search a non-text node and its descendants for a match.
 * @param child child node
 */
const hasElement = (child) => child.type !== 'text' && (condition(child) || child.childNodes.some(hasElement));
return childOrSibling.some(hasElement);
}
case 'lang': {
// NOTE(review): the bare regex literal below has no runtime effect; presumably kept for tooling — confirm.
/^zh(?:-|$)/iu; // eslint-disable-line @typescript-eslint/no-unused-expressions
// The argument is interpolated into the pattern as-is and must match a whole language tag or a `-`-terminated prefix of it.
const regex = new RegExp(`^${s}(?:-|$)`, 'iu');
let node = token;
// Walk up from the node itself; the first node with a defined `lang` attribute decides the result.
for (; node; node = node.parentNode) {
const lang = node.attributes?.['lang'];
if (lang !== undefined) {
return typeof lang === 'string' && regex.test(lang);
}
}
return false;
}
case 'regex': {
// Expected argument format: `attr, /pattern/flags`.
const mt = /^([^,]+),\s*\/(.+)\/([a-z]*)$/u.exec(s);
/* istanbul ignore if */
if (!mt) {
throw new SyntaxError(`Wrong usage of the regex pseudo-selector. Use ":regex('attr, /re/i')" format.`);
}
// Any error raised inside the `try` block is reported as an invalid regular expression.
try {
return new RegExp(mt[2], mt[3]).test(String(getAttr(token, mt[1].trim())));
}
catch /* istanbul ignore next */ {
throw new SyntaxError(`Invalid regular expression: /${mt[2]}/${mt[3]}`);
}
}
/* istanbul ignore next */
default:
throw new SyntaxError(`Undefined pseudo-selector: ${pseudo}`);
}
});
};
/**
 * Check whether a node satisfies a parsed selector, combinators included.
 * The last step is tested against the node itself; the remaining steps are then tested
 * against the nodes selected by the combinator stored on the preceding step.
 * @param token node
 * @param copy parsed selector (not modified)
 * @param scope node that `:scope` refers to
 * @param has node owning the enclosing `:has()` pseudo-selector, if any
 */
const matchesArray = (token, copy, scope, has) => {
    const remaining = Array.from(copy);
    const ownStep = remaining.pop();
    if (!matches(token, ownStep, scope, has)) {
        return false;
    }
    const { parentNode, previousElementSibling } = token;
    const relation = remaining.at(-1)?.relation;
    /**
     * Test another node against the steps that are left.
     * @param node related node
     */
    const matchesRest = (node) => matchesArray(node, remaining, scope, has);
    if (relation === undefined) {
        // Nothing left to check.
        return true;
    }
    if (relation === '>') {
        return Boolean(parentNode && matchesRest(parentNode));
    }
    if (relation === '+') {
        return Boolean(previousElementSibling && matchesRest(previousElementSibling));
    }
    if (relation === '~') {
        if (!parentNode) {
            return false;
        }
        const { children } = parentNode;
        const position = children.indexOf(token);
        return children.slice(0, position).some(child => matchesRest(child));
    }
    // ' '
    return token.getAncestors().some(ancestor => matchesRest(ancestor));
};
/**
 * Turn the placeholders of escaped characters back into the characters themselves.
 * @param selector selector text containing placeholders
 * @returns the restored text, trimmed
 */
const desanitize = (selector) => specialChars
    .reduce((text, [char, entity]) => text.replaceAll(entity, char), selector)
    .trim();
/**
 * Strip one pair of matching quotes that surrounds a value.
 * The value is trimmed first, because the argument of a functional pseudo-selector is
 * captured together with the whitespace around its quotes; without that, ` "abc" ` would
 * keep its quotes. Line breaks inside the quotes are accepted as well.
 * @param val attribute value or argument of a functional pseudo-selector
 * @returns the text between the quotes, or the trimmed value when it is not quoted
 */
const deQuote = (val) => {
    const trimmed = val.trim();
    return /^(["']).*\1$/su.test(trimmed) ? trimmed.slice(1, -1) : trimmed;
};
/**
 * Build a predicate that tests a node against a full selector, i.e. one that may contain
 * attribute selectors, pseudo-selectors, combinators and comma-separated alternatives.
 * The selector is parsed inside the returned predicate, so syntax errors surface when the
 * predicate is called.
 * @param selector selector text
 * @param scope node that `:scope` refers to
 * @param has node owning the enclosing `:has()` pseudo-selector, if any
 * @throws `SyntaxError` invalid selector, undefined pseudo-selector, or unclosed `[` / `(`
 */
const checkToken = (selector, scope, has) => (token) => {
let sanitized = selector.trim();
// Replace backslash-escaped special characters by their placeholders so that the parsing regexes do not see them.
for (const [c, entity] of specialChars) {
sanitized = sanitized.replaceAll(`\\${c}`, entity);
}
// `stack` holds the comma-separated alternatives; each alternative (`condition`) is a list of
// steps, and each `step` is a list of parsed simple selectors.
const stack = [[[]]];
// `regex` selects what the parser expects next: regular syntax, the rest of an attribute selector, or the rest of a function argument.
let regex = regularRegex, mt = regex.exec(sanitized), [condition] = stack, [step] = condition;
/**
 * Parse the type/name selectors and simple pseudo-selectors at the start of the remaining text
 * and append them to the current step.
 * @param index end position of the text to parse
 * @throws `SyntaxError` a type selector that does not come first
 * @throws `SyntaxError` undefined pseudo-selector
 */
const pushSimple = (index) => {
const str = sanitized.slice(0, index).trim();
if (!str) {
return;
}
// Split in front of every `:` and `#`, keeping the delimiter with the piece that follows it.
const pieces = str.split(/(?=[:#])/u);
for (let i = 0; i < pieces.length; i++) {
const piece = pieces[i];
if (!/^[:#]/u.test(piece)) {
/* istanbul ignore if */
if (step.length > 0) {
throw new SyntaxError(`Invalid selector!\n${selector}\nType selectors must come first.`);
}
else {
step.push(piece);
}
}
else if (piece.startsWith(':')) {
if (simplePseudos.has(piece.slice(1))) {
step.push(piece);
}
// An unknown `:piece` right after a `#name` piece is taken as part of that name and merged back into it.
else /* istanbul ignore else */ if (pieces[i - 1]?.startsWith('#')) {
pieces[i - 1] += piece;
pieces.splice(i, 1);
i--;
}
else {
throw new SyntaxError(`Undefined pseudo selector!\n${desanitize(piece)}`);
}
}
}
// Name selectors are appended last, with their escaped characters restored.
step.push(...pieces.filter(piece => piece.startsWith('#')).map(desanitize));
};
/**
 * Reject an empty step, except for the first step of a `:has()` argument.
 * @throws `SyntaxError` invalid selector
 */
const needUniversal = () => {
/* istanbul ignore if */
if (step.length === 0 && (condition.length > 1 || !has)) {
throw new SyntaxError(`Invalid selector!\n${selector}\nYou may need the universal selector '*'.`);
}
};
while (mt) {
let { 0: syntax, index } = mt;
// A whitespace-only match: if a grouping character follows, that character is the actual
// syntax; otherwise the whitespace is the descendant combinator, represented by ''.
if (!syntax.trim()) {
index += syntax.length;
const char = sanitized[index];
syntax = grouping.has(char) ? char : '';
}
if (syntax === ',') { // Case 1: comma-separated alternative
pushSimple(index);
needUniversal();
condition = [[]];
[step] = condition;
stack.push(condition);
}
else if (combinator.has(syntax)) { // Case 2: combinator
// A `:has()` argument that starts with `>`, `+` or `~` gets '' as its first step, standing for the `:has()` owner.
if (has && syntax && condition.length === 1 && step.length === 0 && !sanitized.slice(0, index).trim()) {
step.push('');
}
else {
pushSimple(index);
}
needUniversal();
// The combinator is stored on the step to its left.
step.relation = syntax;
step = [];
condition.push(step);
}
else if (syntax === '[') { // Case 3: attribute selector opens
pushSimple(index);
regex = attributeRegex;
}
else if (syntax.endsWith(']')) { // Case 4: attribute selector closes
// Unquote the value and restore its escaped characters, then store `[key, operator, value, i]`.
mt[3] &&= desanitize(deQuote(mt[3]));
step.push(mt.slice(1));
regex = regularRegex;
}
else if (syntax === '(') { // Case 5: functional pseudo-selector opens
const i = sanitized.lastIndexOf(':', index), pseudo = sanitized.slice(i + 1, index);
/* istanbul ignore if */
if (i === -1 || !complexPseudos.has(pseudo)) {
throw new SyntaxError(`Undefined pseudo selector!\n${desanitize(sanitized)}`);
}
pushSimple(i);
step.push(pseudo); // temporarily store the functional pseudo-selector name
regex = functionRegex;
}
else { // Case 6: functional pseudo-selector closes
// Take the stored name back off the step and store `[argument, pseudo]` instead.
mt.push(step.pop());
mt[1] &&= deQuote(mt[1]);
step.push(mt.slice(1));
regex = regularRegex;
}
// Drop the consumed text before looking for the next piece of syntax.
sanitized = sanitized.slice(index + syntax.length);
if (grouping.has(syntax)) {
sanitized = sanitized.trim();
}
mt = regex.exec(sanitized);
}
// Parsing ended outside of brackets: parse the remaining text and test the node against each alternative.
if (regex === regularRegex) {
pushSimple();
needUniversal();
return stack.some(copy => matchesArray(token, copy, scope, has));
}
/* istanbul ignore next */
throw new SyntaxError(`Unclosed '${regex === attributeRegex ? '[' : '('}' in the selector!\n${desanitize(sanitized)}`);
};
/* NOT FOR BROWSER END */
/**
 * Turn a selector into a predicate over nodes.
 * A selector made only of lower-case letters, `-`, `#`, commas and whitespace around commas
 * is handled as a list of plain type/name selectors; anything else goes through the full parser.
 * @param selector selector text
 * @param scope node that `:scope` refers to
 * @param has node owning the enclosing `:has()` pseudo-selector, if any
 */
const getCondition = (selector, scope, has) => {
    /* NOT FOR BROWSER */
    const needsFullParser = /[^a-z\-,#\s]|(?<![\s,])\s+(?![\s,])/u.test(selector.trim());
    if (needsFullParser) {
        return checkToken(selector, scope, has);
    }
    /* NOT FOR BROWSER END */
    const alternatives = selector.split(',');
    return ({ type, name }) => {
        for (const alternative of alternatives) {
            if (basic(alternative.trim(), type, name)) {
                return true;
            }
        }
        return false;
    };
};
// Publish the predicate factory on the CommonJS exports object.
exports.getCondition = getCondition;
// Record this module's file path in the `parsers` table of ../util/constants under the key 'parseSelector'.
constants_1.parsers['parseSelector'] = __filename;
;