UNPKG

@adguard/agtree

Version:
200 lines (197 loc) 9.47 kB
/*
 * AGTree v3.2.2 (build date: Tue, 08 Jul 2025 13:39:47 GMT)
 * (c) 2025 Adguard Software Ltd.
 * Released under the MIT license
 * https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
 */
import { AdblockSyntax } from '../../utils/adblockers.js';
import { PREPROCESSOR_MARKER, PREPROCESSOR_MARKER_LEN, HASHMARK, PREPROCESSOR_SEPARATOR, OPEN_PARENTHESIS, SAFARI_CB_AFFINITY, CLOSE_PARENTHESIS, COMMA, IF, INCLUDE } from '../../utils/constants.js';
import { StringUtils } from '../../utils/string.js';
import { RuleCategory, CommentRuleType } from '../../nodes/index.js';
import { LogicalExpressionParser } from '../misc/logical-expression-parser.js';
import { AdblockSyntaxError } from '../../errors/adblock-syntax-error.js';
import { ParameterListParser } from '../misc/parameter-list-parser.js';
import { defaultParserOptions } from '../options.js';
import { BaseParser } from '../base-parser.js';
import { ValueParser } from '../misc/value-parser.js';

/**
 * Pre-processor directives
 *
 * @see {@link https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#pre-processor-directives}
 * @see {@link https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#pre-parsing-directives}
 */

/**
 * Attaches the optional `raws` and location fields to a freshly created rule node.
 *
 * The node is mutated in place and returned, so the fields are appended after
 * whatever properties the caller has already put on it.
 *
 * @param node Newly built pre-processor comment node.
 * @param raw Raw rule text the node was parsed from.
 * @param options Parser options; `includeRaws` and `isLocIncluded` are consulted.
 * @param baseOffset Offset that the node's `start` / `end` are relative to.
 * @returns The same `node` instance.
 */
const attachRawsAndLocation = (node, raw, options, baseOffset) => {
    if (options.includeRaws) {
        node.raws = { text: raw };
    }

    if (options.isLocIncluded) {
        node.start = baseOffset;
        node.end = baseOffset + raw.length;
    }

    return node;
};

/**
 * `PreProcessorCommentParser` turns pre-processor comments into AST nodes.
 * Pre-processor comments are special comments that steer the filter list processor.
 * Only the general shape of a directive is handled here; parameters are not
 * validated while parsing.
 *
 * @example
 * For the rule
 * ```adblock
 * !#if (adguard)
 * ```
 * the directive name is `if` and its value is `(adguard)`; the value is not
 * validated any further.
 *
 * @see {@link https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#pre-processor-directives}
 * @see {@link https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#pre-parsing-directives}
 */
class PreProcessorCommentParser extends BaseParser {
    /**
     * Tells whether the given rule looks like a pre-processor rule.
     *
     * @param raw Raw rule
     * @returns `true` when the trimmed rule starts with the pre-processor marker and the
     * character right after the marker is not a hashmark, `false` otherwise
     */
    static isPreProcessorRule(raw) {
        const candidate = raw.trim();

        if (!candidate.startsWith(PREPROCESSOR_MARKER)) {
            return false;
        }

        // A hashmark right after the marker means "!##...", which is commonly used
        // in AdGuard filters and must not be treated as a directive
        return candidate[PREPROCESSOR_MARKER_LEN] !== HASHMARK;
    }

    /**
     * Parses a raw rule as a pre-processor comment.
     *
     * @param raw Raw input to parse.
     * @param options Global parser options.
     * @param baseOffset Starting offset of the input. Node locations are calculated relative to this offset.
     * @returns Pre-processor comment AST, or `null` if the raw rule is not a pre-processor comment.
     * @throws {AdblockSyntaxError} If the `safari_cb_affinity` directive name is followed by
     * whitespace, by an unexpected character, or by a parameter list without a closing
     * parenthesis; or if an `if` / `include` directive has no parameters.
     */
    static parse(raw, options = defaultParserOptions, baseOffset = 0) {
        // Anything that is not a pre-processor rule is none of our business
        if (!PreProcessorCommentParser.isPreProcessorRule(raw)) {
            return null;
        }

        // Skip leading whitespace, then step over the pre-processor marker
        let offset = StringUtils.skipWS(raw, 0) + PREPROCESSOR_MARKER_LEN;

        // Whitespace between the marker and the directive name is tolerated,
        // even though it is incorrect according to the spec
        offset = StringUtils.skipWS(raw, offset);

        // The directive name runs from here up to the first separator character,
        // the first opening parenthesis, or the end of the input
        const nameStart = offset;

        for (; offset < raw.length; offset += 1) {
            const ch = raw[offset];

            if (ch === PREPROCESSOR_SEPARATOR || ch === OPEN_PARENTHESIS) {
                break;
            }
        }

        const nameEnd = offset;
        const name = ValueParser.parse(raw.slice(nameStart, nameEnd), options, baseOffset + nameStart);

        // Whitespace after the directive name is tolerated as well, although
        // this may be incorrect according to the spec
        offset = StringUtils.skipWS(raw, offset);

        // "safari_cb_affinity" is special: it may carry a parenthesized parameter list
        if (name.value === SAFARI_CB_AFFINITY) {
            // No whitespace is allowed between the name and whatever follows it
            if (offset > nameEnd) {
                throw new AdblockSyntaxError(
                    `Unexpected whitespace after "${SAFARI_CB_AFFINITY}" directive name`,
                    baseOffset + nameEnd,
                    baseOffset + offset,
                );
            }

            // The parameter list is optional. If anything other than whitespace remains,
            // it has to be a parenthesized list; otherwise we fall through to the
            // "directive without parameters" handling below.
            const hasRemainder = StringUtils.skipWS(raw, offset) !== raw.length;

            if (hasRemainder) {
                if (raw[offset] !== OPEN_PARENTHESIS) {
                    throw new AdblockSyntaxError(
                        `Unexpected character '${raw[offset]}' after '${SAFARI_CB_AFFINITY}' directive name`,
                        baseOffset + offset,
                        baseOffset + offset + 1,
                    );
                }

                // Step over the opening parenthesis; the list content starts right after it
                offset += 1;
                const listStart = offset;

                // The last non-whitespace character of the rule must be the closing parenthesis
                const listEnd = StringUtils.skipWSBack(raw);

                if (listEnd === -1 || raw[listEnd] !== CLOSE_PARENTHESIS) {
                    throw new AdblockSyntaxError(
                        `Missing closing parenthesis for '${SAFARI_CB_AFFINITY}' directive`,
                        baseOffset + offset,
                        baseOffset + raw.length,
                    );
                }

                // Comma separated list of parameters; they are not validated at this stage
                const listParams = ParameterListParser.parse(
                    raw.slice(listStart, listEnd),
                    options,
                    baseOffset + listStart,
                    COMMA,
                );

                return attachRawsAndLocation(
                    {
                        type: CommentRuleType.PreProcessorCommentRule,
                        category: RuleCategory.Comment,
                        syntax: AdblockSyntax.Adg,
                        name,
                        params: listParams,
                    },
                    raw,
                    options,
                    baseOffset,
                );
            }
        }

        // Nothing left after the name: a directive without parameters
        // (e.g. "!#safari_cb_affinity" or "!#endif")
        if (offset === raw.length) {
            // "if" and "include" make no sense without parameters
            const requiresParams = name.value === IF || name.value === INCLUDE;

            if (requiresParams) {
                throw new AdblockSyntaxError(
                    `Directive "${name.value}" requires parameters`,
                    baseOffset,
                    baseOffset + raw.length,
                );
            }

            return attachRawsAndLocation(
                {
                    type: CommentRuleType.PreProcessorCommentRule,
                    category: RuleCategory.Comment,
                    syntax: AdblockSyntax.Common,
                    name,
                },
                raw,
                options,
                baseOffset,
            );
        }

        // Parameters span from the current offset to the last non-whitespace character
        const paramsStart = offset;
        const paramsEnd = StringUtils.skipWSBack(raw) + 1;
        const paramsText = raw.slice(paramsStart, paramsEnd);

        // "if" takes a logical expression; every other directive keeps its parameters
        // as a plain value
        const paramsParser = name.value === IF ? LogicalExpressionParser : ValueParser;
        const params = paramsParser.parse(paramsText, options, baseOffset + paramsStart);

        return attachRawsAndLocation(
            {
                type: CommentRuleType.PreProcessorCommentRule,
                category: RuleCategory.Comment,
                syntax: AdblockSyntax.Common,
                name,
                params,
            },
            raw,
            options,
            baseOffset,
        );
    }
}

export { PreProcessorCommentParser };