UNPKG

@adguard/agtree

Version:
494 lines (491 loc) 22.6 kB
/*
 * AGTree v3.2.2 (build date: Tue, 08 Jul 2025 13:39:47 GMT)
 * (c) 2025 Adguard Software Ltd.
 * Released under the MIT license
 * https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
 */
import { sprintf } from 'sprintf-js';
import { hasToken, TokenType } from '@adguard/css-tokenizer';
import { CosmeticRuleSeparatorUtils } from '../../utils/cosmetic-rule-separator.js';
import { AdblockSyntax } from '../../utils/adblockers.js';
import { DomainListParser } from '../misc/domain-list-parser.js';
import { ModifierListParser } from '../misc/modifier-list.js';
import { OPEN_SQUARE_BRACKET, DOLLAR_SIGN, CLOSE_SQUARE_BRACKET, CSS_BLOCK_OPEN, CSS_BLOCK_CLOSE, UBO_SCRIPTLET_MASK, UBO_SCRIPTLET_MASK_LEGACY, ADG_SCRIPTLET_MASK, UBO_HTML_MASK } from '../../utils/constants.js';
import { RuleCategory, CosmeticRuleType } from '../../nodes/index.js';
import { AdblockSyntaxError } from '../../errors/adblock-syntax-error.js';
import { StringUtils } from '../../utils/string.js';
import { CommentParser } from '../comment/comment-parser.js';
import { defaultParserOptions } from '../options.js';
import { UboSelectorParser } from '../css/ubo-selector-parser.js';
import { AdgCssInjectionParser } from '../css/adg-css-injection-parser.js';
import { AbpSnippetInjectionBodyParser } from './body/abp-snippet-injection-body-parser.js';
import { UboScriptletInjectionBodyParser } from './body/ubo-scriptlet-injection-body-parser.js';
import { AdgScriptletInjectionBodyParser } from './body/adg-scriptlet-injection-body-parser.js';
import { BaseParser } from '../base-parser.js';
import { UboPseudoName } from '../../common/ubo-selector-common.js';

/**
 * Possible error messages raised while parsing cosmetic rules.
 * Formatted with {@link sprintf}.
 */
const ERROR_MESSAGES = {
    EMPTY_RULE_BODY: 'Empty rule body',
    INVALID_BODY_FOR_SEPARATOR: "Body '%s' is not valid for the '%s' cosmetic rule separator",
    MISSING_ADGUARD_MODIFIER_LIST_END: "Missing '%s' at the end of the AdGuard modifier list in pattern '%s'",
    MISSING_ADGUARD_MODIFIER_LIST_MARKER: "Missing '%s' at the beginning of the AdGuard modifier list in pattern '%s'",
    SYNTAXES_CANNOT_BE_MIXED: "'%s' syntax cannot be mixed with '%s' syntax",
    SYNTAX_DISABLED: "Parsing '%s' syntax is disabled, but the rule uses it",
};

/**
 * Coarse shape check for an AdGuard CSS injection body: something, followed by `{`,
 * something, and a closing `}` at the very end.
 */
const ADG_CSS_INJECTION_PATTERN = /^(?:.+){(?:.+)}$/;

/**
 * `CosmeticRuleParser` is responsible for parsing cosmetic rules.
 *
 * Where possible, it automatically detects the difference between supported syntaxes:
 * - AdGuard
 * - uBlock Origin
 * - Adblock Plus
 *
 * If the syntax is common / cannot be determined, the parser gives `Common` syntax.
 *
 * Please note that syntactically correct rules are parsed even if they are not actually
 * compatible with the given adblocker. This is a completely natural behavior, meaningful
 * checking of compatibility is not done at the parser level.
 */
// TODO: Make raw body parsing optional
// TODO: Split into smaller sections
class CosmeticRuleParser extends BaseParser {
    /**
     * Determines whether a rule is a cosmetic rule. The rule is considered cosmetic if it
     * is not a comment rule and it contains a cosmetic rule separator.
     *
     * @param raw Raw rule
     * @returns `true` if the rule is a cosmetic rule, `false` otherwise
     */
    static isCosmeticRule(raw) {
        const trimmed = raw.trim();

        if (CommentParser.isCommentRule(trimmed)) {
            return false;
        }

        return CosmeticRuleSeparatorUtils.find(trimmed) !== null;
    }

    /**
     * Parses a cosmetic rule. The structure of the cosmetic rules:
     *  - pattern (AdGuard pattern can have modifiers, other syntaxes don't)
     *  - separator
     *  - body
     *
     * @param raw Raw input to parse.
     * @param options Global parser options.
     * @param baseOffset Starting offset of the input. Node locations are calculated relative to this offset.
     * @returns
     * Parsed cosmetic rule AST or null if it failed to parse based on the known cosmetic rules
     * @throws If the input matches the cosmetic rule pattern but syntactically invalid
     */
    // TODO: Split to smaller functions
    static parse(raw, options = defaultParserOptions, baseOffset = 0) {
        // Find cosmetic rule separator - each cosmetic rule must have it, otherwise it is not a cosmetic rule
        const separatorResult = CosmeticRuleSeparatorUtils.find(raw);

        if (!separatorResult) {
            return null;
        }

        let syntax = AdblockSyntax.Common;
        let modifiers;

        // All four boundaries below are indexes into `raw`
        const patternStart = StringUtils.skipWS(raw);
        const patternEnd = StringUtils.skipWSBack(raw, separatorResult.start - 1) + 1;
        const bodyStart = StringUtils.skipWS(raw, separatorResult.end);
        const bodyEnd = StringUtils.skipWSBack(raw) + 1;

        // Note we use '<=' instead of '===' because we have bidirectional trim
        if (bodyEnd <= bodyStart) {
            throw new AdblockSyntaxError(
                ERROR_MESSAGES.EMPTY_RULE_BODY,
                baseOffset,
                baseOffset + raw.length,
            );
        }

        // Step 1. Parse the pattern: it can be a domain list or a domain list with modifiers (AdGuard)
        const rawPattern = raw.slice(patternStart, patternEnd);

        // `patternOffset` is an index into `rawPattern` (NOT into `raw`). To translate it into a
        // location in the input, `patternBaseOffset` must be added. Keeping the two coordinate
        // systems apart is what makes rules with leading whitespace parse correctly.
        const patternBaseOffset = baseOffset + patternStart;
        let patternOffset = 0;

        if (rawPattern[patternOffset] === OPEN_SQUARE_BRACKET) {
            // Save offset to the beginning of the modifier list for later
            const modifierListStart = patternOffset;

            // Consume opening square bracket
            patternOffset += 1;

            // Skip whitespace after opening square bracket
            patternOffset = StringUtils.skipWS(rawPattern, patternOffset);

            // Open square bracket should be followed by a modifier separator: [$
            if (rawPattern[patternOffset] !== DOLLAR_SIGN) {
                throw new AdblockSyntaxError(
                    sprintf(ERROR_MESSAGES.MISSING_ADGUARD_MODIFIER_LIST_MARKER, DOLLAR_SIGN, rawPattern),
                    patternBaseOffset + patternOffset,
                    baseOffset + patternEnd,
                );
            }

            // Consume modifier separator
            patternOffset += 1;

            // Skip whitespace after modifier separator
            patternOffset = StringUtils.skipWS(rawPattern, patternOffset);

            // Modifier list ends with the last unescaped square bracket
            // We search for the last unescaped square bracket, because some modifiers can contain square brackets,
            // e.g. [$domain=/example[0-9]\.(com|org)/]##.ad
            const modifierListEnd = StringUtils.findLastUnescapedCharacter(rawPattern, CLOSE_SQUARE_BRACKET);

            if (modifierListEnd === -1) {
                throw new AdblockSyntaxError(
                    sprintf(ERROR_MESSAGES.MISSING_ADGUARD_MODIFIER_LIST_END, CLOSE_SQUARE_BRACKET, rawPattern),
                    patternBaseOffset + patternOffset,
                    baseOffset + patternEnd,
                );
            }

            // Parse modifier list; both slice bounds are indexes into `rawPattern`
            modifiers = ModifierListParser.parse(
                rawPattern.slice(patternOffset, modifierListEnd),
                options,
                patternBaseOffset + patternOffset,
            );

            // Expand modifier list location to include the opening and closing square brackets
            if (options.isLocIncluded) {
                modifiers.start = patternBaseOffset + modifierListStart;
                modifiers.end = patternBaseOffset + modifierListEnd + 1;
            }

            // Consume modifier list
            patternOffset = modifierListEnd + 1;

            // Change the syntax to ADG
            syntax = AdblockSyntax.Adg;
        }

        // Skip whitespace after modifier list
        patternOffset = StringUtils.skipWS(rawPattern, patternOffset);

        // Parse domains
        const domains = DomainListParser.parse(
            rawPattern.slice(patternOffset),
            options,
            patternBaseOffset + patternOffset,
        );

        // Step 2. Parse the separator
        const separator = {
            type: 'Value',
            value: separatorResult.separator,
        };

        if (options.isLocIncluded) {
            separator.start = baseOffset + separatorResult.start;
            separator.end = baseOffset + separatorResult.end;
        }

        const exception = CosmeticRuleSeparatorUtils.isException(separatorResult.separator);

        // Step 3. Parse the rule body
        let rawBody = raw.slice(bodyStart, bodyEnd);

        /**
         * Ensures that the rule syntax is common or the expected one. This function is used to prevent mixing
         * different syntaxes in the same rule.
         *
         * @example
         * The following rule mixes AdGuard and uBO syntaxes, because it uses AdGuard modifier list and uBO
         * CSS injection:
         * ```adblock
         * [$path=/something]example.com##.foo:style(color: red)
         * ```
         * In this case, parser sets syntax to AdGuard, because it detects the AdGuard modifier list, but
         * when parsing the rule body, it detects uBO CSS injection, which is not compatible with AdGuard.
         *
         * @param expectedSyntax Expected syntax
         * @throws If the rule syntax is not common or the expected one
         */
        const expectCommonOrSpecificSyntax = (expectedSyntax) => {
            if (syntax !== AdblockSyntax.Common && syntax !== expectedSyntax) {
                throw new AdblockSyntaxError(
                    sprintf(ERROR_MESSAGES.SYNTAXES_CANNOT_BE_MIXED, expectedSyntax, syntax),
                    baseOffset + patternStart,
                    baseOffset + bodyEnd,
                );
            }
        };

        let uboSelector;

        // Parse UBO rule modifiers
        if (options.parseUboSpecificRules) {
            uboSelector = UboSelectorParser.parse(rawBody, options, baseOffset + bodyStart);

            // From here on, the body is the selector with the uBO modifiers stripped off
            rawBody = uboSelector.selector.value;

            // Do not allow ADG modifiers and UBO modifiers in the same rule
            if (uboSelector.modifiers?.children?.length > 0) {
                // If an AdGuard modifier list was parsed above, the syntax is already ADG
                // and this call throws
                expectCommonOrSpecificSyntax(AdblockSyntax.Ubo);

                // Change the syntax to uBO
                syntax = AdblockSyntax.Ubo;

                // Store the rule modifiers
                // Please note that not each special uBO modifier is a rule modifier, some of them are
                // used for CSS injection, for example `:style()` and `:remove()`
                for (const modifier of uboSelector.modifiers.children) {
                    // TODO: Add support for matches-media and element hiding rules
                    // TODO: Improve this condition if new uBO modifiers are added
                    if (modifier.name.value === UboPseudoName.MatchesPath) {
                        // Prepare the modifier list if it does not exist yet
                        if (!modifiers) {
                            modifiers = {
                                type: 'ModifierList',
                                children: [],
                            };

                            if (options.isLocIncluded) {
                                modifiers.start = baseOffset + bodyStart;
                                modifiers.end = baseOffset + bodyEnd;
                            }
                        }

                        modifiers.children.push(modifier);
                    }
                }
            }
        }

        const raws = {
            text: raw,
        };

        // Properties shared by every cosmetic rule type; the type-specific
        // properties are merged in at the end
        const baseRule = {
            category: RuleCategory.Cosmetic,
            exception,
            modifiers,
            domains,
            separator,
        };

        if (options.includeRaws) {
            baseRule.raws = raws;
        }

        if (options.isLocIncluded) {
            baseRule.start = baseOffset;
            baseRule.end = baseOffset + raw.length;
        }

        /**
         * Parses uBO CSS injection rules (`:style()` / `:remove()`, optionally with `:matches-media()`).
         *
         * @returns Rule properties, or `null` if the body carries no `:style()` / `:remove()` modifier
         * @throws If the rule already uses another (non-common) syntax
         */
        const parseUboCssInjection = () => {
            // Also covers a missing `children` array, which would otherwise break the loop below
            if (!uboSelector?.modifiers?.children?.length) {
                return null;
            }

            expectCommonOrSpecificSyntax(AdblockSyntax.Ubo);

            const selectorList = uboSelector.selector;
            let declarationList;
            let mediaQueryList;
            let remove = false;

            for (const modifier of uboSelector.modifiers.children) {
                switch (modifier.name.value) {
                    case UboPseudoName.Style:
                        declarationList = modifier.value;
                        break;

                    case UboPseudoName.Remove:
                        // `:remove()` has no declarations, so an empty value node stands in for them
                        declarationList = {
                            type: 'Value',
                            value: '',
                        };
                        remove = true;
                        break;

                    case UboPseudoName.MatchesMedia:
                        mediaQueryList = modifier.value;
                        break;

                    default:
                        break;
                }
            }

            // If neither `:style()` nor `:remove()` is present
            if (!declarationList) {
                return null;
            }

            const body = {
                type: 'CssInjectionRuleBody',
                selectorList,
                declarationList,
                mediaQueryList,
                remove,
            };

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Ubo,
                type: CosmeticRuleType.CssInjectionRule,
                body,
            };
        };

        /**
         * Parses element hiding rules. This is the fallback parser, it accepts any body.
         *
         * @returns Rule properties; the syntax is whatever was detected so far
         */
        const parseElementHiding = () => {
            const selectorList = {
                type: 'Value',
                value: rawBody,
            };

            if (options.isLocIncluded) {
                selectorList.start = baseOffset + bodyStart;
                selectorList.end = baseOffset + bodyEnd;
            }

            const body = {
                type: 'ElementHidingRuleBody',
                selectorList,
            };

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax,
                type: CosmeticRuleType.ElementHidingRule,
                body,
            };
        };

        /**
         * Parses AdGuard CSS injection rules.
         *
         * @returns Rule properties, or `null` if the body does not look like `selector { declarations }`
         * @throws If the rule already uses another (non-common) syntax
         */
        const parseAdgCssInjection = () => {
            // TODO: Improve this detection. Need to cover the following cases:
            //  #$#body { color: red;
            //  #$#@media (min-width: 100px) { body { color: red; }
            // ADG CSS injection
            if (!ADG_CSS_INJECTION_PATTERN.test(rawBody)) {
                return null;
            }

            expectCommonOrSpecificSyntax(AdblockSyntax.Adg);

            return {
                syntax: AdblockSyntax.Adg,
                type: CosmeticRuleType.CssInjectionRule,
                body: AdgCssInjectionParser.parse(rawBody, options, baseOffset + bodyStart),
            };
        };

        /**
         * Parses Abp CSS injection rules
         * eg: example.com##.foo { display: none; }
         *
         * @returns Rule properties, or `null` if ABP parsing is disabled or the body has no CSS block
         */
        const parseAbpCssInjection = () => {
            if (!options.parseAbpSpecificRules) {
                return null;
            }

            // Cheap pre-check: stop only if the body contains neither a CSS block opener
            // nor a CSS block closer
            if (rawBody.indexOf(CSS_BLOCK_OPEN) === -1 && rawBody.indexOf(CSS_BLOCK_CLOSE) === -1) {
                return null;
            }

            // Token-level check done by the CSS tokenizer
            // NOTE(review): presumably this ignores curly brackets inside strings etc. - confirm
            // against `hasToken` in @adguard/css-tokenizer
            if (!hasToken(rawBody, new Set([TokenType.OpenCurlyBracket, TokenType.CloseCurlyBracket]))) {
                return null;
            }

            // The body is parsed with the AdGuard CSS injection parser, but the rule is tagged as ABP
            const body = AdgCssInjectionParser.parse(rawBody, options, baseOffset + bodyStart);

            return {
                syntax: AdblockSyntax.Abp,
                type: CosmeticRuleType.CssInjectionRule,
                body,
            };
        };

        /**
         * Parses ABP snippet injection rules.
         *
         * @returns Rule properties
         * @throws If ABP parsing is disabled or the rule already uses another (non-common) syntax
         */
        const parseAbpSnippetInjection = () => {
            if (!options.parseAbpSpecificRules) {
                throw new AdblockSyntaxError(
                    sprintf(ERROR_MESSAGES.SYNTAX_DISABLED, AdblockSyntax.Abp),
                    baseOffset + bodyStart,
                    baseOffset + bodyEnd,
                );
            }

            expectCommonOrSpecificSyntax(AdblockSyntax.Abp);

            const body = AbpSnippetInjectionBodyParser.parse(rawBody, options, baseOffset + bodyStart);

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Abp,
                type: CosmeticRuleType.ScriptletInjectionRule,
                body,
            };
        };

        /**
         * Parses uBO scriptlet injection rules.
         *
         * @returns Rule properties, or `null` if the body does not start with a uBO scriptlet mask
         * @throws If uBO parsing is disabled or the rule already uses another (non-common) syntax
         */
        const parseUboScriptletInjection = () => {
            if (!rawBody.startsWith(UBO_SCRIPTLET_MASK) && !rawBody.startsWith(UBO_SCRIPTLET_MASK_LEGACY)) {
                return null;
            }

            if (!options.parseUboSpecificRules) {
                throw new AdblockSyntaxError(
                    sprintf(ERROR_MESSAGES.SYNTAX_DISABLED, AdblockSyntax.Ubo),
                    baseOffset + bodyStart,
                    baseOffset + bodyEnd,
                );
            }

            expectCommonOrSpecificSyntax(AdblockSyntax.Ubo);

            const body = UboScriptletInjectionBodyParser.parse(rawBody, options, baseOffset + bodyStart);

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Ubo,
                type: CosmeticRuleType.ScriptletInjectionRule,
                body,
            };
        };

        /**
         * Parses AdGuard scriptlet injection rules.
         *
         * @returns Rule properties, or `null` if the body does not start with the AdGuard scriptlet mask
         * @throws If the rule already uses another (non-common) syntax
         */
        const parseAdgScriptletInjection = () => {
            // ADG scriptlet injection
            if (!rawBody.startsWith(ADG_SCRIPTLET_MASK)) {
                return null;
            }

            expectCommonOrSpecificSyntax(AdblockSyntax.Adg);

            const body = AdgScriptletInjectionBodyParser.parse(rawBody, options, baseOffset + bodyStart);

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Adg,
                type: CosmeticRuleType.ScriptletInjectionRule,
                body,
            };
        };

        /**
         * Parses AdGuard JS injection rules. The body is stored as-is, without further parsing.
         *
         * @returns Rule properties
         * @throws If the rule already uses another (non-common) syntax
         */
        const parseAdgJsInjection = () => {
            expectCommonOrSpecificSyntax(AdblockSyntax.Adg);

            const body = {
                type: 'Value',
                value: rawBody,
            };

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Adg,
                type: CosmeticRuleType.JsInjectionRule,
                body,
            };
        };

        /**
         * Parses uBO HTML filtering rules. The body is stored as-is, without further parsing.
         *
         * @returns Rule properties, or `null` if the body does not start with the uBO HTML mask
         * @throws If uBO parsing is disabled or the rule already uses another (non-common) syntax
         */
        const parseUboHtmlFiltering = () => {
            if (!rawBody.startsWith(UBO_HTML_MASK)) {
                return null;
            }

            if (!options.parseUboSpecificRules) {
                throw new AdblockSyntaxError(
                    sprintf(ERROR_MESSAGES.SYNTAX_DISABLED, AdblockSyntax.Ubo),
                    baseOffset + bodyStart,
                    baseOffset + bodyEnd,
                );
            }

            expectCommonOrSpecificSyntax(AdblockSyntax.Ubo);

            const body = {
                type: 'Value',
                value: rawBody,
            };

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Ubo,
                type: CosmeticRuleType.HtmlFilteringRule,
                body,
            };
        };

        /**
         * Parses AdGuard HTML filtering rules. The body is stored as-is, without further parsing.
         *
         * @returns Rule properties
         * @throws If the rule already uses another (non-common) syntax
         */
        const parseAdgHtmlFiltering = () => {
            expectCommonOrSpecificSyntax(AdblockSyntax.Adg);

            const body = {
                type: 'Value',
                value: rawBody,
            };

            if (options.isLocIncluded) {
                body.start = baseOffset + bodyStart;
                body.end = baseOffset + bodyEnd;
            }

            return {
                syntax: AdblockSyntax.Adg,
                type: CosmeticRuleType.HtmlFilteringRule,
                body,
            };
        };

        // Create a fast lookup table for cosmetic rule separators and their parsing functions.
        // One separator can have multiple parsing functions. If the first function returns null,
        // the next function is called, and so on.
        // If all functions return null, an error should be thrown.
        const separatorMap = {
            '##': [
                parseUboHtmlFiltering,
                parseUboScriptletInjection,
                parseUboCssInjection,
                parseAbpCssInjection,
                parseElementHiding,
            ],
            '#@#': [
                parseUboHtmlFiltering,
                parseUboScriptletInjection,
                parseUboCssInjection,
                parseAbpCssInjection,
                parseElementHiding,
            ],
            '#?#': [parseUboCssInjection, parseAbpCssInjection, parseElementHiding],
            '#@?#': [parseUboCssInjection, parseAbpCssInjection, parseElementHiding],
            '#$#': [parseAdgCssInjection, parseAbpSnippetInjection],
            '#@$#': [parseAdgCssInjection, parseAbpSnippetInjection],
            '#$?#': [parseAdgCssInjection],
            '#@$?#': [parseAdgCssInjection],
            '#%#': [parseAdgScriptletInjection, parseAdgJsInjection],
            '#@%#': [parseAdgScriptletInjection, parseAdgJsInjection],
            $$: [parseAdgHtmlFiltering],
            '$@$': [parseAdgHtmlFiltering],
        };

        // A separator that is missing from the table gets an empty list, so it ends up in the
        // "invalid body" error below instead of failing with a bare TypeError
        const parseFunctions = separatorMap[separatorResult.separator] ?? [];

        let restProps;

        for (const parseFunction of parseFunctions) {
            restProps = parseFunction();

            if (restProps) {
                break;
            }
        }

        // If none of the parsing functions returned a result, it means that the rule is unknown / invalid.
        if (!restProps) {
            throw new AdblockSyntaxError(
                sprintf(ERROR_MESSAGES.INVALID_BODY_FOR_SEPARATOR, rawBody, separatorResult.separator),
                baseOffset + bodyStart,
                baseOffset + bodyEnd,
            );
        }

        // Combine the base rule with the rest of the properties.
        return {
            ...baseRule,
            ...restProps,
        };
    }
}

export { CosmeticRuleParser, ERROR_MESSAGES };