UNPKG

@adguard/agtree

Version:

Tool set for working with adblock filter lists

github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree

AdguardTeam/tsurlfilter

354 lines (351 loc) • 13.7 kB

JavaScript

/*
 * AGTree v3.2.2 (build date: Tue, 08 Jul 2025 13:39:47 GMT)
 * (c) 2025 Adguard Software Ltd.
 * Released under the MIT license
 * https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
 */
import { TokenType, getFormattedTokenName } from '@adguard/css-tokenizer';
import { sprintf } from 'sprintf-js';
import { tokenizeBalanced } from './balancing.js';
import { EMPTY } from '../../utils/constants.js';
import { AdblockSyntaxError } from '../../errors/adblock-syntax-error.js';
import { ERROR_MESSAGES, END_OF_INPUT } from './constants.js';
import { EXT_CSS_PSEUDO_CLASSES, EXT_CSS_PSEUDO_CLASSES_STRICT, LEGACY_EXT_CSS_ATTRIBUTE_PREFIX, ABP_EXT_CSS_PREFIX } from '../../converter/data/css.js';

/**
 * @file CSS token stream.
 */

/**
 * Represents a stream of CSS tokens.
 *
 * The whole source is tokenized eagerly in the constructor; afterwards the
 * stream is navigated through a cursor (`index`) into the `tokens` array.
 */
class CssTokenStream {
    /**
     * The tokens in the stream. Each entry has the shape
     * `{ type, start, end, balance }`, where `start` / `end` are offsets
     * into `source`.
     */
    tokens = [];

    /**
     * The source string.
     */
    source = EMPTY;

    /**
     * The current index in the stream.
     */
    index = 0;

    /**
     * The base offset of the source string. It is added to token positions
     * when errors are reported.
     */
    baseOffset;

    /**
     * Initializes a new instance of the CssTokenStream class.
     *
     * @param source The source string to tokenize.
     * @param baseOffset The base offset of the source string.
     * @throws If tokenization fails. For an `AdblockSyntaxError`, the error
     * positions are shifted by `baseOffset` before it is rethrown; any other
     * error is rethrown unchanged.
     */
    constructor(source, baseOffset = 0) {
        this.source = source;

        // Tokenize the source string with the CSS tokenizer and add balance level to each token.
        // 'onToken' callback is invoked when a token is found in the source string.
        // Passed parameters:
        // - type: type of the token
        // - start: start index of the token
        // - end: end index of the token
        // - props: additional properties of the token, if any (unused here, hence the underscore)
        // - balance: balance level of the token
        try {
            tokenizeBalanced(source, (type, start, end, _, balance) => {
                this.tokens.push({
                    type,
                    start,
                    end,
                    balance,
                });
            });
        } catch (error) {
            // If the error is an AdblockSyntaxError, adjust the error positions to the base offset
            if (error instanceof AdblockSyntaxError) {
                error.start += baseOffset;
                error.end += baseOffset;
            }

            // Always rethrow: silently swallowing an unexpected error would leave
            // the stream with a partially filled token list and hide the failure.
            throw error;
        }

        this.index = 0;
        this.baseOffset = baseOffset;
    }

    /**
     * Gets the number of tokens in the stream.
     *
     * @returns The number of tokens in the stream.
     */
    get length() {
        return this.tokens.length;
    }

    /**
     * Checks if the end of the token stream is reached.
     *
     * @returns True if the end of the stream is reached, otherwise false.
     */
    isEof() {
        return this.index >= this.tokens.length;
    }

    /**
     * Gets the token at the specified index.
     *
     * @param index The index of the token to retrieve. Defaults to the current index.
     * @returns The token at the specified index or undefined if the index is out of bounds.
     */
    get(index = this.index) {
        return this.tokens[index];
    }

    /**
     * Gets the token at the specified index or throws if no token is found at the specified index.
     *
     * @param index The index of the token to retrieve. Defaults to the current index.
     * @returns The token at the specified index.
     * @throws If no token is found at the specified index.
     */
    getOrFail(index = this.index) {
        const token = this.get(index);

        if (!token) {
            throw new AdblockSyntaxError(
                sprintf(ERROR_MESSAGES.EXPECTED_ANY_TOKEN_BUT_GOT, END_OF_INPUT),
                this.baseOffset + this.source.length - 1,
                this.baseOffset + this.source.length,
            );
        }

        return token;
    }

    /**
     * Gets the source fragment of the token at the specified index.
     *
     * @param index The index of the token to retrieve the fragment for. Defaults to the current index.
     * @returns The source fragment of the token or an empty string if the index is out of bounds.
     */
    fragment(index = this.index) {
        const token = this.get(index);

        if (token) {
            return this.source.slice(token.start, token.end);
        }

        return EMPTY;
    }

    /**
     * Moves the index to the next token and returns it.
     *
     * @returns The token at the new index, or undefined if the stream was
     * already at its end or the last token has just been passed.
     */
    advance() {
        if (this.isEof()) {
            return undefined;
        }

        this.index += 1;

        return this.tokens[this.index];
    }

    /**
     * Looks ahead in the stream without changing the index.
     *
     * @param index The relative index to look ahead to, starting from the current index.
     * Values lower than 1 are treated as 1.
     * @returns The requested token or undefined if it lies beyond the end of the stream.
     */
    lookahead(index = 1) {
        return this.tokens[this.index + Math.max(1, index)];
    }

    /**
     * Looks behind in the stream without changing the index.
     *
     * @param index The relative index to look behind to, starting from the current index.
     * Values lower than 1 are treated as 1.
     * @returns The requested token or undefined if the current token is the first in the stream
     * (or the requested position lies before the start of the stream).
     */
    lookbehind(index = 1) {
        if (this.index === 0) {
            return undefined;
        }

        return this.tokens[this.index - Math.max(1, index)];
    }

    /**
     * Looks behind in the stream for the previous non-whitespace token without changing the index.
     *
     * @returns The previous non-whitespace token or undefined if it could not be found.
     */
    lookbehindForNonWs() {
        for (let i = this.index - 1; i >= 0; i -= 1) {
            if (this.tokens[i].type !== TokenType.Whitespace) {
                return this.tokens[i];
            }
        }

        return undefined;
    }

    /**
     * Skips whitespace tokens in the stream, starting at the current index.
     */
    skipWhitespace() {
        while (this.get()?.type === TokenType.Whitespace) {
            this.index += 1;
        }
    }

    /**
     * Skips tokens until a token whose balance level is one lower than the
     * balance level of the current token is reached (or the stream ends).
     * Does nothing if the current balance level is 0.
     *
     * @returns The number of tokens skipped.
     */
    skipUntilBalanced() {
        if (this.isEof()) {
            return 0;
        }

        // EOF was ruled out above, so get() returns a token here
        const currentBalance = this.get().balance;

        // If the current balance is 0, do nothing
        if (currentBalance === 0) {
            return 0;
        }

        // Otherwise, skip tokens until the balance is the current balance - 1
        let skipped = 0;

        while (!this.isEof() && this.get()?.balance !== currentBalance - 1) {
            this.index += 1;
            skipped += 1;
        }

        return skipped;
    }

    /**
     * Skips tokens until a token with the specified type or the end of the stream is reached.
     *
     * @param type The type of token to skip until.
     * @param balance The balance level of the token to skip until. If omitted, the balance level is not checked.
     * @returns The number of tokens skipped.
     */
    skipUntil(type, balance) {
        let skipped = 0;

        while (
            !this.isEof()
            && (this.get()?.type !== type || (balance !== undefined && this.get()?.balance !== balance))
        ) {
            this.index += 1;
            skipped += 1;
        }

        return skipped;
    }

    /**
     * Skips tokens until a token with the specified type or the end of the stream is reached. This is an extended
     * version of skipUntil that also returns the number of tokens skipped without calculating leading and trailing
     * whitespace tokens.
     *
     * Note: unlike skipUntil, the balance level is always compared strictly here, so a stop token is only
     * recognized when its balance level equals `balance`.
     *
     * @param type The type of token to skip until.
     * @param balance The balance level of the token to skip until.
     * @returns An object with `skipped` (the number of tokens skipped) and `skippedTrimmed` (the number of tokens
     * skipped without leading and trailing whitespace tokens).
     */
    skipUntilExt(type, balance) {
        let i = this.index;
        let firstNonWsToken = -1; // -1 means no non-whitespace token found yet
        let lastNonWsToken = -1; // -1 means no non-whitespace token found yet

        while (i < this.tokens.length) {
            const currentToken = this.tokens[i];

            if (currentToken.type === TokenType.Whitespace) {
                i += 1;
                continue;
            } else if (currentToken.type === type && currentToken.balance === balance) {
                break;
            }

            if (firstNonWsToken === -1) {
                firstNonWsToken = i;
            }

            lastNonWsToken = i;
            i += 1;
        }

        const skipped = i - this.index;

        this.index = i;

        return {
            skipped,
            // if firstNonWsToken is -1, then lastNonWsToken is also -1
            skippedTrimmed: firstNonWsToken === -1 ? 0 : lastNonWsToken - firstNonWsToken + 1,
        };
    }

    /**
     * Expects that the end of the stream is not reached.
     *
     * @throws If the end of the stream is reached.
     */
    expectNotEof() {
        if (this.isEof()) {
            throw new AdblockSyntaxError(
                'Unexpected end of input',
                this.baseOffset + this.source.length - 1,
                this.baseOffset + this.source.length,
            );
        }
    }

    /**
     * Expects the current token to have a specific type and optional value and balance level.
     *
     * @param type The expected token type.
     * @param data Optional expectation data (`balance` and / or `value`).
     * @throws If the end of the stream is reached or if the token type or expectation data does not match.
     */
    expect(type, data) {
        const token = this.get();

        if (!token) {
            throw new AdblockSyntaxError(
                sprintf(ERROR_MESSAGES.EXPECTED_TOKEN_BUT_GOT, getFormattedTokenName(type), END_OF_INPUT),
                this.baseOffset + this.source.length - 1,
                this.baseOffset + this.source.length,
            );
        }

        if (token.type !== type) {
            throw new AdblockSyntaxError(
                sprintf(
                    ERROR_MESSAGES.EXPECTED_TOKEN_BUT_GOT,
                    getFormattedTokenName(type),
                    getFormattedTokenName(token.type),
                ),
                this.baseOffset + token.start,
                this.baseOffset + token.end,
            );
        }

        if (data?.balance !== undefined && token.balance !== data.balance) {
            throw new AdblockSyntaxError(
                sprintf(
                    ERROR_MESSAGES.EXPECTED_TOKEN_WITH_BALANCE_BUT_GOT,
                    getFormattedTokenName(type),
                    data.balance,
                    token.balance,
                ),
                this.baseOffset + token.start,
                this.baseOffset + token.end,
            );
        }

        if (data?.value && this.fragment() !== data.value) {
            throw new AdblockSyntaxError(
                sprintf(
                    ERROR_MESSAGES.EXPECTED_TOKEN_WITH_VALUE_BUT_GOT,
                    getFormattedTokenName(type),
                    data.value,
                    this.fragment(),
                ),
                this.baseOffset + token.start,
                this.baseOffset + token.end,
            );
        }
    }

    /**
     * Gets the balance level of the token at the specified index.
     *
     * @param index The index of the token to retrieve the balance level for. Defaults to the current index.
     * @returns The balance level of the token or 0 if the index is out of bounds.
     */
    getBalance(index = this.index) {
        return this.tokens[index]?.balance || 0;
    }

    /**
     * Checks whether the token stream contains any Extended CSS elements, such as `:contains()`, etc.
     *
     * @returns `true` if the stream contains any Extended CSS elements, otherwise `false`.
     */
    hasAnySelectorExtendedCssNode() {
        return this.hasAnySelectorExtendedCssNodeInternal(EXT_CSS_PSEUDO_CLASSES);
    }

    /**
     * Strictly checks whether the token stream contains any Extended CSS elements, such as `:contains()`.
     * Some Extended CSS elements are natively supported by browsers, like `:has()`.
     * This method is used to check for Extended CSS elements that are not natively supported by browsers,
     * this is why it is called "strict", because it strictly checks for Extended CSS elements.
     *
     * @returns `true` if the stream contains any Extended CSS elements, otherwise `false`.
     */
    hasAnySelectorExtendedCssNodeStrict() {
        return this.hasAnySelectorExtendedCssNodeInternal(EXT_CSS_PSEUDO_CLASSES_STRICT);
    }

    /**
     * Checks whether the token stream contains any Extended CSS elements, such as `:has()`, `:contains()`, etc.
     *
     * A match is either a function token whose name is in `pseudos`, or an opening square bracket that is
     * followed (after optional whitespace) by an identifier starting with one of the legacy Extended CSS
     * attribute prefixes.
     *
     * @param pseudos Set of pseudo-classes to check for.
     *
     * @returns `true` if the stream contains any Extended CSS elements, otherwise `false`.
     */
    hasAnySelectorExtendedCssNodeInternal(pseudos) {
        for (let i = 0; i < this.tokens.length; i += 1) {
            const token = this.tokens[i];

            if (token.type === TokenType.Function) {
                // omit the trailing parenthesis of the function token
                const name = this.source.slice(token.start, token.end - 1);

                if (pseudos.has(name)) {
                    return true;
                }
            } else if (token.type === TokenType.OpenSquareBracket) {
                let j = i + 1;

                // skip whitespace
                while (j < this.tokens.length && this.tokens[j].type === TokenType.Whitespace) {
                    j += 1;
                }

                if (j < this.tokens.length && this.tokens[j].type === TokenType.Ident) {
                    const attr = this.source.slice(this.tokens[j].start, this.tokens[j].end);

                    // [-ext-<name>=...] or [-abp-<name>=...]
                    if (attr.startsWith(LEGACY_EXT_CSS_ATTRIBUTE_PREFIX) || attr.startsWith(ABP_EXT_CSS_PREFIX)) {
                        return true;
                    }

                    // the identifier has been inspected, do not check it again
                    i = j;
                } else {
                    // Only whitespace was consumed. The token at `j` (if any) has not been
                    // inspected yet, so step back by one and let the loop visit it next.
                    i = j - 1;
                }
            }
        }

        return false;
    }
}

export { CssTokenStream };