@adguard/agtree
Version:
Tool set for working with adblock filter lists
354 lines (351 loc) • 13.7 kB
JavaScript
/*
* AGTree v3.2.2 (build date: Tue, 08 Jul 2025 13:39:47 GMT)
* (c) 2025 Adguard Software Ltd.
* Released under the MIT license
* https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
*/
import { TokenType, getFormattedTokenName } from '@adguard/css-tokenizer';
import { sprintf } from 'sprintf-js';
import { tokenizeBalanced } from './balancing.js';
import { EMPTY } from '../../utils/constants.js';
import { AdblockSyntaxError } from '../../errors/adblock-syntax-error.js';
import { ERROR_MESSAGES, END_OF_INPUT } from './constants.js';
import { EXT_CSS_PSEUDO_CLASSES, EXT_CSS_PSEUDO_CLASSES_STRICT, LEGACY_EXT_CSS_ATTRIBUTE_PREFIX, ABP_EXT_CSS_PREFIX } from '../../converter/data/css.js';
/**
* @file CSS token stream.
*/
/**
* Represents a stream of CSS tokens.
*/
class CssTokenStream {
    /**
     * The tokens in the stream. Every entry stores the token `type`, its `start` and `end`
     * offsets within `source`, and its `balance` level.
     */
    tokens = [];

    /**
     * The source string.
     */
    source = EMPTY;

    /**
     * The current index in the stream.
     */
    index = 0;

    /**
     * The base offset of the source string. It is added to source-relative positions
     * when errors are reported.
     */
    baseOffset;

    /**
     * Initializes a new instance of the CssTokenStream class.
     *
     * @param source The source string to tokenize.
     * @param baseOffset The base offset of the source string.
     * @throws If the tokenizer fails. For `AdblockSyntaxError` the error positions are shifted
     * by `baseOffset` before the error is rethrown; any other error is rethrown unchanged.
     */
    constructor(source, baseOffset = 0) {
        this.source = source;
        this.baseOffset = baseOffset;

        // Tokenize the source string with the CSS tokenizer and add balance level to each token.
        // 'onToken' callback is invoked when a token is found in the source string.
        // Passed parameters:
        // - type: type of the token
        // - start: start index of the token
        // - end: end index of the token
        // - props: additional properties of the token, if any (unused here, hence the underscore)
        // - balance: balance level of the token
        try {
            tokenizeBalanced(source, (type, start, end, _, balance) => {
                this.tokens.push({
                    type,
                    start,
                    end,
                    balance,
                });
            });
        } catch (error) {
            // Syntax errors carry positions relative to `source`, so shift them by the base offset
            if (error instanceof AdblockSyntaxError) {
                error.start += baseOffset;
                error.end += baseOffset;
            }

            // Always rethrow: swallowing an unexpected error here would silently leave
            // a partially filled token list behind
            throw error;
        }
    }

    /**
     * Gets the number of tokens in the stream.
     *
     * @returns The number of tokens in the stream.
     */
    get length() {
        return this.tokens.length;
    }

    /**
     * Checks if the end of the token stream is reached.
     *
     * @returns True if the end of the stream is reached, otherwise false.
     */
    isEof() {
        return this.index >= this.tokens.length;
    }

    /**
     * Gets the token at the specified index.
     *
     * @param index The index of the token to retrieve. Defaults to the current index.
     * @returns The token at the specified index or undefined if the index is out of bounds.
     */
    get(index = this.index) {
        return this.tokens[index];
    }

    /**
     * Gets the token at the specified index or throws if no token is found at the specified index.
     *
     * @param index The index of the token to retrieve. Defaults to the current index.
     * @returns The token at the specified index.
     * @throws If no token is found at the specified index.
     */
    getOrFail(index = this.index) {
        const token = this.get(index);

        if (!token) {
            // The error range points to the last character of the source
            throw new AdblockSyntaxError(
                sprintf(ERROR_MESSAGES.EXPECTED_ANY_TOKEN_BUT_GOT, END_OF_INPUT),
                this.baseOffset + this.source.length - 1,
                this.baseOffset + this.source.length,
            );
        }

        return token;
    }

    /**
     * Gets the source fragment of the token at the specified index.
     *
     * @param index The index of the token to retrieve the fragment for. Defaults to the current index.
     * @returns The source fragment of the token or an empty string if the index is out of bounds.
     */
    fragment(index = this.index) {
        const token = this.get(index);

        if (token) {
            return this.source.slice(token.start, token.end);
        }

        return EMPTY;
    }

    /**
     * Moves the index to the next token and returns it.
     *
     * @returns The next token or undefined if the end of the stream is reached.
     */
    advance() {
        if (this.isEof()) {
            return undefined;
        }

        this.index += 1;

        return this.tokens[this.index];
    }

    /**
     * Looks ahead in the stream without changing the index.
     *
     * @param index The relative index to look ahead to, starting from the current index.
     * Values lower than 1 are treated as 1.
     * @returns The next token or undefined if the end of the stream is reached.
     */
    lookahead(index = 1) {
        return this.tokens[this.index + Math.max(1, index)];
    }

    /**
     * Looks behind in the stream without changing the index.
     *
     * @param index The relative index to look behind to, starting from the current index.
     * Values lower than 1 are treated as 1.
     * @returns The previous token or undefined if there is no token at that position.
     */
    lookbehind(index = 1) {
        if (this.index === 0) {
            return undefined;
        }

        return this.tokens[this.index - Math.max(1, index)];
    }

    /**
     * Looks behind in the stream for the previous non-whitespace token without changing the index.
     *
     * @returns The previous non-whitespace token or undefined if it could not be found.
     */
    lookbehindForNonWs() {
        for (let i = this.index - 1; i >= 0; i -= 1) {
            if (this.tokens[i].type !== TokenType.Whitespace) {
                return this.tokens[i];
            }
        }

        return undefined;
    }

    /**
     * Skips whitespace tokens in the stream.
     */
    skipWhitespace() {
        while (this.get()?.type === TokenType.Whitespace) {
            this.index += 1;
        }
    }

    /**
     * Skips tokens until a token one balance level below the current token is reached.
     * Does nothing if the stream is at its end or the current balance level is 0.
     *
     * @returns The number of tokens skipped.
     */
    skipUntilBalanced() {
        if (this.isEof()) {
            return 0;
        }

        // The current token is guaranteed to exist because of the EOF check above
        const currentBalance = this.get().balance;

        // If the current balance is 0, do nothing
        if (currentBalance === 0) {
            return 0;
        }

        // Otherwise, skip tokens until the balance is the current balance - 1
        let skipped = 0;

        while (!this.isEof() && this.get()?.balance !== currentBalance - 1) {
            this.index += 1;
            skipped += 1;
        }

        return skipped;
    }

    /**
     * Skips tokens until a token with the specified type or the end of the stream is reached.
     *
     * @param type The type of token to skip until.
     * @param balance The balance level of the token to skip until. If omitted, the balance level is ignored.
     * @returns The number of tokens skipped.
     */
    skipUntil(type, balance) {
        let skipped = 0;

        while (
            !this.isEof()
            && (this.get()?.type !== type || (balance !== undefined && this.get()?.balance !== balance))
        ) {
            this.index += 1;
            skipped += 1;
        }

        return skipped;
    }

    /**
     * Skips tokens until a token with the specified type or the end of the stream is reached. This is an extended
     * version of skipUntil that also returns the number of tokens skipped without calculating leading and trailing
     * whitespace tokens.
     *
     * Unlike skipUntil, the balance level is always compared strictly, and whitespace tokens are never treated
     * as the stop token.
     *
     * @param type The type of token to skip until.
     * @param balance The balance level of the token to skip until.
     * @returns An object with the number of tokens skipped (`skipped`) and the number of tokens skipped without
     * leading and trailing whitespace tokens (`skippedTrimmed`).
     */
    skipUntilExt(type, balance) {
        let i = this.index;
        let firstNonWsToken = -1; // -1 means no non-whitespace token found yet
        let lastNonWsToken = -1; // -1 means no non-whitespace token found yet

        while (i < this.tokens.length) {
            const currentToken = this.tokens[i];

            if (currentToken.type === TokenType.Whitespace) {
                i += 1;
                continue;
            } else if (currentToken.type === type && currentToken.balance === balance) {
                break;
            }

            if (firstNonWsToken === -1) {
                firstNonWsToken = i;
            }

            lastNonWsToken = i;
            i += 1;
        }

        const skipped = i - this.index;

        this.index = i;

        return {
            skipped,
            // if firstNonWsToken is -1, then lastNonWsToken is also -1
            skippedTrimmed: firstNonWsToken === -1 ? 0 : lastNonWsToken - firstNonWsToken + 1,
        };
    }

    /**
     * Expects that the end of the stream is not reached.
     *
     * @throws If the end of the stream is reached.
     */
    expectNotEof() {
        if (this.isEof()) {
            throw new AdblockSyntaxError(
                'Unexpected end of input',
                this.baseOffset + this.source.length - 1,
                this.baseOffset + this.source.length,
            );
        }
    }

    /**
     * Expects the current token to have a specific type and optional value and balance level.
     *
     * @param type The expected token type.
     * @param data Optional expectation data (`balance` and / or `value`).
     * @throws If the end of the stream is reached or if the token type or expectation data does not match.
     */
    expect(type, data) {
        const token = this.get();

        if (!token) {
            throw new AdblockSyntaxError(
                sprintf(ERROR_MESSAGES.EXPECTED_TOKEN_BUT_GOT, getFormattedTokenName(type), END_OF_INPUT),
                this.baseOffset + this.source.length - 1,
                this.baseOffset + this.source.length,
            );
        }

        if (token.type !== type) {
            throw new AdblockSyntaxError(
                sprintf(
                    ERROR_MESSAGES.EXPECTED_TOKEN_BUT_GOT,
                    getFormattedTokenName(type),
                    getFormattedTokenName(token.type),
                ),
                this.baseOffset + token.start,
                this.baseOffset + token.end,
            );
        }

        if (data?.balance !== undefined && token.balance !== data.balance) {
            throw new AdblockSyntaxError(
                sprintf(
                    ERROR_MESSAGES.EXPECTED_TOKEN_WITH_BALANCE_BUT_GOT,
                    getFormattedTokenName(type),
                    data.balance,
                    token.balance,
                ),
                this.baseOffset + token.start,
                this.baseOffset + token.end,
            );
        }

        if (data?.value && this.fragment() !== data.value) {
            throw new AdblockSyntaxError(
                sprintf(
                    ERROR_MESSAGES.EXPECTED_TOKEN_WITH_VALUE_BUT_GOT,
                    getFormattedTokenName(type),
                    data.value,
                    this.fragment(),
                ),
                this.baseOffset + token.start,
                this.baseOffset + token.end,
            );
        }
    }

    /**
     * Gets the balance level of the token at the specified index.
     *
     * @param index The index of the token to retrieve the balance level for. Defaults to the current index.
     * @returns The balance level of the token or 0 if the index is out of bounds.
     */
    getBalance(index = this.index) {
        return this.tokens[index]?.balance ?? 0;
    }

    /**
     * Checks whether the token stream contains any Extended CSS elements, such as `:contains()`, etc.
     *
     * @returns `true` if the stream contains any Extended CSS elements, otherwise `false`.
     */
    hasAnySelectorExtendedCssNode() {
        return this.hasAnySelectorExtendedCssNodeInternal(EXT_CSS_PSEUDO_CLASSES);
    }

    /**
     * Strictly checks whether the token stream contains any Extended CSS elements, such as `:contains()`.
     * Some Extended CSS elements are natively supported by browsers, like `:has()`.
     * This method is used to check for Extended CSS elements that are not natively supported by browsers,
     * this is why it is called "strict", because it strictly checks for Extended CSS elements.
     *
     * @returns `true` if the stream contains any Extended CSS elements, otherwise `false`.
     */
    hasAnySelectorExtendedCssNodeStrict() {
        return this.hasAnySelectorExtendedCssNodeInternal(EXT_CSS_PSEUDO_CLASSES_STRICT);
    }

    /**
     * Checks whether the token stream contains any Extended CSS elements, such as `:has()`, `:contains()`, etc.
     * The whole stream is scanned regardless of the current index, and the index is not changed.
     *
     * @param pseudos Set of pseudo-classes to check for.
     *
     * @returns `true` if the stream contains any Extended CSS elements, otherwise `false`.
     */
    hasAnySelectorExtendedCssNodeInternal(pseudos) {
        for (let i = 0; i < this.tokens.length; i += 1) {
            const token = this.tokens[i];

            if (token.type === TokenType.Function) {
                const name = this.source.slice(token.start, token.end - 1); // omit the last parenthesis

                if (pseudos.has(name)) {
                    return true;
                }
            } else if (token.type === TokenType.OpenSquareBracket) {
                let j = i + 1;

                // skip whitespace
                while (j < this.tokens.length && this.tokens[j].type === TokenType.Whitespace) {
                    j += 1;
                }

                if (j < this.tokens.length && this.tokens[j].type === TokenType.Ident) {
                    const attr = this.source.slice(this.tokens[j].start, this.tokens[j].end);

                    // [-ext-<name>=...] or [-abp-<name>=...]
                    if (attr.startsWith(LEGACY_EXT_CSS_ATTRIBUTE_PREFIX) || attr.startsWith(ABP_EXT_CSS_PREFIX)) {
                        return true;
                    }
                }

                // Do not check the skipped whitespace again, but resume the outer loop exactly at token `j`
                // (the loop increment moves `i` from `j - 1` to `j`), because a non-identifier token at `j`
                // has not been inspected yet
                i = j - 1;
            }
        }

        return false;
    }
}
export { CssTokenStream };