// @adguard/agtree
// Version:
// Tool set for working with adblock filter lists
// 600 lines (597 loc) • 22.4 kB
// JavaScript
/*
* AGTree v3.4.3 (build date: Thu, 11 Dec 2025 13:43:19 GMT)
* (c) 2025 Adguard Software Ltd.
* Released under the MIT license
* https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
*/
import { ESCAPE_CHARACTER, REGEX_MARKER, SPACE, TAB, NUMBER_0, NUMBER_9, SMALL_LETTER_A, SMALL_LETTER_Z, CAPITAL_LETTER_A, CAPITAL_LETTER_Z, EMPTY, CR, LF, FF, CRLF } from './constants.js';
/**
* @file Utility functions for string manipulation.
*/
/**
 * Single quote character; treated as a string-literal delimiter by the quote-aware search helpers.
 */
const SINGLE_QUOTE_MARKER = "'";
/**
 * Double quote character; treated as a string-literal delimiter by the quote-aware search helpers.
 */
const DOUBLE_QUOTE_MARKER = '"';
/**
* Utility functions for string manipulation.
*/
class StringUtils {
    /**
     * Finds the first occurrence of a character that:
     * - isn't preceded by an escape character
     *
     * Only the single character directly before a candidate is inspected, so a
     * run of escape characters is not interpreted (an escaped escape character
     * still counts as escaping the character after it).
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param start - Start index
     * @param escapeCharacter - Escape character, \ by default
     * @param end - End index (excluded)
     * @returns Index or -1 if the character not found
     */
    static findNextUnescapedCharacter(pattern, searchedCharacter, start = 0, escapeCharacter = ESCAPE_CHARACTER, end = pattern.length) {
        for (let i = start; i < end; i += 1) {
            // The searched character cannot be preceded by an escape
            if (pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the first occurrence in backward direction of a character that isn't preceded by an escape character.
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param start - Start index (the scan begins here and moves towards `end`)
     * @param escapeCharacter - Escape character, \ by default
     * @param end - End index (included)
     * @returns Index or -1 if the character not found
     */
    static findNextUnescapedCharacterBackwards(pattern, searchedCharacter, start = pattern.length - 1, escapeCharacter = ESCAPE_CHARACTER, end = 0) {
        for (let i = start; i >= end; i -= 1) {
            // The searched character cannot be preceded by an escape
            if (pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the last occurrence of a character that:
     * - isn't preceded by an escape character
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param escapeCharacter - Escape character, \ by default
     * @returns Index or -1 if the character not found
     */
    static findLastUnescapedCharacter(pattern, searchedCharacter, escapeCharacter = ESCAPE_CHARACTER) {
        // Same scan as the backward search over the whole pattern (last index down to 0)
        return StringUtils.findNextUnescapedCharacterBackwards(
            pattern,
            searchedCharacter,
            pattern.length - 1,
            escapeCharacter,
            0,
        );
    }

    /**
     * Finds the next occurrence of a character that:
     * - isn't preceded by an escape character
     * - isn't followed by the specified character
     *
     * @param pattern - Source pattern
     * @param start - Start index
     * @param searchedCharacter - Searched character
     * @param notFollowedBy - Searched character not followed by this character
     * @param escapeCharacter - Escape character, \ by default
     * @returns Index or -1 if the character not found
     */
    static findNextUnescapedCharacterThatNotFollowedBy(pattern, start, searchedCharacter, notFollowedBy, escapeCharacter = ESCAPE_CHARACTER) {
        for (let i = start; i < pattern.length; i += 1) {
            // The searched character cannot be preceded by an escape
            if (pattern[i] === searchedCharacter
                && pattern[i + 1] !== notFollowedBy
                && pattern[i - 1] !== escapeCharacter) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the last occurrence of a character that:
     * - isn't preceded by an escape character
     * - isn't followed by the specified character
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param notFollowedBy - Searched character not followed by this character
     * @param escapeCharacter - Escape character, \ by default
     * @returns Index or -1 if the character not found
     */
    static findLastUnescapedCharacterThatNotFollowedBy(pattern, searchedCharacter, notFollowedBy, escapeCharacter = ESCAPE_CHARACTER) {
        for (let i = pattern.length - 1; i >= 0; i -= 1) {
            // The searched character cannot be preceded by an escape
            if (pattern[i] === searchedCharacter
                && pattern[i + 1] !== notFollowedBy
                && pattern[i - 1] !== escapeCharacter) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the next occurrence of a character that:
     * - isn't part of any string literal ('literal' or "literal")
     * - isn't part of any RegExp expression (/regexp/)
     * - isn't preceded by an escape character
     *
     * The scan always begins in the "outside of any literal" state at `start`,
     * whatever precedes `start` in the pattern.
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param start - Start index
     * @param escapeCharacter - Escape character, \ by default (optional, mirrors
     * the parameter of `findLastUnescapedNonStringNonRegexChar`)
     * @returns Index or -1 if the character not found
     */
    static findUnescapedNonStringNonRegexChar(pattern, searchedCharacter, start = 0, escapeCharacter = ESCAPE_CHARACTER) {
        // Marker (', " or /) of the literal we are currently inside, or null when outside
        let open = null;
        for (let i = start; i < pattern.length; i += 1) {
            const char = pattern[i];
            const isEscaped = pattern[i - 1] === escapeCharacter;
            const isMarker = char === SINGLE_QUOTE_MARKER
                || char === DOUBLE_QUOTE_MARKER
                || char === REGEX_MARKER;
            if (isMarker && !isEscaped) {
                if (open === char) {
                    // Matching marker closes the current literal
                    open = null;
                }
                else if (open === null) {
                    open = char;
                }
                // A different marker inside an open literal is plain content
            }
            else if (open === null && char === searchedCharacter && !isEscaped) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the last occurrence of a character that is:
     * - not part of any string literal ('literal' or "literal")
     * - not part of any RegExp expression (/regexp/)
     * - not preceded by an escape character.
     *
     * Searches backwards from the end of the pattern.
     *
     * @param pattern Source pattern.
     * @param searchedCharacter Searched character.
     * @param escapeCharacter Escape character, `\` by default.
     *
     * @returns Index of the character or -1 if the character not found.
     */
    static findLastUnescapedNonStringNonRegexChar(pattern, searchedCharacter, escapeCharacter = ESCAPE_CHARACTER) {
        // Marker (', " or /) of the literal we are currently inside, or null when outside
        let open = null;
        // Search backwards through the pattern
        for (let i = pattern.length - 1; i >= 0; i -= 1) {
            const char = pattern[i];
            const isEscaped = pattern[i - 1] === escapeCharacter;
            const isMarker = char === SINGLE_QUOTE_MARKER
                || char === DOUBLE_QUOTE_MARKER
                || char === REGEX_MARKER;
            if (isMarker && !isEscaped) {
                // When searching backwards,
                // we close when we see the marker and are already inside,
                // and open when we see it and are not inside.
                if (open === char) {
                    open = null;
                }
                else if (open === null) {
                    open = char;
                }
            }
            else if (open === null && char === searchedCharacter && !isEscaped) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the next occurrence of a character that:
     * - isn't part of any string literal ('literal' or "literal")
     * - isn't preceded by an escape character
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param start - Start index
     * @param escapeCharacter - Escape character, \ by default
     * @returns Index or -1 if the character not found
     */
    static findNextUnquotedUnescapedCharacter(pattern, searchedCharacter, start = 0, escapeCharacter = ESCAPE_CHARACTER) {
        // Quote character that opened the current string literal, or null when outside
        let openQuote = null;
        for (let i = start; i < pattern.length; i += 1) {
            const char = pattern[i];
            const isEscaped = pattern[i - 1] === escapeCharacter;
            // Unescaped ' or "
            if ((char === SINGLE_QUOTE_MARKER || char === DOUBLE_QUOTE_MARKER) && !isEscaped) {
                if (openQuote === null) {
                    openQuote = char;
                }
                else if (openQuote === char) {
                    openQuote = null;
                }
            }
            else if (char === searchedCharacter && !isEscaped && openQuote === null) {
                // Unescaped character outside of any string literal
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the next occurrence of a character that:
     * - isn't "bracketed"
     * - isn't preceded by an escape character
     *
     * Brackets are counted regardless of a preceding escape character. A
     * nesting depth below 1 (including negative depth caused by unbalanced
     * closing brackets) is treated as "not bracketed".
     *
     * @param pattern - Source pattern
     * @param searchedCharacter - Searched character
     * @param start - Start index
     * @param escapeCharacter - Escape character, \ by default
     * @param openBracket - Open bracket, ( by default
     * @param closeBracket - Close bracket, ) by default
     * @throws If the opening and closing brackets are the same
     * @returns Index or -1 if the character not found
     */
    static findNextNotBracketedUnescapedCharacter(pattern, searchedCharacter, start = 0, escapeCharacter = ESCAPE_CHARACTER, openBracket = '(', closeBracket = ')') {
        if (openBracket === closeBracket) {
            throw new Error('Open and close bracket cannot be the same');
        }
        let depth = 0;
        for (let i = start; i < pattern.length; i += 1) {
            if (pattern[i] === openBracket) {
                depth += 1;
            }
            else if (pattern[i] === closeBracket) {
                depth -= 1;
            }
            else if (depth < 1 && pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Splits the source pattern along characters that:
     * - isn't part of any string literal ('literal' or "literal")
     * - isn't preceded by an escape character
     *
     * @param pattern - Source pattern
     * @param delimeterCharacter - Delimeter character
     * @returns Splitted string (always at least one element; the parts are not trimmed or unescaped)
     */
    static splitStringByUnquotedUnescapedCharacter(pattern, delimeterCharacter) {
        return splitAtDelimiters(
            pattern,
            (from) => StringUtils.findNextUnquotedUnescapedCharacter(pattern, delimeterCharacter, from),
        );
    }

    /**
     * Splits the source pattern along characters that:
     * - isn't part of any string literal ('literal' or "literal")
     * - isn't part of any RegExp expression (/regexp/)
     * - isn't preceded by an escape character
     *
     * @param pattern - Source pattern
     * @param delimeterCharacter - Delimeter character
     * @returns Splitted string (always at least one element; the parts are not trimmed or unescaped)
     */
    static splitStringByUnescapedNonStringNonRegexChar(pattern, delimeterCharacter) {
        return splitAtDelimiters(
            pattern,
            (from) => StringUtils.findUnescapedNonStringNonRegexChar(pattern, delimeterCharacter, from),
        );
    }

    /**
     * Splits the source pattern along characters that:
     * - isn't preceded by an escape character
     *
     * @param pattern - Source pattern
     * @param delimeterCharacter - Delimeter character
     * @returns Splitted string (always at least one element; the parts are not trimmed or unescaped)
     */
    static splitStringByUnescapedCharacter(pattern, delimeterCharacter) {
        return splitAtDelimiters(
            pattern,
            (from) => StringUtils.findNextUnescapedCharacter(pattern, delimeterCharacter, from),
        );
    }

    /**
     * Determines whether the given character is a space or tab character.
     *
     * @param char - The character to check.
     * @returns true if the given character is a space or tab character, false otherwise.
     */
    static isWhitespace(char) {
        return char === SPACE || char === TAB;
    }

    /**
     * Checks if the given character is a digit.
     *
     * @param char The character to check.
     * @returns `true` if the given character is a digit, `false` otherwise.
     */
    static isDigit(char) {
        return char >= NUMBER_0 && char <= NUMBER_9;
    }

    /**
     * Checks if the given character is a small letter.
     *
     * @param char The character to check.
     * @returns `true` if the given character is a small letter, `false` otherwise.
     */
    static isSmallLetter(char) {
        return char >= SMALL_LETTER_A && char <= SMALL_LETTER_Z;
    }

    /**
     * Checks if the given character is a capital letter.
     *
     * @param char The character to check.
     * @returns `true` if the given character is a capital letter, `false` otherwise.
     */
    static isCapitalLetter(char) {
        return char >= CAPITAL_LETTER_A && char <= CAPITAL_LETTER_Z;
    }

    /**
     * Checks if the given character is a letter (small or capital).
     *
     * @param char The character to check.
     * @returns `true` if the given character is a letter, `false` otherwise.
     */
    static isLetter(char) {
        return StringUtils.isSmallLetter(char) || StringUtils.isCapitalLetter(char);
    }

    /**
     * Checks if the given character is a letter or a digit.
     *
     * @param char Character to check
     * @returns `true` if the given character is a letter or a digit, `false` otherwise.
     */
    static isAlphaNumeric(char) {
        return StringUtils.isLetter(char) || StringUtils.isDigit(char);
    }

    /**
     * Searches for the first non-whitespace character in the source pattern.
     * Whitespace means space or tab, as defined by `isWhitespace`.
     *
     * @param pattern - Source pattern
     * @param start - Start index
     * @returns Index or -1 if the character not found
     */
    static findFirstNonWhitespaceCharacter(pattern, start = 0) {
        for (let i = start; i < pattern.length; i += 1) {
            if (!StringUtils.isWhitespace(pattern[i])) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches for the last non-whitespace character in the source pattern.
     * Whitespace means space or tab, as defined by `isWhitespace`.
     *
     * @param pattern - Source pattern
     * @returns Index or -1 if the character not found
     */
    static findLastNonWhitespaceCharacter(pattern) {
        for (let i = pattern.length - 1; i >= 0; i -= 1) {
            if (!StringUtils.isWhitespace(pattern[i])) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the next whitespace character in the pattern.
     *
     * Unlike the other finders, a miss is reported as the pattern length, not -1.
     *
     * @param pattern Pattern to search in
     * @param start Start index
     * @returns Index of the next whitespace character or the length of the pattern if not found
     */
    static findNextWhitespaceCharacter(pattern, start = 0) {
        for (let i = start; i < pattern.length; i += 1) {
            if (StringUtils.isWhitespace(pattern[i])) {
                return i;
            }
        }
        return pattern.length;
    }

    /**
     * Escapes a specified character in the string.
     *
     * Occurrences that are already directly preceded by the escape character
     * are left as they are, so they do not get a second escape character.
     *
     * @param pattern - Input string
     * @param character - Character to escape
     * @param escapeCharacter - Escape character (optional)
     * @returns Escaped string
     */
    static escapeCharacter(pattern, character, escapeCharacter = ESCAPE_CHARACTER) {
        let result = EMPTY;
        for (let i = 0; i < pattern.length; i += 1) {
            if (pattern[i] === character && pattern[i - 1] !== escapeCharacter) {
                result += escapeCharacter;
            }
            result += pattern[i];
        }
        return result;
    }

    /**
     * Searches for the next non-whitespace character in the source pattern.
     *
     * @param pattern Pattern to search
     * @param start Start index
     * @returns Index of the next non-whitespace character or the length of the pattern
     */
    static skipWS(pattern, start = 0) {
        let i = start;
        while (i < pattern.length && StringUtils.isWhitespace(pattern[i])) {
            i += 1;
        }
        // Clamp so that a start index beyond the pattern still yields the pattern length
        return Math.min(i, pattern.length);
    }

    /**
     * Searches for the previous non-whitespace character in the source pattern.
     *
     * @param pattern Pattern to search
     * @param start Start index
     * @returns Index of the previous non-whitespace character or -1
     */
    static skipWSBack(pattern, start = pattern.length - 1) {
        let i = start;
        while (i >= 0 && StringUtils.isWhitespace(pattern[i])) {
            i -= 1;
        }
        // Clamp so that a start index below -1 still yields -1
        return Math.max(i, -1);
    }

    /**
     * Checks if the given character is a new line character
     * (carriage return, line feed or form feed).
     *
     * @param char Character to check
     * @returns `true` if the given character is a new line character, `false` otherwise.
     */
    static isEOL(char) {
        return char === CR || char === LF || char === FF;
    }

    /**
     * Splits a string along newline characters.
     *
     * Only LF and CRLF are treated as separators here; a lone CR stays inside
     * the line. Use `splitStringByNewLinesEx` when lone CR must be recognized.
     *
     * @param input - Input string
     * @returns Splitted string
     */
    static splitStringByNewLines(input) {
        return input.split(/\r?\n/);
    }

    /**
     * Splits a string by new lines and stores the new line type for each line
     *
     * @param input The input string to be split
     * @returns An array of tuples, where each tuple contains a line of the input string and its
     * corresponding new line type ("lf", "crlf", or "cr"). A final line that is not terminated
     * by a new line gets `null` as its type; an empty input yields a single `['', null]` tuple.
     */
    static splitStringByNewLinesEx(input) {
        // Array to store the tuples of line and new line type
        const result = [];
        let currentLine = EMPTY;
        // Iterate over each character in the input string
        for (let i = 0; i < input.length; i += 1) {
            const char = input[i];
            if (char === CR) {
                if (input[i + 1] === LF) {
                    result.push([currentLine, 'crlf']);
                    // Consume the LF that belongs to this CRLF pair
                    i += 1;
                }
                else {
                    result.push([currentLine, 'cr']);
                }
                currentLine = EMPTY;
            }
            else if (char === LF) {
                result.push([currentLine, 'lf']);
                currentLine = EMPTY;
            }
            else {
                currentLine += char;
            }
        }
        // Keep the unterminated tail; also guarantees at least one tuple for empty input
        if (result.length === 0 || currentLine !== EMPTY) {
            result.push([currentLine, null]);
        }
        return result;
    }

    /**
     * Merges an array of tuples (line, newLineType) into a single string
     *
     * @param input The array of tuples to be merged
     * @returns A single string containing the lines and new line characters from the input array.
     * A `null` type appends nothing; any type other than "crlf" or "cr" appends LF.
     */
    static mergeStringByNewLines(input) {
        let result = EMPTY;
        // Iterate over each tuple in the input array
        for (let i = 0; i < input.length; i += 1) {
            const [line, newLineType] = input[i];
            // Add the line to the result string
            result += line;
            // Add the appropriate new line character based on the newLineType
            if (newLineType === null) {
                continue;
            }
            if (newLineType === 'crlf') {
                result += CRLF;
            }
            else if (newLineType === 'cr') {
                result += CR;
            }
            else {
                result += LF;
            }
        }
        return result;
    }

    /**
     * Helper method to parse a raw string as a number
     *
     * Parsing uses `parseInt` with radix 10, so a leading integer prefix is
     * accepted and the rest is ignored (e.g. '12px' gives 12, '1.5' gives 1).
     *
     * @param raw Raw string to parse
     * @returns Parsed number
     * @throws If the raw string can't be parsed as a number
     */
    static parseNumber(raw) {
        const result = parseInt(raw, 10);
        if (Number.isNaN(result)) {
            throw new Error('Expected a number');
        }
        return result;
    }

    /**
     * Checks if the given value is a string.
     *
     * @param value Value to check
     * @returns `true` if the value is a string primitive, `false` otherwise
     */
    static isString(value) {
        return typeof value === 'string';
    }

    /**
     * Escapes the given characters in the input string.
     *
     * Every listed character gets an escape character prepended, including
     * occurrences that are already preceded by one.
     *
     * @param input Input string
     * @param characters Characters to escape (by default, no characters are escaped)
     * @returns Escaped string
     */
    static escapeCharacters(input, characters = new Set()) {
        let result = EMPTY;
        for (let i = 0; i < input.length; i += 1) {
            if (characters.has(input[i])) {
                result += ESCAPE_CHARACTER;
            }
            result += input[i];
        }
        return result;
    }
}

/**
 * Shared implementation of the `StringUtils.splitStringBy*` methods: cuts the
 * pattern at every delimiter index reported by the given finder.
 *
 * @param pattern - Source pattern
 * @param findDelimiter - Callback that receives the index to search from and
 * returns the index of the next delimiter, or -1 when there is none
 * @returns Parts between the delimiters (delimiters excluded); always at least one element
 */
function splitAtDelimiters(pattern, findDelimiter) {
    const parts = [];
    let partStart = 0;
    let delimiterIndex = findDelimiter(partStart);
    while (delimiterIndex !== -1) {
        parts.push(pattern.substring(partStart, delimiterIndex));
        partStart = delimiterIndex + 1;
        delimiterIndex = findDelimiter(partStart);
    }
    // Remainder after the last delimiter (the whole pattern when none was found)
    parts.push(pattern.substring(partStart, pattern.length));
    return parts;
}
export { DOUBLE_QUOTE_MARKER, SINGLE_QUOTE_MARKER, StringUtils };