UNPKG

@adguard/agtree

Version:

Tool set for working with adblock filter lists

github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree

AdguardTeam/tsurlfilter

117 lines (114 loc) • 5.86 kB

JavaScript

/*
 * AGTree v3.2.2 (build date: Tue, 08 Jul 2025 13:39:47 GMT)
 * (c) 2025 Adguard Software Ltd.
 * Released under the MIT license
 * https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
 */
import { ListItemNodeType } from '../../nodes/index.js';
import { defaultParserOptions } from '../options.js';
import { COMMA, NEGATION_MARKER } from '../../utils/constants.js';
import { StringUtils } from '../../utils/string.js';
import { AdblockSyntaxError } from '../../errors/adblock-syntax-error.js';

/**
 * Messages attached to the syntax errors raised while parsing a value list.
 */
const LIST_PARSE_ERROR_PREFIX = {
    EMPTY_ITEM: 'Empty value specified in the list',
    NO_MULTIPLE_NEGATION: 'Exception marker cannot be followed by another exception marker',
    NO_SEPARATOR_AFTER_NEGATION: 'Exception marker cannot be followed by a separator',
    NO_SEPARATOR_AT_THE_BEGINNING: 'Value list cannot start with a separator',
    NO_SEPARATOR_AT_THE_END: 'Value list cannot end with a separator',
    NO_WHITESPACE_AFTER_NEGATION: 'Exception marker cannot be followed by whitespace',
};

/**
 * Parser for separator-delimited item lists used as modifier values.
 */
class ListItemsParser {
    /**
     * Splits a raw modifier value into list items on every unescaped `separator`.
     * Used for modifiers such as $app, $denyallow, $domain and $method.
     *
     * Each produced item has the shape `{ type, value, exception }`, where `exception`
     * is `true` when the item was prefixed with the negation marker (the marker itself
     * is not part of `value`). When `options.isLocIncluded` is truthy, `start` and `end`
     * offsets (shifted by `baseOffset`) are added to every item.
     *
     * @param raw Raw input to parse.
     * @param options Global parser options (default: `defaultParserOptions`).
     * @param baseOffset Offset of `raw` within the enclosing input; added to every reported position
     * (default: `0`).
     * @param separator Separator character (default: comma).
     * @param type Type assigned to every list item (default: {@link ListItemNodeType.Unknown}).
     *
     * @returns Array of parsed items, in input order. Empty when `raw` contains nothing to parse.
     * @throws An {@link AdblockSyntaxError} if the list starts or ends with a separator, contains an
     * empty item, or has a negation marker followed by another marker, a separator or whitespace.
     *
     * @example
     * - parses an app list — `com.example.app|Example.exe`
     * - parses a domain list — `example.com,example.org,~example.org` or `example.com|~example.org`
     * - parses a method list — `~post|~put`
     */
    static parse(
        raw,
        options = defaultParserOptions,
        baseOffset = 0,
        separator = COMMA,
        type = ListItemNodeType.Unknown,
    ) {
        const items = [];

        // Start at the first non-whitespace character of the input
        let cursor = StringUtils.skipWS(raw, 0);

        // A leading separator makes the whole list invalid, so fail before scanning any item
        if (raw[cursor] === separator) {
            throw new AdblockSyntaxError(
                LIST_PARSE_ERROR_PREFIX.NO_SEPARATOR_AT_THE_BEGINNING,
                baseOffset + cursor,
                baseOffset + raw.length,
            );
        }

        // Likewise for a trailing separator (trailing whitespace is ignored for this check)
        const lastMeaningfulIndex = StringUtils.skipWSBack(raw);

        if (raw[lastMeaningfulIndex] === separator) {
            throw new AdblockSyntaxError(
                LIST_PARSE_ERROR_PREFIX.NO_SEPARATOR_AT_THE_END,
                baseOffset + lastMeaningfulIndex,
                baseOffset + lastMeaningfulIndex + 1,
            );
        }

        // Walk the input one item at a time, cutting at unescaped separators
        while (cursor < raw.length) {
            // Leading whitespace does not belong to the item
            cursor = StringUtils.skipWS(raw, cursor);

            // Locate the separator that closes the current item (-1 means this is the last item)
            const separatorIndex = StringUtils.findNextUnescapedCharacter(raw, separator, cursor);

            // Trim trailing whitespace: either up to the end of the input or up to the separator
            const lastValueIndex = separatorIndex === -1
                ? StringUtils.skipWSBack(raw)
                : StringUtils.skipWSBack(raw, separatorIndex - 1);
            const valueEnd = lastValueIndex + 1;

            // A leading negation marker flags the item as an exception and is excluded from the value
            const isException = raw[cursor] === NEGATION_MARKER;
            const valueStart = isException ? cursor + 1 : cursor;

            if (isException) {
                const afterMarker = raw[valueStart];
                let negationError;

                // The checks are ordered: repeated marker, then separator, then whitespace
                if (afterMarker === NEGATION_MARKER) {
                    negationError = LIST_PARSE_ERROR_PREFIX.NO_MULTIPLE_NEGATION;
                } else if (afterMarker === separator) {
                    negationError = LIST_PARSE_ERROR_PREFIX.NO_SEPARATOR_AFTER_NEGATION;
                } else if (StringUtils.isWhitespace(afterMarker)) {
                    negationError = LIST_PARSE_ERROR_PREFIX.NO_WHITESPACE_AFTER_NEGATION;
                }

                if (negationError !== undefined) {
                    throw new AdblockSyntaxError(
                        negationError,
                        baseOffset + valueStart,
                        baseOffset + valueStart + 1,
                    );
                }
            }

            // Reject empty items. The comparison is `<=` rather than `===` because the forward
            // whitespace skip may move the start past the backward-trimmed end,
            // e.g. for 'example.com, ,example.org'
            if (valueEnd <= valueStart) {
                throw new AdblockSyntaxError(
                    LIST_PARSE_ERROR_PREFIX.EMPTY_ITEM,
                    baseOffset + valueStart,
                    baseOffset + raw.length,
                );
            }

            const node = {
                type,
                value: raw.slice(valueStart, valueEnd),
                exception: isException,
            };

            // Location info is optional and expressed relative to the enclosing input
            if (options.isLocIncluded) {
                node.start = baseOffset + valueStart;
                node.end = baseOffset + valueEnd;
            }

            items.push(node);

            // No further separator: the item just collected was the last one
            if (separatorIndex === -1) {
                break;
            }

            // Continue right after the separator
            cursor = separatorIndex + 1;
        }

        return items;
    }
}

export { LIST_PARSE_ERROR_PREFIX, ListItemsParser };