tag-soup

Version:

The fastest pure JS SAX/DOM XML/HTML parser.

92 lines (91 loc) • 3.62 kB

TypeScript

/**
 * The error thrown by a parser if a {@link ParserError.input input} is malformed.
 *
 * @group Parser
 */
export declare class ParserError extends SyntaxError {
    /**
     * The text where an error was detected.
     */
    input: string;
    /**
     * The index of the first char in {@link input} where an error was detected, inclusive.
     *
     * NOTE(review): the constructor parameter of the same name is optional while this field is not; the value used
     * when the argument is omitted is not visible in this declaration file — confirm against the implementation.
     */
    startIndex: number;
    /**
     * The index of the last char in {@link input} where an error was detected, exclusive.
     *
     * NOTE(review): the constructor parameter of the same name is optional while this field is not; the value used
     * when the argument is omitted is not visible in this declaration file — confirm against the implementation.
     */
    endIndex: number;
    /**
     * Creates a new {@link ParserError} instance.
     *
     * @param message The error message.
     * @param input The text where an error was detected.
     * @param startIndex The index of the first char in text where an error was detected, inclusive. Optional.
     * @param endIndex The index of the last char in text where an error was detected, exclusive. Optional.
     */
    constructor(message: string, input: string, startIndex?: number, endIndex?: number);
}

/**
 * A token that can be read from a text.
 *
 * Each member of the union is a string literal naming one kind of token; the same literals are passed as the first
 * argument of a {@link TokenCallback}.
 *
 * @group Tokenizer
 */
export type Token =
    'TEXT'
    | 'START_TAG_NAME'
    | 'START_TAG_CLOSING'
    | 'START_TAG_SELF_CLOSING'
    | 'END_TAG_NAME'
    | 'ATTRIBUTE_NAME'
    | 'ATTRIBUTE_VALUE'
    | 'COMMENT'
    | 'PROCESSING_INSTRUCTION_TARGET'
    | 'PROCESSING_INSTRUCTION_DATA'
    | 'CDATA_SECTION'
    | 'DOCTYPE_NAME';

/**
 * A callback that is invoked when a token is read from a text.
 *
 * The callback receives only the token kind and a pair of indices; no substring is passed to it.
 *
 * @param token The token that was read.
 * @param startIndex The start index of the first meaningful token char, inclusive.
 * @param endIndex The end index of the last meaningful token char, exclusive.
 * @group Tokenizer
 */
export type TokenCallback = (token: Token, startIndex: number, endIndex: number) => void;

/**
 * Options of {@link tokenizeMarkup}.
 *
 * Extends {@link TokenReaderOptions}, so every reader option is accepted here as well. All members are optional.
 *
 * NOTE(review): the numeric elements of the sets and maps below are presumably the tag identifiers returned by
 * {@link TokenReaderOptions.readTag} — confirm against the implementation.
 */
export interface ResolvedTokenizerOptions extends TokenReaderOptions {
    /**
     * NOTE(review): presumably the tags that never have content or an end tag — confirm.
     */
    voidTags?: Set<number>;
    /**
     * NOTE(review): presumably associates a tag with the set of tags related to it by implicit closing; which side
     * is the key and which is the value is not visible here — confirm.
     */
    implicitlyClosedTags?: Map<number, Set<number>>;
    /**
     * NOTE(review): presumably the tags for which a missing start tag is inserted — confirm.
     */
    implicitlyOpenedTags?: Set<number>;
    /**
     * NOTE(review): presumably enables implicit closing of start tags that have no matching end tag — confirm.
     */
    areUnbalancedStartTagsImplicitlyClosed?: boolean;
    /**
     * NOTE(review): presumably makes the tokenizer skip end tags that have no matching start tag — confirm.
     */
    areUnbalancedEndTagsIgnored?: boolean;
}

/**
 * Reads tokens from text and returns them by invoking a callback.
 *
 * Tokens are _guaranteed_ to be returned in correct order. Missing tokens are inserted to restore the correct order if
 * needed.
 *
 * @example
 * tokenizeMarkup(
 *   'Hello, <b>Bob</b>!',
 *   (token, startIndex, endIndex) => {
 *     // Handle token here
 *   },
 *   resolveTokenizerOptions(htmlTokenizerOptions)
 * );
 *
 * @param input The text to read tokens from.
 * @param callback The callback that is invoked when a token is read.
 * @param options Tokenizer options prepared by {@link resolveTokenizerOptions}. Optional.
 */
export declare function tokenizeMarkup(input: string, callback: TokenCallback, options?: ResolvedTokenizerOptions): void;

/**
 * Token reader options that can be nested: an options object may refer to a parent options object and to further
 * options objects keyed by number via {@link ContextualTokenReaderOptions.foreignTags foreignTags}.
 *
 * All members are optional.
 */
export interface ContextualTokenReaderOptions {
    /**
     * NOTE(review): presumably the options of the enclosing context — confirm.
     */
    parentOptions?: ContextualTokenReaderOptions;
    /**
     * NOTE(review): presumably maps a tag identifier to the options that apply to the content of that tag (for
     * example, a different markup dialect embedded in the document) — confirm.
     */
    foreignTags?: Map<number, ContextualTokenReaderOptions>;
    /**
     * NOTE(review): presumably controls whether `START_TAG_SELF_CLOSING` tokens are produced — confirm.
     */
    areSelfClosingTagsRecognized?: boolean;
    /**
     * NOTE(review): presumably controls whether `CDATA_SECTION` tokens are produced — confirm.
     */
    areCDATASectionsRecognized?: boolean;
    /**
     * NOTE(review): presumably controls whether `PROCESSING_INSTRUCTION_TARGET` and `PROCESSING_INSTRUCTION_DATA`
     * tokens are produced — confirm.
     */
    areProcessingInstructionsRecognized?: boolean;
}

/**
 * Options of {@link readTokens}.
 *
 * Extends {@link ContextualTokenReaderOptions}. All members are optional.
 */
export interface TokenReaderOptions extends ContextualTokenReaderOptions {
    /**
     * Converts a substring of `input`, delimited by `startIndex` and `endIndex`, to a number.
     *
     * {@link getCaseInsensitiveHashCode} and {@link getCaseSensitiveHashCode} have exactly this signature.
     *
     * NOTE(review): presumably called with the bounds of a tag name, and the returned number is the tag identifier
     * looked up in the tag sets and maps of the options — confirm.
     */
    readTag?: (input: string, startIndex: number, endIndex: number) => number;
    /**
     * NOTE(review): presumably the tags whose content is read as plain text without recognizing nested markup —
     * confirm.
     */
    rawTextTags?: Set<number>;
    /**
     * NOTE(review): presumably indicates that the input is a fragment rather than a complete document — confirm.
     */
    isFragment?: boolean;
    /**
     * NOTE(review): presumably enables strict handling of malformed input (see {@link ParserError}) — confirm.
     */
    isStrict?: boolean;
}

/**
 * Reads tokens from the text and returns tokens by invoking a callback.
 *
 * Tokens are returned in the same order they are listed in the text.
 *
 * @param input The text to read tokens from.
 * @param callback The callback that is invoked when a token is read.
 * @param options Token reader options. Optional.
 */
export declare function readTokens(input: string, callback: TokenCallback, options?: TokenReaderOptions): void;

/**
 * Returns case-insensitive djb2 hash of a substring.
 *
 * @param input The string that contains the substring.
 * @param startIndex The index in `input` where the substring starts.
 * @param endIndex The index in `input` where the substring ends.
 *   NOTE(review): presumably exclusive, as for the other index pairs in this file — confirm.
 * @returns The hash code of the substring.
 */
export declare function getCaseInsensitiveHashCode(input: string, startIndex: number, endIndex: number): number;

/**
 * Returns case-sensitive djb2 hash of a substring.
 *
 * @param input The string that contains the substring.
 * @param startIndex The index in `input` where the substring starts.
 * @param endIndex The index in `input` where the substring ends.
 *   NOTE(review): presumably exclusive, as for the other index pairs in this file — confirm.
 * @returns The hash code of the substring.
 */
export declare function getCaseSensitiveHashCode(input: string, startIndex: number, endIndex: number): number;