tag-soup
Version:
The fastest pure JS SAX/DOM XML/HTML parser.
92 lines (91 loc) • 3.62 kB
TypeScript
/**
* The error thrown by a parser if a {@link ParserError.input input} is malformed.
*
* Extends the built-in `SyntaxError`, so it can be detected with either `instanceof ParserError` or
* `instanceof SyntaxError`.
*
* @group Parser
*/
export declare class ParserError extends SyntaxError {
/**
* The text where the error was detected.
*/
input: string;
/**
* The index of the first char in {@link input} where the error was detected, inclusive.
*
* NOTE(review): presumably mirrors the `startIndex` constructor argument — confirm against the implementation.
*/
startIndex: number;
/**
* The index in {@link input} at which the erroneous range ends, exclusive.
*
* NOTE(review): presumably mirrors the `endIndex` constructor argument — confirm against the implementation.
*/
endIndex: number;
/**
* Creates a new {@link ParserError} instance.
*
* NOTE(review): `startIndex` and `endIndex` are optional, but the values used when they are omitted are not visible
* in this declaration — confirm against the implementation.
*
* @param message The error message.
* @param input The text where an error was detected.
* @param startIndex The index of the first char in text where an error was detected, inclusive.
* @param endIndex The index in text at which the erroneous range ends, exclusive (one past the last erroneous char).
*/
constructor(message: string, input: string, startIndex?: number, endIndex?: number);
}
/**
* The type of a token that can be read from a text.
*
* Each member is a string literal, so a token can be compared with `===` or used as a `switch` discriminant.
*
* @group Tokenizer
*/
export type Token =
  | 'TEXT'
  | 'START_TAG_NAME'
  | 'START_TAG_CLOSING'
  | 'START_TAG_SELF_CLOSING'
  | 'END_TAG_NAME'
  | 'ATTRIBUTE_NAME'
  | 'ATTRIBUTE_VALUE'
  | 'COMMENT'
  | 'PROCESSING_INSTRUCTION_TARGET'
  | 'PROCESSING_INSTRUCTION_DATA'
  | 'CDATA_SECTION'
  | 'DOCTYPE_NAME';
/**
* A callback that is invoked when a token is read from a text.
*
* The token text itself is not passed to the callback: only the token type and the `[startIndex, endIndex)` range
* are provided.
*
* @param token The token that was read.
* @param startIndex The index of the first meaningful token char, inclusive.
* @param endIndex The index at which the meaningful token chars end, exclusive (one past the last meaningful char).
* @group Tokenizer
*/
export type TokenCallback = (token: Token, startIndex: number, endIndex: number) => void;
/**
* Options of {@link tokenizeMarkup}.
*
* Extends {@link TokenReaderOptions}, so every token reader option can be provided here as well. All members are
* optional.
*
* NOTE(review): tags are represented by numbers in these options — presumably the values returned by
* {@link TokenReaderOptions.readTag} — confirm against the implementation.
*/
export interface ResolvedTokenizerOptions extends TokenReaderOptions {
/**
* NOTE(review): judging by the name, the tags that are treated as void (having no content) — confirm.
*/
voidTags?: Set<number>;
/**
* NOTE(review): judging by the name, relates a tag to a set of tags involved in implicit closing. The direction of
* the relation (whether the key closes the values or the values close the key) is not visible here — confirm.
*/
implicitlyClosedTags?: Map<number, Set<number>>;
/**
* NOTE(review): judging by the name, the tags that can be opened implicitly — confirm the exact trigger.
*/
implicitlyOpenedTags?: Set<number>;
/**
* NOTE(review): judging by the name, whether start tags without a matching end tag are closed implicitly — confirm
* the exact semantics and the default.
*/
areUnbalancedStartTagsImplicitlyClosed?: boolean;
/**
* NOTE(review): judging by the name, whether end tags without a matching start tag are ignored — confirm the exact
* semantics and the default.
*/
areUnbalancedEndTagsIgnored?: boolean;
}
/**
* Reads tokens from text and returns them by invoking a callback.
*
* Tokens are _guaranteed_ to be returned in correct order. Missing tokens are inserted to restore the correct order if
* needed.
*
* @example
* ```ts
* tokenizeMarkup(
*   'Hello, <b>Bob</b>!',
*   (token, startIndex, endIndex) => {
*     // Handle token here
*   },
*   resolveTokenizerOptions(htmlTokenizerOptions)
* );
* ```
*
* @param input The text to read tokens from.
* @param callback The callback that is invoked when a token is read.
* @param options Tokenizer options prepared by {@link resolveTokenizerOptions}. The argument is optional.
*/
export declare function tokenizeMarkup(input: string, callback: TokenCallback, options?: ResolvedTokenizerOptions): void;
/**
* The subset of token reader options that can be nested: the interface refers to itself through
* {@link ContextualTokenReaderOptions.parentOptions parentOptions} and
* {@link ContextualTokenReaderOptions.foreignTags foreignTags}. All members are optional.
*/
export interface ContextualTokenReaderOptions {
/**
* NOTE(review): presumably the options of the enclosing context — confirm.
*/
parentOptions?: ContextualTokenReaderOptions;
/**
* Maps a numeric tag to another set of contextual options.
*
* NOTE(review): presumably the options that apply while the contents of that tag are being read — confirm.
*/
foreignTags?: Map<number, ContextualTokenReaderOptions>;
/**
* NOTE(review): judging by the name, whether self-closing tags are recognized — confirm semantics and default.
*/
areSelfClosingTagsRecognized?: boolean;
/**
* NOTE(review): judging by the name, whether CDATA sections are recognized — confirm semantics and default.
*/
areCDATASectionsRecognized?: boolean;
/**
* NOTE(review): judging by the name, whether processing instructions are recognized — confirm semantics and default.
*/
areProcessingInstructionsRecognized?: boolean;
}
/**
* Options of {@link readTokens}.
*
* Extends {@link ContextualTokenReaderOptions} with options that are not nested. All members are optional.
*/
export interface TokenReaderOptions extends ContextualTokenReaderOptions {
/**
* A function that receives a text and a range in it, and returns a number.
*
* NOTE(review): presumably converts a tag name substring to the numeric tag used by the tag sets and maps in the
* options; {@link getCaseInsensitiveHashCode} and {@link getCaseSensitiveHashCode} have a matching signature —
* confirm.
*/
readTag?: (input: string, startIndex: number, endIndex: number) => number;
/**
* NOTE(review): judging by the name, the tags whose contents are read as raw text — confirm.
*/
rawTextTags?: Set<number>;
/**
* NOTE(review): judging by the name, whether the input is a fragment rather than a complete document — confirm
* semantics and default.
*/
isFragment?: boolean;
/**
* NOTE(review): judging by the name, enables strict handling of the input — confirm semantics and default.
*/
isStrict?: boolean;
}
/**
* Reads tokens from the text and returns them by invoking a callback.
*
* Tokens are returned in the same order they are listed in the text.
*
* @param input The text to read tokens from.
* @param callback The callback that is invoked when a token is read.
* @param options Token reader options. The argument is optional.
*/
export declare function readTokens(input: string, callback: TokenCallback, options?: TokenReaderOptions): void;
/**
* Returns case-insensitive djb2 hash of a substring.
*
* @param input The text that contains the substring to hash.
* @param startIndex The index in `input` where the substring starts.
* @param endIndex The index in `input` where the substring ends (presumably exclusive — TODO confirm).
* @returns The hash code of the substring.
*/
export declare function getCaseInsensitiveHashCode(input: string, startIndex: number, endIndex: number): number;
/**
* Returns case-sensitive djb2 hash of a substring.
*
* @param input The text that contains the substring to hash.
* @param startIndex The index in `input` where the substring starts.
* @param endIndex The index in `input` where the substring ends (presumably exclusive — TODO confirm).
* @returns The hash code of the substring.
*/
export declare function getCaseSensitiveHashCode(input: string, startIndex: number, endIndex: number): number;