html-tokenizer

Small, fast, event-driven, fault-tolerant HTML tokenizer. Works in Node or browsers.

tokenizer.d.ts
import { Entities } from './types';
/**
 * Options passed to a tokenizer on instantiation.
 */
export interface TokenizerOptions {
    entities?: Entities;
}
/**
 * A token emitted during a tokenizing run.
 */
export declare type Token = StartToken | OpeningTagToken | AttributeToken | OpeningTagEndToken | TextToken | CommentToken | ClosingTagToken | DoneToken;
/**
 * Start of tokenizing run.
 */
export interface StartToken {
    type: 'start';
}
/**
 * Beginning of opening tag.
 */
export interface OpeningTagToken {
    type: 'opening-tag';
    name: string;
}
/**
 * Attribute.
 */
export interface AttributeToken {
    type: 'attribute';
    name: string;
    value: string;
}
/**
 * End of opening tag.
 */
export interface OpeningTagEndToken {
    type: 'opening-tag-end';
    name: string;
    token: '>' | '/>';
}
/**
 * Text node chunk.
 */
export interface TextToken {
    type: 'text';
    text: string;
}
/**
 * Comment.
 */
export interface CommentToken {
    type: 'comment';
    text: string;
}
/**
 * Closing tag.
 */
export interface ClosingTagToken {
    type: 'closing-tag';
    name: string;
}
/**
 * End of tokenizing run.
 */
export interface DoneToken {
    type: 'done';
}
/**
 * A low-level tokenizer utility used by the HTML parser.
 */
export declare class Tokenizer {
    private readonly entityMap;
    /**
     * Static method to tokenize HTML without instantiating a Tokenizer instance.
     * @param html HTML string to tokenize.
     * @param opts Optional tokenizer configuration options.
     */
    static tokenize(html: string, opts?: TokenizerOptions): IterableIterator<Token>;
    /**
     * Static factory to create a tokenizer.
     * @param opts Tokenizer options.
     */
    static from(opts: TokenizerOptions): Tokenizer;
    private constructor();
    /**
     * Tokenize an HTML string. Returns an iterator, thus allowing
     * tokens to be consumed via for/of or other iteration mechanisms.
     * @param html HTML string to tokenize.
     */
    tokenize(html: string): IterableIterator<Token>;
    private _tokenize;
}
//# sourceMappingURL=tokenizer.d.ts.map
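
The declarations above suggest the common usage pattern: call the static Tokenizer.tokenize and consume the resulting iterator with for/of. The sketch below is a minimal, illustrative example, not part of the package; it assumes the module is imported as './tokenizer', matching this declaration file, and the sample HTML string is made up.

import { Tokenizer } from './tokenizer';

// Tokenize a small snippet; no instance is needed because
// Tokenizer.tokenize is static and returns a lazy iterator.
const html = '<p class="intro">Hello <!-- aside --> world</p>';

for (const token of Tokenizer.tokenize(html)) {
    switch (token.type) {
        case 'opening-tag':
            console.log('open  <' + token.name + '>');
            break;
        case 'attribute':
            console.log('attr  ' + token.name + '="' + token.value + '"');
            break;
        case 'opening-tag-end':
            console.log('end   ' + token.token); // '>' or '/>'
            break;
        case 'text':
            console.log('text  ' + JSON.stringify(token.text));
            break;
        case 'comment':
            console.log('note  ' + JSON.stringify(token.text));
            break;
        case 'closing-tag':
            console.log('close </' + token.name + '>');
            break;
        // 'start' and 'done' simply bracket the run.
    }
}

Because Token is a discriminated union on its type field, TypeScript narrows each case automatically, so token.name, token.value, and token.text are correctly typed inside their branches. For repeated runs with shared options (for example a custom entities map), Tokenizer.from(opts) builds a reusable instance whose tokenize method has the same iterator signature.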