/*
 * html-tokenizer — type declarations (tokenizer.d.ts)
 * Version: (not specified in the source listing)
 * Small, fast, event-driven, fault-tolerant HTML tokenizer. Works in Node or browsers.
 * Listing metadata: 93 lines, 2.11 kB, TypeScript.
 */
import { Entities } from './types';
/**
 * Options passed to a tokenizer on instantiation.
 */
export interface TokenizerOptions {
  /**
   * Optional entity map used while tokenizing — presumably maps HTML
   * entity names to their replacement characters (see `Entities` in
   * './types'); confirm against the implementation.
   */
  entities?: Entities;
}
/**
 * A token emitted during a tokenizing run.
 *
 * This is a discriminated union: every member carries a string-literal
 * `type` field, so consumers can narrow with a `switch (token.type)`.
 */
export declare type Token = StartToken | OpeningTagToken | AttributeToken | OpeningTagEndToken | TextToken | CommentToken | ClosingTagToken | DoneToken;
/**
 * Start of tokenizing run. Carries no payload; acts purely as a
 * begin-of-stream marker.
 */
export interface StartToken {
  /** Discriminant for the `Token` union. */
  type: 'start';
}
/**
 * Beginning of opening tag (e.g. the `<div` in `<div class="x">`).
 */
export interface OpeningTagToken {
  /** Discriminant for the `Token` union. */
  type: 'opening-tag';
  /** Tag name (e.g. `'div'`). */
  name: string;
}
/**
 * Attribute found inside an opening tag.
 */
export interface AttributeToken {
  /** Discriminant for the `Token` union. */
  type: 'attribute';
  /** Attribute name. */
  name: string;
  /** Attribute value — presumably `''` for valueless attributes; confirm in implementation. */
  value: string;
}
/**
 * End of opening tag.
 */
export interface OpeningTagEndToken {
  /** Discriminant for the `Token` union. */
  type: 'opening-tag-end';
  /** Name of the tag being closed. */
  name: string;
  /** The literal text that terminated the tag: `'/>'` for self-closing syntax, `'>'` otherwise. */
  token: '>' | '/>';
}
/**
 * Text node chunk. A single text node may be emitted as multiple
 * consecutive chunks — NOTE(review): inferred from "chunk" in the
 * original doc; confirm against the implementation.
 */
export interface TextToken {
  /** Discriminant for the `Token` union. */
  type: 'text';
  /** The text content of this chunk. */
  text: string;
}
/**
 * Comment (`<!-- ... -->`).
 */
export interface CommentToken {
  /** Discriminant for the `Token` union. */
  type: 'comment';
  /** Comment text — presumably without the `<!--`/`-->` delimiters; confirm in implementation. */
  text: string;
}
/**
 * Closing tag (e.g. `</div>`).
 */
export interface ClosingTagToken {
  /** Discriminant for the `Token` union. */
  type: 'closing-tag';
  /** Name of the tag being closed (e.g. `'div'`). */
  name: string;
}
/**
 * End of tokenizing run. Carries no payload; acts purely as an
 * end-of-stream marker.
 */
export interface DoneToken {
  /** Discriminant for the `Token` union. */
  type: 'done';
}
/**
 * A low-level tokenizer utility used by the HTML parser.
 *
 * Not directly constructible (the constructor is private): create
 * instances via the static `from()` factory, or skip instantiation
 * entirely with the static `tokenize()` convenience method.
 */
export declare class Tokenizer {
  /**
   * Entity lookup used while tokenizing — presumably built from
   * `TokenizerOptions.entities`; type is erased in this declaration,
   * so confirm against the implementation.
   */
  private readonly entityMap;
  /**
   * Static method to tokenize HTML without instantiating a Tokenizer instance.
   * @param html HTML string to tokenize.
   * @param opts Optional tokenizer configuration options.
   * @returns An iterator of `Token`s; consumable via for/of, spread, etc.
   */
  static tokenize(html: string, opts?: TokenizerOptions): IterableIterator<Token>;
  /**
   * Static factory to create a tokenizer.
   * NOTE(review): `opts` is required here while the static `tokenize()`
   * accepts it optionally — confirm whether this asymmetry is intentional
   * in the implementation before relying on it.
   * @param opts Tokenizer options.
   */
  static from(opts: TokenizerOptions): Tokenizer;
  /** Private: use `Tokenizer.from()` (or static `tokenize()`) instead of `new`. */
  private constructor();
  /**
   * Tokenize an HTML string. Returns an iterator, thus allowing
   * tokens to be consumed via for/of or other iteration mechanisms.
   * @param html HTML string to tokenize.
   */
  tokenize(html: string): IterableIterator<Token>;
  /** Internal implementation backing both `tokenize` entry points; signature erased in this declaration. */
  private _tokenize;
}
//# sourceMappingURL=tokenizer.d.ts.map