UNPKG

@joplin/fork-htmlparser2

Version:

Fast & forgiving HTML/XML/RSS parser

163 lines 5.52 kB
/** All the states the tokenizer can be in. */
declare const enum State {
    Text = 1,
    BeforeTagName = 2, // after <
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12, // "
    InAttributeValueSq = 13, // '
    InAttributeValueNq = 14,
    BeforeDeclaration = 15, // !
    InDeclaration = 16,
    InProcessingInstruction = 17, // ?
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,
    BeforeCdata1 = 23, // [
    BeforeCdata2 = 24, // C
    BeforeCdata3 = 25, // D
    BeforeCdata4 = 26, // A
    BeforeCdata5 = 27, // T
    BeforeCdata6 = 28, // A
    InCdata = 29, // [
    AfterCdata1 = 30, // ]
    AfterCdata2 = 31, // ]
    BeforeSpecial = 32, // S
    BeforeSpecialEnd = 33, // S
    BeforeScript1 = 34, // C
    BeforeScript2 = 35, // R
    BeforeScript3 = 36, // I
    BeforeScript4 = 37, // P
    BeforeScript5 = 38, // T
    AfterScript1 = 39, // C
    AfterScript2 = 40, // R
    AfterScript3 = 41, // I
    AfterScript4 = 42, // P
    AfterScript5 = 43, // T
    BeforeStyle1 = 44, // T
    BeforeStyle2 = 45, // Y
    BeforeStyle3 = 46, // L
    BeforeStyle4 = 47, // E
    AfterStyle1 = 48, // T
    AfterStyle2 = 49, // Y
    AfterStyle3 = 50, // L
    AfterStyle4 = 51, // E
    BeforeEntity = 52, // &
    BeforeNumericEntity = 53, // #
    InNamedEntity = 54,
    InNumericEntity = 55,
    InHexEntity = 56
}
/** Selector for the special parsing behavior used inside of script and style tags (see `Tokenizer._special`). */
declare const enum Special {
    None = 1,
    Script = 2,
    Style = 3
}
/**
 * The set of handlers a `Tokenizer` is constructed with (the `cbs` constructor argument).
 * Every member is required.
 */
interface Callbacks {
    onattribdata(value: string): void;
    onattribend(): void;
    onattribname(name: string): void;
    oncdata(data: string): void;
    onclosetag(name: string): void;
    oncomment(data: string): void;
    ondeclaration(content: string): void;
    onend(): void;
    /** `state` is optional; when supplied it is a tokenizer `State` value. */
    onerror(error: Error, state?: State): void;
    onopentagend(): void;
    onopentagname(name: string): void;
    onprocessinginstruction(instruction: string): void;
    onselfclosingtag(): void;
    ontext(value: string): void;
}
/**
 * Type declarations for the tokenizer class (default export of this module).
 *
 * Only signatures are visible in this declaration file; the notes on members
 * below that are not taken from the original comments are hedged accordingly.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    _buffer: string;
    /** The beginning of the section that is currently being read. */
    _sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    _bufferOffset: number;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    _baseState: State;
    /** For special parsing behavior inside of script and style tags. */
    _special: Special;
    /** Indicates whether the tokenizer has been paused. */
    _running: boolean;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    _ended: boolean;
    /** The callbacks object (same type as the constructor's `cbs` argument). */
    _cbs: Callbacks;
    _xmlMode: boolean;
    _decodeEntities: boolean;
    /**
     * @param options Optional settings; may be `null`. Both `xmlMode` and `decodeEntities` may be omitted.
     * @param cbs The handlers object; all `Callbacks` members are required.
     */
    constructor(options: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    } | null, cbs: Callbacks);
    reset(): void;
    // NOTE(review): the `_state*` methods below each take a single string `c`;
    // presumably one handler per `State` value, called with the current character — confirm against the implementation.
    _stateText(c: string): void;
    _stateBeforeTagName(c: string): void;
    _stateInTagName(c: string): void;
    _stateBeforeClosingTagName(c: string): void;
    _stateInClosingTagName(c: string): void;
    _stateAfterClosingTagName(c: string): void;
    _stateBeforeAttributeName(c: string): void;
    _stateInSelfClosingTag(c: string): void;
    _stateInAttributeName(c: string): void;
    _stateAfterAttributeName(c: string): void;
    _stateBeforeAttributeValue(c: string): void;
    _stateInAttributeValueDoubleQuotes(c: string): void;
    _stateInAttributeValueSingleQuotes(c: string): void;
    _stateInAttributeValueNoQuotes(c: string): void;
    _stateBeforeDeclaration(c: string): void;
    _stateInDeclaration(c: string): void;
    _stateInProcessingInstruction(c: string): void;
    _stateBeforeComment(c: string): void;
    _stateInComment(c: string): void;
    _stateInSpecialComment(c: string): void;
    _stateAfterComment1(c: string): void;
    _stateAfterComment2(c: string): void;
    _stateBeforeCdata6(c: string): void;
    _stateInCdata(c: string): void;
    _stateAfterCdata1(c: string): void;
    _stateAfterCdata2(c: string): void;
    _stateBeforeSpecial(c: string): void;
    _stateBeforeSpecialEnd(c: string): void;
    _stateBeforeScript5(c: string): void;
    _stateAfterScript5(c: string): void;
    _stateBeforeStyle4(c: string): void;
    _stateAfterStyle4(c: string): void;
    _parseNamedEntityStrict(): void;
    _parseLegacyEntity(): void;
    _stateInNamedEntity(c: string): void;
    _decodeNumericEntity(offset: number, base: number): void;
    _stateInNumericEntity(c: string): void;
    _stateInHexEntity(c: string): void;
    _cleanup(): void;
    /** @param chunk A string chunk handed to the tokenizer. */
    write(chunk: string): void;
    _parse(): void;
    pause(): void;
    resume(): void;
    /** @param chunk Optional final string chunk. */
    end(chunk?: string): void;
    _finish(): void;
    _handleTrailingData(): void;
    getAbsoluteIndex(): number;
    _getSection(): string;
    /** @param name One of the three `Callbacks` member names accepted here. */
    _emitToken(name: "onopentagname" | "onclosetag" | "onattribdata"): void;
    _emitPartial(value: string): void;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map