@joplin/fork-htmlparser2
Version:
Fast & forgiving HTML/XML/RSS parser
163 lines • 5.52 kB
TypeScript
/**
 * All the states the tokenizer can be in.
 *
 * Members are numbered consecutively from 1 (`Text`) to 56 (`InHexEntity`).
 * Several members carry a terse trailing comment naming a single character
 * (for example `// "` or `//C`).
 * NOTE(review): judging by `BeforeTagName` ("after <"), that character is
 * presumably the one consumed just before the state is entered — confirm
 * against the tokenizer implementation, which is not part of this file.
 */
declare const enum State {
// --- Plain text ---
Text = 1,
// --- Opening, self-closing and closing tags ---
BeforeTagName = 2,//after <
InTagName = 3,
InSelfClosingTag = 4,
BeforeClosingTagName = 5,
InClosingTagName = 6,
AfterClosingTagName = 7,
// --- Attributes (Dq / Sq / Nq: double-quoted, single-quoted, unquoted value) ---
BeforeAttributeName = 8,
InAttributeName = 9,
AfterAttributeName = 10,
BeforeAttributeValue = 11,
InAttributeValueDq = 12,// "
InAttributeValueSq = 13,// '
InAttributeValueNq = 14,
// --- Declarations and processing instructions ---
BeforeDeclaration = 15,// !
InDeclaration = 16,
InProcessingInstruction = 17,// ?
// --- Comments ---
BeforeComment = 18,
InComment = 19,
InSpecialComment = 20,
AfterComment1 = 21,
AfterComment2 = 22,
// --- CDATA sections: one state per character of the opening "[CDATA[" and closing "]]" ---
BeforeCdata1 = 23,// [
BeforeCdata2 = 24,// C
BeforeCdata3 = 25,// D
BeforeCdata4 = 26,// A
BeforeCdata5 = 27,// T
BeforeCdata6 = 28,// A
InCdata = 29,// [
AfterCdata1 = 30,// ]
AfterCdata2 = 31,// ]
// --- Special tags: names starting with "s" (script / style), opening and closing ---
BeforeSpecial = 32,//S
BeforeSpecialEnd = 33,//S
// "script": Before* states step through the letters of the name, After* states mirror them.
// NOTE(review): Before* presumably applies to the opening tag and After* to the closing tag — confirm.
BeforeScript1 = 34,//C
BeforeScript2 = 35,//R
BeforeScript3 = 36,//I
BeforeScript4 = 37,//P
BeforeScript5 = 38,//T
AfterScript1 = 39,//C
AfterScript2 = 40,//R
AfterScript3 = 41,//I
AfterScript4 = 42,//P
AfterScript5 = 43,//T
// "style": same letter-by-letter scheme as for "script".
BeforeStyle1 = 44,//T
BeforeStyle2 = 45,//Y
BeforeStyle3 = 46,//L
BeforeStyle4 = 47,//E
AfterStyle1 = 48,//T
AfterStyle2 = 49,//Y
AfterStyle3 = 50,//L
AfterStyle4 = 51,//E
// --- Entities: named, decimal numeric and hexadecimal ---
BeforeEntity = 52,//&
BeforeNumericEntity = 53,//#
InNamedEntity = 54,
InNumericEntity = 55,
InHexEntity = 56
}
/**
 * Kinds of "special" element the tokenizer distinguishes.
 *
 * This is the type of the `_special` field on `Tokenizer`, which that class
 * documents as being "for special parsing behavior inside of script and
 * style tags".
 */
declare const enum Special {
// No special element is in effect.
None = 1,
// The `script` element.
Script = 2,
// The `style` element.
Style = 3
}
/**
 * Set of handlers a `Tokenizer` is constructed with (the `cbs` constructor
 * argument, also the type of its `_cbs` field).
 *
 * Every member is required and returns nothing. This declaration file does
 * not show when each handler is invoked; the per-member notes below are
 * inferred from the names and should be confirmed against the implementation.
 */
interface Callbacks {
// Presumably receives (a piece of) an attribute value — confirm. One of the names accepted by `Tokenizer._emitToken`.
onattribdata(value: string): void;
// Presumably signals the end of the current attribute — confirm.
onattribend(): void;
// Presumably receives an attribute name — confirm.
onattribname(name: string): void;
// Presumably receives the content of a CDATA section — confirm.
oncdata(data: string): void;
// Presumably receives the name of a closing tag — confirm. One of the names accepted by `Tokenizer._emitToken`.
onclosetag(name: string): void;
// Presumably receives the content of a comment — confirm.
oncomment(data: string): void;
// Presumably receives the content of a declaration — confirm.
ondeclaration(content: string): void;
// Presumably signals that tokenizing has finished — confirm.
onend(): void;
// Receives an error and, optionally, a tokenizer `State` value.
// NOTE(review): presumably the state in which the error occurred — confirm.
onerror(error: Error, state?: State): void;
// Presumably signals the end of an opening tag — confirm.
onopentagend(): void;
// Presumably receives the name of an opening tag — confirm. One of the names accepted by `Tokenizer._emitToken`.
onopentagname(name: string): void;
// Presumably receives the content of a processing instruction — confirm.
onprocessinginstruction(instruction: string): void;
// Presumably signals a self-closing tag — confirm.
onselfclosingtag(): void;
// Presumably receives a run of text — confirm.
ontext(value: string): void;
}
/**
 * Type declaration of the tokenizer class (default export of this module).
 *
 * A tokenizer is constructed with an options object (or `null`) and a
 * `Callbacks` object. Its current position in the state machine is held in
 * `_state` (a `State` value), and most `_state*` methods below take a single
 * string `c`.
 * NOTE(review): `c` is presumably the character currently being examined,
 * and input is presumably fed through `write` / `end` — this file contains
 * signatures only, so confirm against the implementation.
 */
export default class Tokenizer {
/** The current state the tokenizer is in. */
_state: State;
/** The read buffer. */
_buffer: string;
/** The beginning of the section that is currently being read. */
_sectionStart: number;
/** The index within the buffer that we are currently looking at. */
_index: number;
/**
* Data that has already been processed will be removed from the buffer occasionally.
* `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
*/
_bufferOffset: number;
/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
_baseState: State;
/** For special parsing behavior inside of script and style tags. */
_special: Special;
/** Indicates whether the tokenizer has been paused. */
_running: boolean;
/** Indicates whether the tokenizer has finished running / `.end` has been called. */
_ended: boolean;
/** Handler object; has the same type as the constructor's `cbs` argument. */
_cbs: Callbacks;
/** NOTE(review): presumably mirrors the `xmlMode` constructor option — confirm. */
_xmlMode: boolean;
/** NOTE(review): presumably mirrors the `decodeEntities` constructor option — confirm. */
_decodeEntities: boolean;
/**
* @param options Optional settings; `null` is accepted. Both `xmlMode` and `decodeEntities` may be omitted.
* @param cbs Handler object implementing every member of `Callbacks`.
*/
constructor(options: {
xmlMode?: boolean;
decodeEntities?: boolean;
} | null, cbs: Callbacks);
/** NOTE(review): presumably restores the tokenizer to its initial condition — confirm. */
reset(): void;
// --- Per-state handlers: each is named after a member of `State` ---
_stateText(c: string): void;
_stateBeforeTagName(c: string): void;
_stateInTagName(c: string): void;
_stateBeforeClosingTagName(c: string): void;
_stateInClosingTagName(c: string): void;
_stateAfterClosingTagName(c: string): void;
_stateBeforeAttributeName(c: string): void;
_stateInSelfClosingTag(c: string): void;
_stateInAttributeName(c: string): void;
_stateAfterAttributeName(c: string): void;
_stateBeforeAttributeValue(c: string): void;
// The three handlers below correspond to State.InAttributeValueDq / Sq / Nq.
_stateInAttributeValueDoubleQuotes(c: string): void;
_stateInAttributeValueSingleQuotes(c: string): void;
_stateInAttributeValueNoQuotes(c: string): void;
_stateBeforeDeclaration(c: string): void;
_stateInDeclaration(c: string): void;
_stateInProcessingInstruction(c: string): void;
_stateBeforeComment(c: string): void;
_stateInComment(c: string): void;
_stateInSpecialComment(c: string): void;
_stateAfterComment1(c: string): void;
_stateAfterComment2(c: string): void;
// NOTE(review): for the numbered CDATA / script / style state sequences, only the
// highest-numbered step has a method declared here (e.g. `_stateBeforeCdata6` but no
// `_stateBeforeCdata1`..`5`). The remaining steps are presumably defined in a way
// that does not surface in this declaration file — confirm.
_stateBeforeCdata6(c: string): void;
_stateInCdata(c: string): void;
_stateAfterCdata1(c: string): void;
_stateAfterCdata2(c: string): void;
_stateBeforeSpecial(c: string): void;
_stateBeforeSpecialEnd(c: string): void;
_stateBeforeScript5(c: string): void;
_stateAfterScript5(c: string): void;
_stateBeforeStyle4(c: string): void;
_stateAfterStyle4(c: string): void;
// --- Entity handling ---
// NOTE(review): the "strict" / "legacy" distinction is not visible from this file — see the implementation.
_parseNamedEntityStrict(): void;
_parseLegacyEntity(): void;
_stateInNamedEntity(c: string): void;
/**
* NOTE(review): presumably decodes a numeric character reference; `base` is presumably
* the radix (decimal vs. hexadecimal) and `offset` a position adjustment — confirm.
*/
_decodeNumericEntity(offset: number, base: number): void;
_stateInNumericEntity(c: string): void;
_stateInHexEntity(c: string): void;
// --- Buffer management and driving the tokenizer ---
/** NOTE(review): presumably discards already-processed buffer content (see `_bufferOffset`) — confirm. */
_cleanup(): void;
/** Accepts a chunk of input as a string. NOTE(review): presumably appends it to `_buffer` and continues parsing — confirm. */
write(chunk: string): void;
_parse(): void;
/** NOTE(review): presumably clears `_running` so that parsing stops until `resume` is called — confirm. */
pause(): void;
/** NOTE(review): presumably the counterpart of `pause` — confirm. */
resume(): void;
/** Accepts an optional final chunk. The `_ended` field documents that it reflects `.end` having been called. */
end(chunk?: string): void;
_finish(): void;
_handleTrailingData(): void;
/** NOTE(review): presumably the current position relative to all input seen so far (`_bufferOffset` plus `_index`) — confirm. */
getAbsoluteIndex(): number;
/** NOTE(review): presumably returns the part of `_buffer` starting at `_sectionStart` — confirm. */
_getSection(): string;
/**
* @param name One of the three `Callbacks` member names listed in the union; each of
* those handlers takes a single string argument.
*/
_emitToken(name: "onopentagname" | "onclosetag" | "onattribdata"): void;
_emitPartial(value: string): void;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map