@nfrasser/simple-html-tokenizer
Simple HTML Tokenizer is a lightweight JavaScript library that can be used to tokenize the kind of HTML normally found in templates.
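For orientation, here is a minimal usage sketch; it assumes this fork keeps the upstream simple-html-tokenizer entry points (tokenize, Tokenizer, EntityParser, HTML5NamedCharRefs). Passing { loc: true } makes the Tokenizer class below stamp line/column ranges onto each token via addLocInfo().

import { tokenize } from '@nfrasser/simple-html-tokenizer';

// One-shot tokenization of a template fragment (hypothetical input).
const tokens = tokenize('<div class="greeting">Hello</div>', { loc: true });
// Expected shape, per the delegate methods below:
//   tokens[0]: StartTag 'div' with attributes [['class', 'greeting', true]]
//   tokens[1]: Chars 'Hello'
//   tokens[2]: EndTag 'div'
// Each token also carries loc: { start: { line, column }, end: { line, column } }.

The full source of the Tokenizer delegate follows.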
import EventedTokenizer from './evented-tokenizer';
import {
  Attribute,
  EntityParser,
  Token,
  TokenizerDelegate,
  TokenMap,
  TokenType,
  TokenizerOptions
} from './types';

export default class Tokenizer implements TokenizerDelegate {
  private token: Token | null = null;

  // Track where the current token started so addLocInfo() can stamp a range.
  private startLine = 1;
  private startColumn = 0;

  private tokenizer: EventedTokenizer;
  private tokens: Token[] = [];
  private _currentAttribute?: Attribute;

  constructor(
    entityParser: EntityParser,
    private options: TokenizerOptions = {}
  ) {
    this.tokenizer = new EventedTokenizer(this, entityParser, options.mode);
    this._currentAttribute = undefined;
  }

  tokenize(input: string) {
    this.tokens = [];
    this.tokenizer.tokenize(input);
    return this.tokens;
  }

  tokenizePart(input: string) {
    this.tokens = [];
    this.tokenizer.tokenizePart(input);
    return this.tokens;
  }

  tokenizeEOF() {
    this.tokens = [];
    this.tokenizer.tokenizeEOF();
    return this.tokens[0];
  }

  reset() {
    this.token = null;
    this.startLine = 1;
    this.startColumn = 0;
  }

  // Returns the token currently being built, optionally asserting its type.
  current<T extends TokenType, U extends TokenType>(
    type1: T,
    type2: U
  ): TokenMap[T] | TokenMap[U];
  current<T extends TokenType>(type: T): TokenMap[T];
  current(): Token;
  current(): Token {
    const token = this.token;

    if (token === null) {
      throw new Error('token was unexpectedly null');
    }

    if (arguments.length === 0) {
      return token;
    }

    for (let i = 0; i < arguments.length; i++) {
      if (token.type === arguments[i]) {
        return token;
      }
    }

    throw new Error(`token type was unexpectedly ${token.type}`);
  }

  push(token: Token) {
    this.token = token;
    this.tokens.push(token);
  }

  currentAttribute() {
    return this._currentAttribute;
  }

  // When options.loc is set, attach the start/end positions of the token
  // that just finished, then advance the start marker for the next one.
  addLocInfo() {
    if (this.options.loc) {
      this.current().loc = {
        start: {
          line: this.startLine,
          column: this.startColumn
        },
        end: {
          line: this.tokenizer.line,
          column: this.tokenizer.column
        }
      };
    }
    this.startLine = this.tokenizer.line;
    this.startColumn = this.tokenizer.column;
  }

  // Doctype

  beginDoctype() {
    this.push({
      type: TokenType.Doctype,
      name: ''
    });
  }

  appendToDoctypeName(char: string) {
    this.current(TokenType.Doctype).name += char;
  }

  appendToDoctypePublicIdentifier(char: string) {
    let doctype = this.current(TokenType.Doctype);

    if (doctype.publicIdentifier === undefined) {
      doctype.publicIdentifier = char;
    } else {
      doctype.publicIdentifier += char;
    }
  }

  appendToDoctypeSystemIdentifier(char: string) {
    let doctype = this.current(TokenType.Doctype);

    if (doctype.systemIdentifier === undefined) {
      doctype.systemIdentifier = char;
    } else {
      doctype.systemIdentifier += char;
    }
  }

  endDoctype() {
    this.addLocInfo();
  }

  // Data

  beginData() {
    this.push({
      type: TokenType.Chars,
      chars: ''
    });
  }

  appendToData(char: string) {
    this.current(TokenType.Chars).chars += char;
  }

  finishData() {
    this.addLocInfo();
  }

  // Comment

  beginComment() {
    this.push({
      type: TokenType.Comment,
      chars: ''
    });
  }

  appendToCommentData(char: string) {
    this.current(TokenType.Comment).chars += char;
  }

  finishComment() {
    this.addLocInfo();
  }

  // Tags - basic

  tagOpen() {}

  beginStartTag() {
    this.push({
      type: TokenType.StartTag,
      tagName: '',
      attributes: [],
      selfClosing: false
    });
  }

  beginEndTag() {
    this.push({
      type: TokenType.EndTag,
      tagName: ''
    });
  }

  finishTag() {
    this.addLocInfo();
  }

  markTagAsSelfClosing() {
    this.current(TokenType.StartTag).selfClosing = true;
  }

  // Tags - name

  appendToTagName(char: string) {
    this.current(TokenType.StartTag, TokenType.EndTag).tagName += char;
  }

  // Tags - attributes

  beginAttribute() {
    // Attribute tuple: [name, value, isQuoted]
    this._currentAttribute = ['', '', false];
  }

  appendToAttributeName(char: string) {
    this.currentAttribute()![0] += char;
  }

  beginAttributeValue(isQuoted: boolean) {
    this.currentAttribute()![2] = isQuoted;
  }

  appendToAttributeValue(char: string) {
    this.currentAttribute()![1] += char;
  }

  finishAttributeValue() {
    this.current(TokenType.StartTag).attributes.push(this._currentAttribute!);
  }

  reportSyntaxError(message: string) {
    this.current().syntaxError = message;
  }
}
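The same class also supports incremental input, which is what tokenizePart() and tokenizeEOF() are for. A sketch, under the same assumption that EntityParser and HTML5NamedCharRefs are re-exported from the package root:

import { Tokenizer, EntityParser, HTML5NamedCharRefs } from '@nfrasser/simple-html-tokenizer';

const tokenizer = new Tokenizer(new EntityParser(HTML5NamedCharRefs), { loc: true });

// Each tokenizePart() call clears the internal token list and returns only
// the tokens begun while consuming that chunk; a token that straddles a
// chunk boundary keeps being appended to across calls.
const first = tokenizer.tokenizePart('<p id="x">Hel');
const rest = tokenizer.tokenizePart('lo</p>');

// tokenizeEOF() flushes any remaining state and returns the first token
// pushed during the flush, if any.
const last = tokenizer.tokenizeEOF();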