UNPKG

@nfrasser/simple-html-tokenizer

Version:

Simple HTML Tokenizer is a lightweight JavaScript library that can be used to tokenize the kind of HTML normally found in templates.

128 lines 13 kB
import EventedTokenizer from './evented-tokenizer';

/**
 * Builds an array of plain token objects ('Chars', 'Comment', 'StartTag',
 * 'EndTag') from HTML input.
 *
 * The character-level work is done by an EventedTokenizer, which is handed
 * this instance at construction time. It presumably calls the begin*,
 * appendTo*, finish*, markTagAsSelfClosing and reportSyntaxError methods
 * below as callbacks; confirm against ./evented-tokenizer.
 */
var Tokenizer = (function () {
    /**
     * Makes `token` the current token of `self` and appends it to the
     * output list. Private to this closure; not part of the public API.
     */
    function startToken(self, token) {
        self.token = token;
        self.tokens.push(token);
    }

    /**
     * @param entityParser Forwarded untouched to the EventedTokenizer.
     * @param {{loc?: *}} [options] When `options.loc` is truthy, every
     *     finished token receives a `loc` object. Defaults to `{}`.
     */
    function Tokenizer(entityParser, options) {
        if (options === undefined) {
            options = {};
        }
        this.options = options;
        // Token currently being built; null until a begin* method runs.
        this.token = null;
        // Position where the next token starts; updated by addLocInfo().
        this.startLine = 1;
        this.startColumn = 0;
        // Output of the current tokenize*/tokenizeEOF call.
        this.tokens = [];
        // [name, value, isQuoted] triple being filled in, or null.
        this.currentAttribute = null;
        this.tokenizer = new EventedTokenizer(this, entityParser);
    }

    var proto = Tokenizer.prototype;

    /**
     * Clears the output list, runs the underlying tokenizer's `tokenize`
     * on `input`, and returns the tokens collected during that call.
     */
    proto.tokenize = function (input) {
        this.tokens = [];
        this.tokenizer.tokenize(input);
        return this.tokens;
    };

    /**
     * Same as `tokenize`, but delegates to the underlying tokenizer's
     * `tokenizePart`. The output list is cleared first, so only tokens
     * started during this call are returned.
     */
    proto.tokenizePart = function (input) {
        this.tokens = [];
        this.tokenizer.tokenizePart(input);
        return this.tokens;
    };

    /**
     * Clears the output list, calls the underlying tokenizer's
     * `tokenizeEOF`, and returns the first token produced by that call
     * (`undefined` when none was produced).
     */
    proto.tokenizeEOF = function () {
        this.tokens = [];
        this.tokenizer.tokenizeEOF();
        return this.tokens[0];
    };

    /**
     * Drops the current token and rewinds the start position to line 1,
     * column 0. Leaves `tokens` and `currentAttribute` as they are.
     */
    proto.reset = function () {
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
    };

    /**
     * When the `loc` option is truthy, stamps the current token with a
     * `loc` spanning from the remembered start position to the underlying
     * tokenizer's current line/column. In every case the start position is
     * then advanced to that current line/column.
     */
    proto.addLocInfo = function () {
        var cursor = this.tokenizer;
        if (this.options.loc) {
            this.token.loc = {
                start: { line: this.startLine, column: this.startColumn },
                end: { line: cursor.line, column: cursor.column }
            };
        }
        this.startLine = cursor.line;
        this.startColumn = cursor.column;
    };

    // Data

    /** Starts an empty 'Chars' token and appends it to the output. */
    proto.beginData = function () {
        startToken(this, { type: 'Chars', chars: '' });
    };

    /** Appends `char` to the current token's `chars`. */
    proto.appendToData = function (char) {
        this.token.chars += char;
    };

    /** Finalises the current 'Chars' token via addLocInfo(). */
    proto.finishData = function () {
        this.addLocInfo();
    };

    // Comment

    /** Starts an empty 'Comment' token and appends it to the output. */
    proto.beginComment = function () {
        startToken(this, { type: 'Comment', chars: '' });
    };

    /** Appends `char` to the current token's `chars`. */
    proto.appendToCommentData = function (char) {
        this.token.chars += char;
    };

    /** Finalises the current 'Comment' token via addLocInfo(). */
    proto.finishComment = function () {
        this.addLocInfo();
    };

    // Tags - basic

    /**
     * Starts a 'StartTag' token with an empty name, no attributes and
     * `selfClosing: false`, and appends it to the output.
     */
    proto.beginStartTag = function () {
        startToken(this, {
            type: 'StartTag',
            tagName: '',
            attributes: [],
            selfClosing: false
        });
    };

    /** Starts an 'EndTag' token with an empty name and appends it. */
    proto.beginEndTag = function () {
        startToken(this, { type: 'EndTag', tagName: '' });
    };

    /** Finalises the current tag token via addLocInfo(). */
    proto.finishTag = function () {
        this.addLocInfo();
    };

    /** Sets `selfClosing` to true on the current token. */
    proto.markTagAsSelfClosing = function () {
        this.token.selfClosing = true;
    };

    // Tags - name

    /** Appends `char` to the current token's `tagName`. */
    proto.appendToTagName = function (char) {
        this.token.tagName += char;
    };

    // Tags - attributes

    /**
     * Creates a fresh `["", "", null]` attribute triple (name, value,
     * isQuoted), remembers it as the current attribute and adds it to the
     * current token's `attributes`.
     */
    proto.beginAttribute = function () {
        var attribute = ["", "", null];
        this.currentAttribute = attribute;
        this.token.attributes.push(attribute);
    };

    /** Appends `char` to the current attribute's name (slot 0). */
    proto.appendToAttributeName = function (char) {
        this.currentAttribute[0] += char;
    };

    /** Stores `isQuoted` in the current attribute's slot 2. */
    proto.beginAttributeValue = function (isQuoted) {
        this.currentAttribute[2] = isQuoted;
    };

    /**
     * Appends `char` to the current attribute's value (slot 1), treating a
     * falsy existing value as the empty string.
     */
    proto.appendToAttributeValue = function (char) {
        var attribute = this.currentAttribute;
        attribute[1] = (attribute[1] || "") + char;
    };

    /** Intentionally empty: nothing is done when an attribute value ends. */
    proto.finishAttributeValue = function () { };

    /** Stores `message` as `syntaxError` on the current token. */
    proto.reportSyntaxError = function (message) {
        this.token.syntaxError = message;
    };

    return Tokenizer;
}());

export default Tokenizer;
//# 
sourceMappingURL=data:application/json;base64,{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["tokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,gBAAgB,MAAM,qBAAqB,CAAC;AAIlD,CAAC;AAuBF;IAQE,mBAAY,YAAY,EAAU,OAA8B;QAA9B,wBAAA,EAAA,YAA8B;QAA9B,YAAO,GAAP,OAAO,CAAuB;QAPxD,UAAK,GAAU,IAAI,CAAC;QACpB,cAAS,GAAG,CAAC,CAAC;QACd,gBAAW,GAAG,CAAC,CAAC;QAEhB,WAAM,GAAY,EAAE,CAAC;QACrB,qBAAgB,GAAc,IAAI,CAAC;QAGzC,IAAI,CAAC,SAAS,GAAG,IAAI,gBAAgB,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IAC5D,CAAC;IAED,4BAAQ,GAAR,UAAS,KAAK;QACZ,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,gCAAY,GAAZ,UAAa,KAAK;QAChB,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,+BAAW,GAAX;QACE,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACxB,CAAC;IAED,yBAAK,GAAL;QACE,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC;QACnB,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACvB,CAAC;IAED,8BAAU,GAAV;QACE,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;YACrB,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG;gBACf,KAAK,EAAE;oBACL,IAAI,EAAE,IAAI,CAAC,SAAS;oBACpB,MAAM,EAAE,IAAI,CAAC,WAAW;iBACzB;gBACD,GAAG,EAAE;oBACH,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI;oBACzB,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM;iBAC9B;aACF,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;QACrC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;IAC3C,CAAC;IAED,OAAO;IAEP,6BAAS,GAAT;QACE,IAAI,CAAC,KAAK,GAAG;YACX,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,EAAE;SACV,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,gCAAY,GAAZ,UAAa,IAAI;QACf,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC;IAC3B,CAAC;IAED,8BAAU,GAAV;QACE,IAAI,CAAC,UAAU,EAAE,CAAC;IACpB,CAAC;IAED,UAAU;IAEV,gCAAY,GAAZ;QACE,IAAI,CAAC,KAAK,GAAG;YACX,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,EAAE;SACV,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK
,CAAC,CAAC;IAC/B,CAAC;IAED,uCAAmB,GAAnB,UAAoB,IAAI;QACtB,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC;IAC3B,CAAC;IAED,iCAAa,GAAb;QACE,IAAI,CAAC,UAAU,EAAE,CAAC;IACpB,CAAC;IAED,eAAe;IAEf,iCAAa,GAAb;QACE,IAAI,CAAC,KAAK,GAAG;YACX,IAAI,EAAE,UAAU;YAChB,OAAO,EAAE,EAAE;YACX,UAAU,EAAE,EAAE;YACd,WAAW,EAAE,KAAK;SACnB,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,+BAAW,GAAX;QACE,IAAI,CAAC,KAAK,GAAG;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,EAAE;SACZ,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,6BAAS,GAAT;QACE,IAAI,CAAC,UAAU,EAAE,CAAC;IACpB,CAAC;IAED,wCAAoB,GAApB;QACE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC;IAChC,CAAC;IAED,cAAc;IAEd,mCAAe,GAAf,UAAgB,IAAI;QAClB,IAAI,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI,CAAC;IAC7B,CAAC;IAED,oBAAoB;IAEpB,kCAAc,GAAd;QACE,IAAI,CAAC,gBAAgB,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IACpD,CAAC;IAED,yCAAqB,GAArB,UAAsB,IAAI;QACxB,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACnC,CAAC;IAED,uCAAmB,GAAnB,UAAoB,QAAQ;QAC1B,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC;IACtC,CAAC;IAED,0CAAsB,GAAtB,UAAuB,IAAI;QACzB,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1D,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACnC,CAAC;IAED,wCAAoB,GAApB;IACA,CAAC;IAED,qCAAiB,GAAjB,UAAkB,OAAe;QAC/B,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC;IACnC,CAAC;IACH,gBAAC;AAAD,CAAC,AArJD,IAqJC","sourcesContent":["import EventedTokenizer from './evented-tokenizer';\n\nexport interface TokenizerOptions {\n  loc?: any;\n};\n\nexport type Attribute = [string, string, boolean];\n\nexport interface Token {\n  type: string;\n  chars?: string;\n  attributes?: Attribute[];\n  tagName?: string;\n  selfClosing?: boolean;\n  loc?: {\n    start: {\n      line: number;\n      column: number;\n    },\n    end: {\n      line: number;\n      column: number;\n    }\n  };\n  syntaxError?: string;\n}\n\nexport default class Tokenizer {\n  
private token: Token = null;\n  private startLine = 1;\n  private startColumn = 0;\n  private tokenizer: EventedTokenizer;\n  private tokens: Token[] = [];\n  private currentAttribute: Attribute = null;\n\n  constructor(entityParser, private options: TokenizerOptions = {}) {\n    this.tokenizer = new EventedTokenizer(this, entityParser);\n  }\n\n  tokenize(input) {\n    this.tokens = [];\n    this.tokenizer.tokenize(input);\n    return this.tokens;\n  }\n\n  tokenizePart(input) {\n    this.tokens = [];\n    this.tokenizer.tokenizePart(input);\n    return this.tokens;\n  }\n\n  tokenizeEOF() {\n    this.tokens = [];\n    this.tokenizer.tokenizeEOF();\n    return this.tokens[0];\n  }\n\n  reset() {\n    this.token = null;\n    this.startLine = 1;\n    this.startColumn = 0;\n  }\n\n  addLocInfo() {\n    if (this.options.loc) {\n      this.token.loc = {\n        start: {\n          line: this.startLine,\n          column: this.startColumn\n        },\n        end: {\n          line: this.tokenizer.line,\n          column: this.tokenizer.column\n        }\n      };\n    }\n    this.startLine = this.tokenizer.line;\n    this.startColumn = this.tokenizer.column;\n  }\n\n  // Data\n\n  beginData() {\n    this.token = {\n      type: 'Chars',\n      chars: ''\n    };\n    this.tokens.push(this.token);\n  }\n\n  appendToData(char) {\n    this.token.chars += char;\n  }\n\n  finishData() {\n    this.addLocInfo();\n  }\n\n  // Comment\n\n  beginComment() {\n    this.token = {\n      type: 'Comment',\n      chars: ''\n    };\n    this.tokens.push(this.token);\n  }\n\n  appendToCommentData(char) {\n    this.token.chars += char;\n  }\n\n  finishComment() {\n    this.addLocInfo();\n  }\n\n  // Tags - basic\n\n  beginStartTag() {\n    this.token = {\n      type: 'StartTag',\n      tagName: '',\n      attributes: [],\n      selfClosing: false\n    };\n    this.tokens.push(this.token);\n  }\n\n  beginEndTag() {\n    this.token = {\n      type: 'EndTag',\n      tagName: ''\n    };\n    
this.tokens.push(this.token);\n  }\n\n  finishTag() {\n    this.addLocInfo();\n  }\n\n  markTagAsSelfClosing() {\n    this.token.selfClosing = true;\n  }\n\n  // Tags - name\n\n  appendToTagName(char) {\n    this.token.tagName += char;\n  }\n\n  // Tags - attributes\n\n  beginAttribute() {\n    this.currentAttribute = [\"\", \"\", null];\n    this.token.attributes.push(this.currentAttribute);\n  }\n\n  appendToAttributeName(char) {\n    this.currentAttribute[0] += char;\n  }\n\n  beginAttributeValue(isQuoted) {\n    this.currentAttribute[2] = isQuoted;\n  }\n\n  appendToAttributeValue(char) {\n    this.currentAttribute[1] = this.currentAttribute[1] || \"\";\n    this.currentAttribute[1] += char;\n  }\n\n  finishAttributeValue() {\n  }\n\n  reportSyntaxError(message: string) {\n    this.token.syntaxError = message;\n  }\n}\n"]}