UNPKG

depends-txt

Version:
68 lines 2.27 kB
export const Token = { Word: 'Word', WhiteSpace: 'WhiteSpace', Comment: 'Comment', NewLine: 'NewLine', }; export function* tokenize(input) { yield* new Tokenizer().tokenize(input); } /** @see {@link https://unicode.org/reports/tr18/#Line_Boundaries} */ const NEWLINE = /([\n\v\f\r\u{0085}\p{Zl}\p{Zp}\q{\r\n}])/v; // eslint-disable-next-line no-control-regex const WHITESPACE = /([\s\u{001C}-\u{001F}]+)/v; const COMMENTER = '#'; class Tokenizer { #location = { line: 0, column: 0, offset: 0 }; *tokenize(input) { this.#location.line = 0; this.#location.offset = 0; for (const [line, newline] of pairs(input.split(NEWLINE))) { this.#location.line += 1; this.#location.column = 1; yield* this.#scanLine(line); if (newline !== undefined) { yield this.#token(Token.NewLine, newline); } } } *#scanLine(input) { if (input.length > 0) { const commentStart = input.indexOf(COMMENTER); if (commentStart < 0) { yield* this.#scanWords(input); } else { yield* this.#scanWords(input.slice(0, commentStart)); yield this.#token(Token.Comment, input.slice(commentStart)); } } } *#scanWords(input) { if (input.length > 0) { for (const [word, ws] of pairs(input.split(WHITESPACE))) { if (word.length > 0) { yield this.#token(Token.Word, word); } if (ws !== undefined) { yield this.#token(Token.WhiteSpace, ws); } } } } #token(type, value) { const start = { ...this.#location }; this.#location.column += value.length; this.#location.offset += value.length; const end = { ...this.#location }; return { type, value, position: { start, end } }; } } /** @remarks This function modifies the argument directly. */ // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types function* pairs(arr) { while (arr.length > 0) { yield arr.splice(0, 2); } } //# sourceMappingURL=tokenize.js.map