UNPKG

cbon

Version:
408 lines (407 loc) 13.5 kB
import { State, Context, ReDo } from "./state_machine";
import { TEOF, TStr, TWord, makeTSymbol, TBlockComment, TLineComment } from "./token";
import { TkRange } from "./pos";
import { _continue, _break } from "./loop";
import { next_micro_tick } from "./utils";
import { AlwaysFalse } from "./canceller";

/** Sentinel passed to the state handlers once the input iterator is exhausted. */
const EOF = Symbol('EOF');
const reg_Space = /\s/;
/** Matches one hexadecimal digit (both cases are listed, so no `i` flag is needed). */
const hex_digits = /[0-9a-fA-F]/;
/** Largest valid Unicode code point; `String.fromCodePoint` throws a RangeError above it. */
const MAX_CODE_POINT = 0x10FFFF;
/** Single characters that `root` turns into symbol tokens and that terminate a word. */
const SYMBOL_CHARS = new Set([',', ':', '=', '[', ']', '{', '}']);
/** Single-character escapes whose value differs from (or is documented alongside) the escaped letter. */
const SIMPLE_ESCAPES = new Map([
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
    ['\\', '\\'],
    ['"', '"'],
    ["'", "'"],
    ['0', '\0'],
    ['b', '\b'],
    ['f', '\f'],
    ['v', '\v'],
]);

/** True for the two string delimiters. */
function is_quote(c) {
    return c === '"' || c === "'";
}

/** True for the two characters that can open a comment. */
function is_comment_start(c) {
    return c === '/' || c === '#';
}

/**
 * Reads `config[key]`, falling back when `config` or the value is null/undefined.
 * Hand-written equivalent of `config?.[key] ?? fallback`.
 */
function read_option(config, key, fallback) {
    const value = config == null ? undefined : config[key];
    return value == null ? fallback : value;
}

/**
 * Tokenizes `code` by feeding it, one iterated element at a time, to a stack of state handlers.
 *
 * @param {Iterable<string>} code - Source text; iterated with `code[Symbol.iterator]()`.
 * @param {object} [config]
 * @param {boolean} [config.show_all_err=false] - Passed straight to the `State` constructor.
 * @param {boolean} [config.iterable=false] - Return a generator that yields tokens instead of an array.
 * @param {boolean} [config.async=false] - Use the async variant, which awaits `next_micro_tick()` between steps.
 * @param {Function} [config.cancel=AlwaysFalse] - Checked before every step; a truthy result stops tokenizing.
 * @returns Depends on the flags:
 *   - neither: `{ val: tokens }`, plus `err: state.errors` when errors were recorded;
 *   - `async`: a Promise of that same object;
 *   - `iterable`: a generator yielding tokens, then a `TEOF`, whose return value is
 *     `state.errors` when errors were recorded;
 *   - both: the async-generator equivalent.
 */
export function tokenizer(code, config = { show_all_err: false, iterable: false, async: false, cancel: AlwaysFalse }) {
    const cancel = read_option(config, 'cancel', AlwaysFalse);
    const state = new State(read_option(config, 'show_all_err', false));
    const tokens = [];
    state.push(root(new Context(state), t => { tokens.push(t); }));
    // Previous line-break character, so that "\r\n" is counted as a single line break.
    let last = null;
    // Set once the input iterator reports `done`; the following step feeds EOF.
    let finish = false;
    const iter = code[Symbol.iterator]();

    /** Records the length of the current line and moves the position to the next one. */
    function new_line() {
        state.lines[state.line] = state.char;
        state.line++;
        state.char = 0;
    }

    /**
     * Performs one step: runs a queued task if there is one, otherwise feeds the next
     * input element (or EOF) to the state machine and updates the position counters.
     * Returns `_continue`, `_break` (EOF has been processed) or undefined.
     */
    function main() {
        if (state.queue.length !== 0) {
            state.queue.pop()();
            return _continue;
        }
        let c;
        if (finish) {
            c = EOF;
        }
        else {
            const r = iter.next();
            if (r.done === true) {
                finish = true;
                return _continue;
            }
            c = r.value;
        }
        state.call(c);
        if (finish)
            return _break;
        state.count++;
        if (c === '\n') {
            // The '\n' of a "\r\n" pair was already counted when the '\r' was seen.
            if (last !== '\r')
                new_line();
            last = null;
        }
        else if (c === '\r') {
            new_line();
            last = '\r';
        }
        else {
            state.char++;
            last = null;
        }
    }

    /** Builds the zero-width end-of-file token at the current position. */
    function make_eof() {
        return new TEOF(new TkRange(state.pos, state.pos));
    }

    /** Shapes the result of the non-iterable modes. */
    function make_result() {
        return state.errors.length !== 0 ? { err: state.errors, val: tokens } : { val: tokens };
    }

    async function* iterate_async() {
        let done = false;
        while (true) {
            if (await cancel())
                break;
            if (tokens.length !== 0) {
                yield tokens.shift();
            }
            if (done) {
                // Keep looping (one token per cancel check) until the queue is empty, so
                // tokens are not dropped when more than one is still queued after EOF.
                if (tokens.length === 0)
                    break;
                continue;
            }
            if (main() === _break)
                done = true;
            await next_micro_tick();
        }
        yield make_eof();
        if (state.errors.length !== 0)
            return state.errors;
    }

    function* iterate() {
        let done = false;
        while (true) {
            if (cancel())
                break;
            if (tokens.length !== 0) {
                yield tokens.shift();
            }
            if (done) {
                // Keep looping (one token per cancel check) until the queue is empty, so
                // tokens are not dropped when more than one is still queued after EOF.
                if (tokens.length === 0)
                    break;
                continue;
            }
            if (main() === _break)
                done = true;
        }
        yield make_eof();
        if (state.errors.length !== 0)
            return state.errors;
    }

    async function collect_async() {
        while (!(await cancel())) {
            if (main() === _break)
                break;
            await next_micro_tick();
        }
        tokens.push(make_eof());
        return make_result();
    }

    function collect() {
        while (!cancel()) {
            if (main() === _break)
                break;
        }
        tokens.push(make_eof());
        return make_result();
    }

    const iterable = read_option(config, 'iterable', false);
    const is_async = read_option(config, 'async', false);
    const loop = iterable
        ? (is_async ? iterate_async : iterate)
        : (is_async ? collect_async : collect);
    return loop();
}

/**
 * Top-level handler: skips whitespace, emits symbol tokens, and hands every other
 * character to the string, comment or word handler.
 *
 * @param ctx - Context of the state machine.
 * @param {Function} push - Receives every finished token.
 */
function root(ctx, push) {
    return (c) => {
        if (c === EOF) {
            ctx.end();
        }
        else if (reg_Space.test(c)) {
            return;
        }
        else if (SYMBOL_CHARS.has(c)) {
            ctx.flag();
            push(makeTSymbol(ctx.range(), c));
        }
        else if (is_quote(c)) {
            return ctx.callNoFirst(str, c, push);
        }
        else if (is_comment_start(c)) {
            return ctx.callNoFirst(comment, c, push);
        }
        else {
            return ctx.call(word, push);
        }
    };
}

/**
 * Handler for a quoted string. Collects characters until the quote that opened the
 * string (`first`) appears again; the other quote character is ordinary content.
 *
 * @param ctx - Context of the state machine.
 * @param {string} first - The opening quote character.
 * @param {Function} push - Receives the finished `TStr`.
 */
function str(ctx, first, push) {
    const chars = [];
    ctx.flag();
    return (c) => {
        if (c === EOF) {
            ctx.error(ctx.range(), 'String is not closed');
            ctx.end();
            return ReDo;
        }
        else if (is_quote(c)) {
            if (c === first) {
                // Build the token before ending the context, then hand it over.
                const s = new TStr(ctx.range(), chars.join(''), first);
                ctx.end();
                push(s);
            }
            else {
                chars.push(c);
            }
        }
        else if (c === '\\') {
            return ctx.callNoFirst(escape, c => chars.push(c));
        }
        else {
            chars.push(c);
        }
    };
}

/**
 * Handler for the part of an escape sequence that follows the backslash.
 * Supports single-character escapes, `uXXXX` (exactly four hex digits) and
 * `u{X...}` (one to six hex digits, at most U+10FFFF).
 *
 * @param ctx - Context of the state machine.
 * @param {Function} push - Receives the decoded character.
 */
function escape(ctx, push) {
    let onUnicode = false;
    let chars = [];
    let block = false;
    ctx.flag();

    /** Reports a truncated unicode escape and lets the parent handler see the character. */
    function unfinished() {
        ctx.error(ctx.range(), 'Unicode escape is not finish');
        ctx.end();
        return ReDo;
    }

    /** Numeric value of the hex digits collected so far. */
    function code_point() {
        return Number(`0x${chars.join('')}`);
    }

    return (c) => {
        if (!onUnicode) {
            if (c === 'u') {
                onUnicode = true;
                return;
            }
            ctx.end();
            if (c === EOF) {
                return ReDo;
            }
            // Characters that are not in the table stand for themselves.
            push(SIMPLE_ESCAPES.has(c) ? SIMPLE_ESCAPES.get(c) : c);
            return;
        }
        if (c === EOF) {
            return unfinished();
        }
        else if (hex_digits.test(c)) {
            chars.push(c);
            // The brace-less form ends after exactly four digits.
            if (!block && chars.length === 4) {
                ctx.end();
                push(String.fromCodePoint(code_point()));
            }
        }
        else if (c === '{') {
            // A brace is only valid directly after the 'u'.
            if (chars.length !== 0 || block) {
                return unfinished();
            }
            block = true;
        }
        else if (c === '}') {
            ctx.end();
            if (!block) {
                ctx.error(ctx.range(), 'Not in Unicode escape block');
                return;
            }
            else if (chars.length === 0 || chars.length > 6) {
                ctx.error(ctx.range(), 'Invalid Unicode escape sequence');
                return;
            }
            const value = code_point();
            // Six hex digits can exceed U+10FFFF, for which String.fromCodePoint throws
            // a RangeError; report it like every other malformed escape instead.
            if (value > MAX_CODE_POINT) {
                ctx.error(ctx.range(), 'Invalid Unicode escape sequence');
                return;
            }
            push(String.fromCodePoint(value));
        }
        else {
            return unfinished();
        }
    };
}

/**
 * Handler for a bare word. The word ends at EOF, whitespace, a quote, a symbol
 * character or the start of a comment.
 *
 * @param ctx - Context of the state machine.
 * @param {Function} push - Receives the finished `TWord` (and, later, a following comment).
 */
function word(ctx, push) {
    const chars = [];
    ctx.flag();
    return (c) => {
        if (c === EOF || reg_Space.test(c) || is_quote(c) || SYMBOL_CHARS.has(c)) {
            push(new TWord(ctx.range(true), chars.join('')));
            ctx.end();
            // The terminating character is not part of the word.
            return ReDo;
        }
        else if (is_comment_start(c)) {
            push(new TWord(ctx.range(true), chars.join('')));
            ctx.end();
            return ctx.callNoFirst(comment, c, push);
        }
        else {
            chars.push(c);
        }
    };
}

/** Alias used by `block_comment`, which always calls it with `nocc = true`. */
const comment_noerr = comment;

/**
 * Handler that looks at the character following a comment opener (`/` or `#`) and
 * decides between a line comment and a block comment.
 *
 * @param ctx - Context of the state machine.
 * @param {string} first - The opener that was already consumed (`/` or `#`).
 * @param {Function} finish - Receives the finished comment token.
 * @param {boolean} [nocc=false] - When true and the second character does not start a
 *   comment, that character is passed to `finish` and ReDo is returned instead of
 *   starting a line comment.
 */
function comment(ctx, first, finish, nocc = false) {
    ctx.flag();
    const flag = ctx.last_flag;
    return (c) => {
        ctx.end();
        if (c === '*') {
            return ctx.callNoFirst(block_comment, first, flag, finish);
        }
        if (first === '/' && c === '/') {
            return ctx.callNoFirst(line_comment, first, flag, finish);
        }
        if (nocc) {
            finish(c);
            return ReDo;
        }
        // A lone '/' is reported but still treated as a line comment; '#' needs no second character.
        if (first === '/') {
            ctx.error(ctx.range(), 'Line Comment need two /');
        }
        return ctx.call(line_comment, first, flag, finish);
    };
}

/**
 * Handler for the body of a line comment, which ends at EOF, '\n' or '\r'.
 * `items` alternates plain text runs with nested comment tokens.
 *
 * @param ctx - Context of the state machine.
 * @param {string} first - The opener of this comment (unused here).
 * @param flag - Value restored into `ctx.last_flag` before collecting.
 * @param {Function} finish - Receives the finished `TLineComment`.
 */
function line_comment(ctx, first, flag, finish) {
    const chars = [];
    const items = [];
    ctx.last_flag = flag;
    return (c) => {
        if (c === EOF || c === '\n' || c === '\r') {
            if (chars.length > 0)
                items.push(chars.join(''));
            ctx.end();
            finish(new TLineComment(ctx.range(), items));
            return ReDo;
        }
        else if (is_comment_start(c)) {
            // Close the current text run before the nested comment is appended.
            if (chars.length > 0)
                items.push(chars.join(''));
            chars.length = 0;
            return ctx.callNoFirst(comment, c, cm => { items.push(cm); });
        }
        else {
            chars.push(c);
        }
    };
}

/**
 * Handler for the body of a block comment. The comment closes on '*' followed by the
 * same opener character (`first`), unless that pair is itself followed by a nested comment.
 * `items` alternates plain text runs with nested comment tokens.
 *
 * @param ctx - Context of the state machine.
 * @param {string} first - The opener of this comment (`/` or `#`).
 * @param flag - Value restored into `ctx.last_flag` before collecting.
 * @param {Function} finish - Receives the finished `TBlockComment`.
 */
function block_comment(ctx, first, flag, finish) {
    const chars = [];
    const items = [];
    // True when the previous character was a '*' that has not been emitted as text yet.
    let star = false;
    ctx.last_flag = flag;
    // Set once the comment has been closed; the next invocation only ends this handler.
    let redo_end = false;

    /** Moves the pending text run (if any) into `items`. */
    function flush_text() {
        if (chars.length > 0)
            items.push(chars.join(''));
        chars.length = 0;
    }

    /** True when `w` is a finished nested comment token. */
    function is_comment_token(w) {
        return w instanceof TLineComment || w instanceof TBlockComment;
    }

    return (c) => {
        if (redo_end) {
            ctx.end();
            return ReDo;
        }
        if (c === EOF) {
            if (star)
                chars.push('*');
            if (chars.length > 0)
                items.push(chars.join(''));
            ctx.end();
            // The unterminated comment is still emitted, then reported.
            finish(new TBlockComment(ctx.range(), items));
            ctx.flag();
            ctx.error(ctx.range(), 'Block Comment is not close');
            return ReDo;
        }
        else if (c === '*') {
            if (star)
                chars.push('*');
            else
                star = true;
        }
        else if (is_comment_start(c)) {
            if (star) {
                star = false;
                if (c === first) {
                    // Possible terminator: look at the next character first, because it
                    // may start a nested comment instead of ending this one.
                    return ctx.callNoFirst(comment_noerr, c, w => {
                        flush_text();
                        if (is_comment_token(w)) {
                            items.push(w);
                        }
                        else {
                            redo_end = true;
                            finish(new TBlockComment(ctx.range(), items));
                        }
                    }, true);
                }
                else {
                    // '*' followed by the other opener is plain text.
                    chars.push('*');
                    chars.push(c);
                }
            }
            else {
                return ctx.callNoFirst(comment_noerr, c, w => {
                    if (w === EOF) {
                        chars.push(c);
                    }
                    else if (is_comment_token(w)) {
                        flush_text();
                        items.push(w);
                    }
                    else {
                        // NOTE(review): `comment` also returns ReDo after handing `w` over; if
                        // that re-feeds `w` to this handler, `w` is stored twice. Confirm
                        // against the ReDo semantics in ./state_machine.
                        chars.push(c);
                        chars.push(w);
                    }
                }, true);
            }
        }
        else {
            if (star)
                chars.push('*');
            star = false;
            chars.push(c);
        }
    };
}