UNPKG

cbon

Version:
411 lines (410 loc) 14 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const state_machine_1 = require("./state_machine"); const token_1 = require("./token"); const pos_1 = require("./pos"); const loop_1 = require("./loop"); const utils_1 = require("./utils"); const canceller_1 = require("./canceller"); const EOF = Symbol('EOF'); const reg_Space = /\s/; function tokenizer(code, config = { show_all_err: false, iterable: false, async: false, cancel: canceller_1.AlwaysFalse }) { var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k; const cancel = (_b = (_a = config) === null || _a === void 0 ? void 0 : _a.cancel, (_b !== null && _b !== void 0 ? _b : canceller_1.AlwaysFalse)); const state = new state_machine_1.State((_d = (_c = config) === null || _c === void 0 ? void 0 : _c.show_all_err, (_d !== null && _d !== void 0 ? _d : false))); const tokens = []; state.push(root(new state_machine_1.Context(state), t => { tokens.push(t); })); let last = null; let finish = false; const iter = code[Symbol.iterator](); function main() { if (state.queue.length != 0) { state.queue.pop()(); return loop_1._continue; } let c; if (finish) c = EOF; else { const r = iter.next(); if (r.done === true) { finish = true; return loop_1._continue; } else c = r.value; } state.call(c); if (finish) return loop_1._break; state.count++; if (c === '\n') { if (last !== '\r') { state.lines[state.line] = state.char; state.line++; state.char = 0; } last = null; } else if (c === '\r') { state.lines[state.line] = state.char; state.line++; state.char = 0; last = '\r'; } else { state.char++; last = null; } } const loop = (_f = (_e = config) === null || _e === void 0 ? void 0 : _e.iterable, (_f !== null && _f !== void 0 ? _f : false)) ? (_h = (_g = config) === null || _g === void 0 ? void 0 : _g.async, (_h !== null && _h !== void 0 ? _h : false)) ? async function* () { let finish = false; while (true) { if (await cancel()) break; if (tokens.length !== 0) { yield tokens.shift(); } if (finish) break; const s = main(); if (s === loop_1._continue) { await utils_1.next_micro_tick(); continue; } if (s === loop_1._break) { finish = true; await utils_1.next_micro_tick(); continue; } await utils_1.next_micro_tick(); } yield new token_1.TEOF(new pos_1.TkRange(state.pos, state.pos)); if (state.errors.length !== 0) return state.errors; } : function* () { let finish = false; while (true) { if (cancel()) break; if (tokens.length !== 0) { yield tokens.shift(); } if (finish) break; const s = main(); if (s === loop_1._continue) continue; if (s === loop_1._break) { finish = true; continue; } } yield new token_1.TEOF(new pos_1.TkRange(state.pos, state.pos)); if (state.errors.length !== 0) return state.errors; } : (_k = (_j = config) === null || _j === void 0 ? void 0 : _j.async, (_k !== null && _k !== void 0 ? _k : false)) ? async function () { while (true) { if (await cancel()) break; const s = main(); if (s === loop_1._continue) { await utils_1.next_micro_tick(); continue; } if (s === loop_1._break) break; await utils_1.next_micro_tick(); } tokens.push(new token_1.TEOF(new pos_1.TkRange(state.pos, state.pos))); return state.errors.length !== 0 ? { err: state.errors, val: tokens } : { val: tokens }; } : function () { while (true) { if (cancel()) break; const s = main(); if (s === loop_1._continue) continue; if (s === loop_1._break) break; } tokens.push(new token_1.TEOF(new pos_1.TkRange(state.pos, state.pos))); return state.errors.length !== 0 ? { err: state.errors, val: tokens } : { val: tokens }; }; return loop(); } exports.tokenizer = tokenizer; function root(ctx, push) { return (c) => { if (c === EOF) { ctx.end(); } else if (reg_Space.test(c)) { return; } else if (c === ',' || c === ':' || c === '=' || c === '[' || c === ']' || c === '{' || c === '}') { ctx.flag(); push(token_1.makeTSymbol(ctx.range(), c)); } else if (c === '"' || c === "'") { return ctx.callNoFirst(str, c, push); } else if (c === '/' || c === '#') { return ctx.callNoFirst(comment, c, push); } else { return ctx.call(word, push); } }; } function str(ctx, first, push) { const chars = []; ctx.flag(); return (c) => { if (c === EOF) { ctx.error(ctx.range(), 'String is not closed'); ctx.end(); return state_machine_1.ReDo; } else if (c === '"' || c === "'") { if (c === first) { const s = new token_1.TStr(ctx.range(), chars.join(''), first); ctx.end(); push(s); } else { chars.push(c); } } else if (c === '\\') { return ctx.callNoFirst(escape, c => chars.push(c)); } else { chars.push(c); } }; } const hex_digits = /[0-9a-fA-F]/i; function escape(ctx, push) { let onUnicode = false; let chars = []; let block = false; ctx.flag(); return (c) => { if (onUnicode) { if (c === EOF) { ctx.error(ctx.range(), 'Unicode escape is not finish'); ctx.end(); return state_machine_1.ReDo; } else if (hex_digits.test(c)) { chars.push(c); if (!block && chars.length === 4) { ctx.end(); push(String.fromCodePoint(Number(`0x${chars.join('')}`))); } } else if (c === '{') { if (chars.length !== 0 || block) { ctx.error(ctx.range(), 'Unicode escape is not finish'); ctx.end(); return state_machine_1.ReDo; } block = true; } else if (c === '}') { ctx.end(); if (!block) { ctx.error(ctx.range(), 'Not in Unicode escape block'); return; } else if (chars.length === 0 || chars.length > 6) { ctx.error(ctx.range(), 'Invalid Unicode escape sequence'); return; } push(String.fromCodePoint(Number(`0x${chars.join('')}`))); } else { ctx.error(ctx.range(), 'Unicode escape is not finish'); ctx.end(); return state_machine_1.ReDo; } } else { if (c === 'u') { onUnicode = true; } else { ctx.end(); if (c === EOF) { return state_machine_1.ReDo; } else { push(c === 'n' ? '\n' : c === 'r' ? '\r' : c === 't' ? '\t' : c === '\\' ? '\\' : c === '"' ? '"' : c === "'" ? "'" : c === '0' ? '\0' : c === 'b' ? '\b' : c === 'f' ? '\f' : c === 'v' ? '\v' : c); } } } }; } function word(ctx, push) { const chars = []; ctx.flag(); return (c) => { if (c === EOF || reg_Space.test(c) || c === '"' || c === "'" || c === ',' || c === ':' || c === '=' || c === '[' || c === ']' || c === '{' || c === '}') { push(new token_1.TWord(ctx.range(true), chars.join(''))); ctx.end(); return state_machine_1.ReDo; } else if (c === '/' || c === '#') { push(new token_1.TWord(ctx.range(true), chars.join(''))); ctx.end(); return ctx.callNoFirst(comment, c, push); } else { chars.push(c); } }; } const comment_noerr = comment; function comment(ctx, first, finish, nocc = false) { ctx.flag(); const flag = ctx.last_flag; return (c) => { ctx.end(); if (first === '/') { if (c === '/') { return ctx.callNoFirst(line_comment, first, flag, finish); } else if (c === '*') { return ctx.callNoFirst(block_comment, first, flag, finish); } else { if (nocc) { finish(c); return state_machine_1.ReDo; } else ctx.error(ctx.range(), 'Line Comment need two /'); return ctx.call(line_comment, first, flag, finish); } } else { if (c === '*') { return ctx.callNoFirst(block_comment, first, flag, finish); } else { if (nocc) { finish(c); return state_machine_1.ReDo; } return ctx.call(line_comment, first, flag, finish); } } }; } function line_comment(ctx, first, flag, finish) { const chars = []; const items = []; ctx.last_flag = flag; return (c) => { if (c === EOF || c === '\n' || c === '\r') { if (chars.length > 0) items.push(chars.join('')); ctx.end(); finish(new token_1.TLineComment(ctx.range(), items)); return state_machine_1.ReDo; } else if (c === '/' || c === '#') { if (chars.length > 0) items.push(chars.join('')); chars.length = 0; return ctx.callNoFirst(comment, c, cm => { items.push(cm); }); } else { chars.push(c); } }; } function block_comment(ctx, first, flag, finish) { const chars = []; const items = []; let star = false; ctx.last_flag = flag; let redo_end = false; return (c) => { if (redo_end) { ctx.end(); return state_machine_1.ReDo; } if (c === EOF) { if (star) chars.push('*'); if (chars.length > 0) items.push(chars.join('')); ctx.end(); finish(new token_1.TBlockComment(ctx.range(), items)); ctx.flag(); ctx.error(ctx.range(), 'Block Comment is not close'); return state_machine_1.ReDo; } else if (c === '*') { if (star) chars.push('*'); else star = true; } else if (c === '/' || c === '#') { if (star) { star = false; if (c === first) { return ctx.callNoFirst(comment_noerr, c, w => { if (w instanceof token_1.TLineComment || w instanceof token_1.TBlockComment) { if (chars.length > 0) items.push(chars.join('')); chars.length = 0; items.push(w); } else { if (chars.length > 0) items.push(chars.join('')); chars.length = 0; redo_end = true; finish(new token_1.TBlockComment(ctx.range(), items)); } }, true); } else { chars.push('*'); chars.push(c); } } else { return ctx.callNoFirst(comment_noerr, c, w => { if (w === EOF) { chars.push(c); } else if (w instanceof token_1.TLineComment || w instanceof token_1.TBlockComment) { if (chars.length > 0) items.push(chars.join('')); chars.length = 0; items.push(w); } else { chars.push(c); chars.push(w); } }, true); } } else { if (star) chars.push('*'); star = false; chars.push(c); } }; }