cbon
Version:
Common Bracket Object Notation
408 lines (407 loc) • 13.5 kB
JavaScript
import { State, Context, ReDo } from "./state_machine";
import { TEOF, TStr, TWord, makeTSymbol, TBlockComment, TLineComment } from "./token";
import { TkRange } from "./pos";
import { _continue, _break } from "./loop";
import { next_micro_tick } from "./utils";
import { AlwaysFalse } from "./canceller";
// Sentinel handed to the state handlers in place of a character once the
// input iterator has been exhausted.
const EOF = Symbol('EOF');
// Matches a single whitespace character; `root` skips such characters and
// `word` treats them as a terminator.
const reg_Space = /\s/;
/**
 * Feeds `code`, one iterated item at a time, into a state machine rooted at
 * `root` and collects the tokens that the handlers emit.
 *
 * @param {Iterable<string>} code - Source text; consumed through its
 *   `Symbol.iterator`.
 * @param {object} [config] - Options; `null`, a missing option or a
 *   null/undefined option value all fall back to the defaults.
 * @param {boolean} [config.show_all_err=false] - Forwarded to the `State`
 *   constructor (its exact meaning is defined there).
 * @param {boolean} [config.iterable=false] - When truthy, return a generator
 *   that yields tokens instead of collecting them.
 * @param {boolean} [config.async=false] - When truthy, use the asynchronous
 *   variant, which awaits `cancel()` and `next_micro_tick()` between steps.
 * @param {Function} [config.cancel=AlwaysFalse] - Polled before every step; a
 *   truthy result stops tokenizing early.
 * @returns Depending on `iterable` / `async`: a generator or async generator
 *   that yields each token followed by a `TEOF` and returns `state.errors`
 *   when there are any; otherwise `{ val, err? }` (or a promise of it), where
 *   `val` is the token list ending in a `TEOF` and `err` is present only when
 *   errors were recorded.
 */
export function tokenizer(code, config = { show_all_err: false, iterable: false, async: false, cancel: AlwaysFalse }) {
    // Reads one option, tolerating a null config and null/undefined values.
    const option = (name, fallback) => {
        const value = config === null || config === undefined ? undefined : config[name];
        return value !== null && value !== undefined ? value : fallback;
    };
    const cancel = option('cancel', AlwaysFalse);
    const state = new State(option('show_all_err', false));
    const tokens = [];
    state.push(root(new Context(state), (token) => {
        tokens.push(token);
    }));
    // Remembers a preceding '\r' so that "\r\n" counts as one line break.
    let previous = null;
    let exhausted = false;
    const source = code[Symbol.iterator]();

    // Performs one unit of work: run a queued task, or read one character
    // (EOF once the input is exhausted) and hand it to the state machine.
    function advance() {
        if (state.queue.length !== 0) {
            const task = state.queue.pop();
            task();
            return _continue;
        }
        let c;
        if (exhausted) {
            c = EOF;
        }
        else {
            const item = source.next();
            if (item.done === true) {
                // EOF itself is delivered on the next call.
                exhausted = true;
                return _continue;
            }
            c = item.value;
        }
        state.call(c);
        if (exhausted) {
            return _break;
        }
        state.count++;
        const isCarriageReturn = c === '\r';
        if (isCarriageReturn || c === '\n') {
            // A '\n' directly after '\r' belongs to the same line break.
            if (isCarriageReturn || previous !== '\r') {
                state.lines[state.line] = state.char;
                state.line++;
                state.char = 0;
            }
            previous = isCarriageReturn ? '\r' : null;
        }
        else {
            state.char++;
            previous = null;
        }
        return undefined;
    }

    // Builds the end-of-input token at the current position.
    const eofToken = () => new TEOF(new TkRange(state.pos, state.pos));
    // Builds the result object of the non-iterable variants.
    const summary = () => (state.errors.length !== 0 ? { err: state.errors, val: tokens } : { val: tokens });

    async function* streamAsync() {
        let drained = false;
        for (;;) {
            if (await cancel()) {
                break;
            }
            if (tokens.length !== 0) {
                yield tokens.shift();
            }
            if (drained) {
                break;
            }
            const outcome = advance();
            if (outcome !== _continue && outcome === _break) {
                drained = true;
            }
            await next_micro_tick();
        }
        yield eofToken();
        if (state.errors.length !== 0) {
            return state.errors;
        }
    }

    function* streamSync() {
        let drained = false;
        for (;;) {
            if (cancel()) {
                break;
            }
            if (tokens.length !== 0) {
                yield tokens.shift();
            }
            if (drained) {
                break;
            }
            const outcome = advance();
            if (outcome !== _continue && outcome === _break) {
                drained = true;
            }
        }
        yield eofToken();
        if (state.errors.length !== 0) {
            return state.errors;
        }
    }

    async function collectAsync() {
        for (;;) {
            if (await cancel()) {
                break;
            }
            const outcome = advance();
            if (outcome !== _continue && outcome === _break) {
                break;
            }
            await next_micro_tick();
        }
        tokens.push(eofToken());
        return summary();
    }

    function collectSync() {
        for (;;) {
            if (cancel()) {
                break;
            }
            const outcome = advance();
            if (outcome !== _continue && outcome === _break) {
                break;
            }
        }
        tokens.push(eofToken());
        return summary();
    }

    const wantsIterable = option('iterable', false);
    const wantsAsync = option('async', false);
    let run;
    if (wantsIterable) {
        run = wantsAsync ? streamAsync : streamSync;
    }
    else {
        run = wantsAsync ? collectAsync : collectSync;
    }
    return run();
}
/**
 * Creates the top-level state handler, which decides what kind of token the
 * next character starts.
 *
 * @param ctx - State context used to end this state, mark positions and
 *   enter sub-states.
 * @param {Function} push - Receives every finished token.
 * @returns {Function} Handler taking one character (or `EOF`). It returns
 *   whatever `ctx.call` / `ctx.callNoFirst` return when a sub-state is
 *   entered and `undefined` otherwise.
 */
function root(ctx, push) {
    return (c) => {
        // EOF has to be ruled out before the regex test below.
        if (c === EOF) {
            ctx.end();
            return undefined;
        }
        if (reg_Space.test(c)) {
            return undefined;
        }
        switch (c) {
            case ',':
            case ':':
            case '=':
            case '[':
            case ']':
            case '{':
            case '}':
                // Single-character symbols become tokens immediately.
                ctx.flag();
                push(makeTSymbol(ctx.range(), c));
                return undefined;
            case '"':
            case "'":
                return ctx.callNoFirst(str, c, push);
            case '/':
            case '#':
                return ctx.callNoFirst(comment, c, push);
            default:
                // Anything else starts a word; `word` gets this character too.
                return ctx.call(word, push);
        }
    };
}
/**
 * Creates the state handler that reads the body of a quoted string.
 *
 * @param ctx - State context; the start position is flagged on creation.
 * @param {string} first - The quote character that opened the string; only
 *   the same quote closes it.
 * @param {Function} push - Receives the finished `TStr` token.
 * @returns {Function} Handler taking one character (or `EOF`). It returns
 *   `ReDo` on `EOF`, the result of `ctx.callNoFirst` when an escape starts,
 *   and `undefined` otherwise.
 */
function str(ctx, first, push) {
    const buffer = [];
    ctx.flag();
    const isQuote = (ch) => ch === '"' || ch === "'";
    return (c) => {
        if (c === EOF) {
            // Unterminated string: report it and let the parent handle EOF.
            const where = ctx.range();
            ctx.error(where, 'String is not closed');
            ctx.end();
            return ReDo;
        }
        if (c === '\\') {
            // The escape state hands the decoded character back to the buffer.
            return ctx.callNoFirst(escape, (decoded) => buffer.push(decoded));
        }
        if (isQuote(c) && c === first) {
            const token = new TStr(ctx.range(), buffer.join(''), first);
            ctx.end();
            push(token);
            return undefined;
        }
        // Ordinary characters, including the other kind of quote.
        buffer.push(c);
        return undefined;
    };
}
// Matches a single hexadecimal digit (the class already lists both cases, so
// no `i` flag is needed).
const hex_digits = /[0-9a-fA-F]/;
// Single-character escapes and the character each one stands for; any other
// escaped character stands for itself.
const simple_escapes = new Map([
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
    ['\\', '\\'],
    ['"', '"'],
    ["'", "'"],
    ['0', '\0'],
    ['b', '\b'],
    ['f', '\f'],
    ['v', '\v'],
]);
/**
 * Creates the state handler that decodes what follows a backslash inside a
 * string: a single-character escape, `\uXXXX` (exactly four hex digits) or
 * `\u{X...}` (one to six hex digits).
 *
 * @param ctx - State context; the start position is flagged on creation.
 * @param {Function} push - Receives the decoded character.
 * @returns {Function} Handler taking one character (or `EOF`). It returns
 *   `ReDo` when the character was not consumed and must be re-handled by the
 *   parent state, `undefined` otherwise.
 */
function escape(ctx, push) {
    // Largest value String.fromCodePoint accepts; above it, it throws.
    const max_code_point = 0x10ffff;
    let onUnicode = false;
    const chars = [];
    let block = false;
    ctx.flag();
    // Decodes the collected hex digits and pushes the character. Values that
    // are not valid code points are reported instead of being allowed to
    // throw a RangeError out of the tokenizer.
    const emit = () => {
        const code = Number(`0x${chars.join('')}`);
        if (!(code <= max_code_point)) {
            ctx.error(ctx.range(), 'Invalid Unicode escape sequence');
            return;
        }
        push(String.fromCodePoint(code));
    };
    // Reports a truncated unicode escape and gives the character back.
    const unfinished = () => {
        ctx.error(ctx.range(), 'Unicode escape is not finish');
        ctx.end();
        return ReDo;
    };
    return (c) => {
        if (!onUnicode) {
            if (c === 'u') {
                onUnicode = true;
                return undefined;
            }
            ctx.end();
            if (c === EOF) {
                // Nothing to decode; the parent reports the unclosed string.
                return ReDo;
            }
            push(simple_escapes.has(c) ? simple_escapes.get(c) : c);
            return undefined;
        }
        if (c === EOF) {
            return unfinished();
        }
        if (hex_digits.test(c)) {
            chars.push(c);
            // The brace-less form ends after exactly four digits.
            if (!block && chars.length === 4) {
                ctx.end();
                emit();
            }
            return undefined;
        }
        if (c === '{') {
            // '{' is only valid directly after the 'u'.
            if (chars.length !== 0 || block) {
                return unfinished();
            }
            block = true;
            return undefined;
        }
        if (c === '}') {
            ctx.end();
            if (!block) {
                ctx.error(ctx.range(), 'Not in Unicode escape block');
                return undefined;
            }
            if (chars.length === 0 || chars.length > 6) {
                ctx.error(ctx.range(), 'Invalid Unicode escape sequence');
                return undefined;
            }
            emit();
            return undefined;
        }
        return unfinished();
    };
}
/**
 * Creates the state handler that collects a bare word until a terminator
 * (EOF, whitespace, a quote, a symbol character or a comment start).
 *
 * @param ctx - State context; the start position is flagged on creation.
 * @param {Function} push - Receives the finished `TWord` token.
 * @returns {Function} Handler taking one character (or `EOF`). It returns
 *   `ReDo` when the terminator must be re-handled by the parent state, the
 *   result of `ctx.callNoFirst` when a comment starts, and `undefined` while
 *   the word continues.
 */
function word(ctx, push) {
    const letters = [];
    const delimiters = ['"', "'", ',', ':', '=', '[', ']', '{', '}'];
    ctx.flag();
    // Emits the collected word and leaves this state.
    const emitWord = () => {
        push(new TWord(ctx.range(true), letters.join('')));
        ctx.end();
    };
    return (c) => {
        if (c === '/' || c === '#') {
            // A comment start ends the word and is consumed by `comment`.
            emitWord();
            return ctx.callNoFirst(comment, c, push);
        }
        // The EOF check must come before the regex test.
        if (c === EOF || reg_Space.test(c) || delimiters.includes(c)) {
            emitWord();
            return ReDo;
        }
        letters.push(c);
        return undefined;
    };
}
// Alias of `comment` (the declaration below is hoisted); `block_comment`
// calls it with `nocc` set to true.
const comment_noerr = comment;
/**
 * Creates the state handler that looks at the character following a comment
 * opener (`/` or `#`) and decides which kind of comment, if any, begins.
 *
 * @param ctx - State context; the start position is flagged on creation and
 *   the resulting `ctx.last_flag` is passed on to the comment state.
 * @param {string} first - The opener that was already consumed.
 * @param {Function} finish - Receives the finished comment token, or, when
 *   `nocc` is set and no comment follows, the character that was seen.
 * @param {boolean} [nocc=false] - When truthy, a character that starts no
 *   comment is handed to `finish` and re-handled by the parent instead of
 *   starting a line comment.
 * @returns {Function} Handler for exactly one character; it ends this state
 *   and returns `ReDo` or the result of `ctx.call` / `ctx.callNoFirst`.
 */
function comment(ctx, first, finish, nocc = false) {
    ctx.flag();
    const flag = ctx.last_flag;
    return (c) => {
        ctx.end();
        // '*' after either opener starts a block comment.
        if (c === '*') {
            return ctx.callNoFirst(block_comment, first, flag, finish);
        }
        const slashOpened = first === '/';
        if (slashOpened && c === '/') {
            return ctx.callNoFirst(line_comment, first, flag, finish);
        }
        if (nocc) {
            // Not a comment after all: report the character and give it back.
            finish(c);
            return ReDo;
        }
        if (slashOpened) {
            ctx.error(ctx.range(), 'Line Comment need two /');
        }
        // Treated as a line comment whose text starts with this character.
        return ctx.call(line_comment, first, flag, finish);
    };
}
/**
 * Creates the state handler that reads a line comment up to, but not
 * including, the next '\n', '\r' or EOF. Comments opened inside it are
 * collected as nested items.
 *
 * @param ctx - State context; its `last_flag` is set to `flag` on creation.
 * @param {string} first - The opener of this comment (not used here).
 * @param flag - Position flag recorded when the opener was seen.
 * @param {Function} finish - Receives the finished `TLineComment`, whose
 *   items are text runs and nested comment tokens in source order.
 * @returns {Function} Handler taking one character (or `EOF`). It returns
 *   `ReDo` at the end of the line, the result of `ctx.callNoFirst` when a
 *   nested comment starts, and `undefined` otherwise.
 */
function line_comment(ctx, first, flag, finish) {
    const text = [];
    const parts = [];
    ctx.last_flag = flag;
    // Appends the pending text run, if any, to the item list.
    const flushText = () => {
        if (text.length > 0) {
            parts.push(text.join(''));
        }
    };
    return (c) => {
        switch (c) {
            case EOF:
            case '\n':
            case '\r':
                flushText();
                ctx.end();
                finish(new TLineComment(ctx.range(), parts));
                // The line break (or EOF) is re-handled by the parent state.
                return ReDo;
            case '/':
            case '#':
                flushText();
                text.length = 0;
                return ctx.callNoFirst(comment, c, (nested) => {
                    parts.push(nested);
                });
            default:
                text.push(c);
                return undefined;
        }
    };
}
/**
 * Creates the state handler that reads a block comment. The comment closes
 * on `*` followed by the same opener character that started it (`first`),
 * provided the character after that does not itself start a comment.
 * Comments opened inside it are collected as nested items.
 *
 * @param ctx - State context; its `last_flag` is set to `flag` on creation.
 * @param {string} first - The opener of this comment (`/` or `#`).
 * @param flag - Position flag recorded when the opener was seen.
 * @param {Function} finish - Receives the finished `TBlockComment`, whose
 *   items are text runs and nested comment tokens in source order.
 * @returns {Function} Handler taking one character (or `EOF`). It returns
 *   `ReDo` when the character must be re-handled by the parent state, the
 *   result of `ctx.callNoFirst` when a look-ahead state is entered, and
 *   `undefined` otherwise.
 */
function block_comment(ctx, first, flag, finish) {
    const chars = [];
    const items = [];
    // True when the previous character was a '*' that is not yet part of the text.
    let star = false;
    ctx.last_flag = flag;
    // Set once the comment has been closed; the next call only ends the state.
    let redo_end = false;
    // Moves the pending text run, if any, into the item list.
    const flush = () => {
        if (chars.length > 0)
            items.push(chars.join(''));
        chars.length = 0;
    };
    return (c) => {
        if (redo_end) {
            // The look-ahead character after the closer belongs to the parent.
            ctx.end();
            return ReDo;
        }
        if (c === EOF) {
            if (star)
                chars.push('*');
            flush();
            ctx.end();
            finish(new TBlockComment(ctx.range(), items));
            ctx.flag();
            ctx.error(ctx.range(), 'Block Comment is not close');
            return ReDo;
        }
        else if (c === '*') {
            // Only the latest '*' stays pending; earlier ones are plain text.
            if (star)
                chars.push('*');
            else
                star = true;
        }
        else if (c === '/' || c === '#') {
            if (star) {
                star = false;
                if (c === first) {
                    // Possible closer: look at the next character first.
                    // NOTE(review): when a nested comment follows, the pending
                    // '*' is not added to the text — confirm this is intended.
                    return ctx.callNoFirst(comment_noerr, c, w => {
                        if (w instanceof TLineComment || w instanceof TBlockComment) {
                            flush();
                            items.push(w);
                        }
                        else {
                            flush();
                            redo_end = true;
                            finish(new TBlockComment(ctx.range(), items));
                        }
                    }, true);
                }
                else {
                    // '*' followed by the other opener is ordinary text.
                    chars.push('*');
                    chars.push(c);
                }
            }
            else {
                // Possible nested comment: look at the next character first.
                return ctx.callNoFirst(comment_noerr, c, w => {
                    if (w instanceof TLineComment || w instanceof TBlockComment) {
                        flush();
                        items.push(w);
                    }
                    else {
                        // Not a comment: keep the opener as text. `w` itself is
                        // NOT appended here, because the look-ahead state
                        // returns ReDo and this handler receives `w` again.
                        chars.push(c);
                    }
                }, true);
            }
        }
        else {
            if (star)
                chars.push('*');
            star = false;
            chars.push(c);
        }
    };
}