UNPKG

@ranchonyx/nanoparse

Version:

A super simple basic parser / tokenizer for easier processing of various configuration files

168 lines (167 loc) 4.72 kB
/**
 * Matches any code point that is NOT in the Unicode general category "Other"
 * (C: control, format, surrogate, private-use, unassigned).
 * Note that this includes the space character and ASCII punctuation/digits.
 */
const NOT_OTHER_CATEGORY = /\P{C}/u;

/**
 * Throws if `target` is `null` or `undefined`; any other value passes.
 *
 * @param {*} target - Value to check.
 * @param {string} [info] - Optional detail appended to the error message.
 * @throws {Error} `Assertion failed.` (plus ` ${info}` when given).
 */
export function assertNotNull(target, info) {
  if (target === null || target === undefined) {
    throw new Error(`Assertion failed.${info ? ` ${info}` : ""}`);
  }
}

/**
 * Character reader over a source string that tracks line, column and offset.
 *
 * Characters are delivered one Unicode code point at a time, so characters
 * outside the Basic Multilingual Plane arrive whole instead of as two lone
 * surrogates. `streamPosition` remains a UTF-16 code-unit index into `input`.
 */
export class InputStream {
  input;
  END_OF_LINE = "\n";
  COMMENT_START = "#";
  streamPosition = 0;
  sourceColumn = 0;
  sourceLine = 1;

  /**
   * @param {string} pStreamInput - Text to read.
   * @param {string} [pCommentStart] - Comment introducer; defaults to "#".
   *   Consumers in this module compare it against a single character.
   * @param {string} [pSourceEOL] - Line terminator; defaults to "\n".
   *   May be longer than one character (e.g. "\r\n").
   */
  constructor(pStreamInput, pCommentStart, pSourceEOL) {
    this.input = pStreamInput;
    if (pCommentStart) this.COMMENT_START = pCommentStart;
    if (pSourceEOL) this.END_OF_LINE = pSourceEOL;
  }

  /**
   * Consumes and returns the next character, updating line/column counters.
   * At end of input it returns "" and leaves the position untouched.
   *
   * @returns {string} The consumed character, or "" at end of input.
   */
  next() {
    const ch = this.peek();
    if (ch === "") return ch;

    this.streamPosition += ch.length;

    // A line ends once the text consumed so far ends with the terminator;
    // this also handles terminators longer than one character.
    const eol = this.END_OF_LINE;
    if (eol.length > 0 && this.input.endsWith(eol, this.streamPosition)) {
      this.sourceLine++;
      this.sourceColumn = 0;
    } else {
      this.sourceColumn++;
    }
    return ch;
  }

  /**
   * Returns the next character without consuming it.
   *
   * @returns {string} The upcoming character, or "" at end of input.
   */
  peek() {
    const codePoint = this.input.codePointAt(this.streamPosition);
    return codePoint === undefined ? "" : String.fromCodePoint(codePoint);
  }

  /** @returns {boolean} True when no characters remain. */
  eof() {
    return this.peek() === "";
  }

  /**
   * @returns {{line: number, col: number, pos: number}} Snapshot of the
   *   current line (starts at 1), column (starts at 0) and string offset.
   */
  getPosition() {
    return {
      line: this.sourceLine,
      col: this.sourceColumn,
      pos: this.streamPosition,
    };
  }
}

/**
 * Turns an {@link InputStream} into a stream of `identifier` tokens,
 * skipping whitespace and comments. Supports one token of lookahead.
 */
export class TokenStream {
  input;
  current = null;

  /** @param {InputStream} streamInput - Character source to tokenize. */
  constructor(streamInput) {
    this.input = streamInput;
  }

  /** True for space, tab, LF, CR and backspace. */
  is_whitespace = (ch) => " \t\n\r\b".includes(ch);

  /** True for any character outside the Unicode "Other" (C) category. */
  is_ident_start = (ch) => NOT_OTHER_CATEGORY.test(ch);

  /**
   * True for characters that may continue an identifier: everything accepted
   * by `is_ident_start` (which includes spaces), except the comment
   * introducer. The explicit punctuation/digit list is kept from the
   * original; those characters are already accepted by `is_ident_start`.
   */
  is_ident = (ch) =>
    (this.is_ident_start(ch) || "?!-<>()[]{}=0123456789".includes(ch)) &&
    ch !== this.input.COMMENT_START;

  /**
   * Consumes characters while `predicate` accepts the upcoming one.
   *
   * @param {(ch: string) => boolean} predicate
   * @returns {string} The consumed text (possibly empty).
   */
  read_while = (predicate) => {
    let str = "";
    while (!this.input.eof() && predicate(this.input.peek())) {
      str += this.input.next();
    }
    return str;
  };

  /**
   * Reads one identifier token starting at the current position.
   *
   * @returns {{type: "identifier", value: string, position: object}}
   *   `position` is the stream position before the first character.
   */
  read_ident = () => {
    const beforeRead = this.input.getPosition();
    const ident = this.read_while(this.is_ident);
    return { type: "identifier", value: ident, position: beforeRead };
  };

  /**
   * Consumes the rest of the current line including its terminator
   * (or up to end of input when no terminator follows).
   */
  skip_comment = () => {
    const eol = this.input.END_OF_LINE;
    // Only the last `eol.length` code units are kept, enough to recognise a
    // terminator of any length without buffering the whole comment.
    let tail = "";
    while (!this.input.eof()) {
      tail = (tail + this.input.next()).slice(-eol.length);
      if (tail === eol) break;
    }
  };

  /**
   * Scans the next token, skipping whitespace and comments.
   *
   * @returns {object|null} The next token, or `null` at end of input.
   * @throws {Error} If the next character can neither start an identifier
   *   nor be skipped.
   */
  read_next() {
    this.read_while(this.is_whitespace);
    if (this.input.eof()) return null;

    const ch = this.input.peek();
    if (ch === this.input.COMMENT_START) {
      this.skip_comment();
      return this.read_next();
    }
    if (this.is_ident_start(ch)) {
      return this.read_ident();
    }

    const hex = ch.codePointAt(0).toString(16).toUpperCase().padStart(2, "0");
    throw new Error(`Can't handle character ${ch} \n[0x${hex}]`);
  }

  /** @returns {object|null} The next token without consuming it. */
  peek() {
    return (this.current ??= this.read_next());
  }

  /** @returns {boolean} True when no tokens remain. */
  eof() {
    return this.peek() === null;
  }

  /** @returns {object|null} The next token, consuming it; `null` at end. */
  next() {
    const tok = this.current;
    this.current = null;
    return tok ?? this.read_next();
  }
}

/**
 * Convenience front end: tokenizes `sourceText` and optionally maps every
 * token through a caller-supplied `forEachToken` function.
 */
export default class BasicParser {
  inputStream;
  tokenStream;
  tokenTransformer;
  hasTokenTransformer = false;
  current = null;

  /**
   * @param {object} pTokenizerConfig
   * @param {string} pTokenizerConfig.sourceText - Text to tokenize.
   * @param {string} [pTokenizerConfig.commentStart] - Comment introducer.
   * @param {string} [pTokenizerConfig.lineTerminator] - Line terminator.
   * @param {(token: object) => *} [pTokenizerConfig.forEachToken] - Applied
   *   to each token by `ParseNext`/`Parse`; `null`/`undefined` means "none".
   */
  constructor(pTokenizerConfig) {
    this.inputStream = new InputStream(
      pTokenizerConfig.sourceText,
      pTokenizerConfig.commentStart,
      pTokenizerConfig.lineTerminator,
    );
    this.tokenStream = new TokenStream(this.inputStream);

    // Checking the value (not just key presence) keeps an explicit
    // `forEachToken: undefined` from being treated as a transformer.
    if (pTokenizerConfig.forEachToken != null) {
      this.tokenTransformer = pTokenizerConfig.forEachToken;
      this.hasTokenTransformer = true;
    }
  }

  /** @returns {object|null} The next raw token, consuming it. */
  next() {
    const tok = this.current;
    this.current = null;
    return tok ?? this.tokenStream.next();
  }

  /** @returns {object|null} The next raw token without consuming it. */
  PeekRaw() {
    return (this.current ??= this.tokenStream.next());
  }

  /**
   * @returns {*} The transformer's result for the next token, which is not
   *   consumed.
   * @throws {Error} If no token transformer is configured.
   */
  PeekTransformed() {
    assertNotNull(this.tokenTransformer, "No token transformer configured.");
    return this.tokenTransformer(this.PeekRaw());
  }

  /**
   * Consumes the next token and returns it, transformed when a transformer
   * is configured.
   *
   * @returns {*} The token or its transformed form.
   * @throws {Error} If there is no token left.
   */
  ParseNext() {
    const tok = this.next();
    assertNotNull(tok, "No more tokens to parse.");
    if (this.hasTokenTransformer) {
      assertNotNull(this.tokenTransformer, "No token transformer configured.");
      return this.tokenTransformer(tok);
    }
    return tok;
  }

  /**
   * @returns {boolean} True when at least one more token is available,
   *   including a token already buffered by `PeekRaw`/`PeekTransformed`.
   */
  HasNext() {
    return this.current !== null || !this.tokenStream.eof();
  }

  /**
   * Consumes all remaining tokens.
   *
   * @returns {Array<*>} Tokens (or transformed tokens) in source order.
   * @throws {Error} On an unhandled character or a failing transformer.
   */
  Parse() {
    const tokens = [];
    while (this.HasNext()) {
      tokens.push(this.ParseNext());
    }
    return tokens;
  }

  /**
   * Like `Parse`, but never throws: any failure is logged with
   * `console.warn` and an empty array is returned.
   *
   * @returns {Array<*>}
   */
  TryParse() {
    try {
      return this.Parse();
    } catch (ex) {
      // Log every failure, not only Error instances, so nothing is
      // swallowed silently.
      console.warn(ex);
      return [];
    }
  }

  /** @returns {Promise<Array<*>>} `Parse()` result; rejects if it throws. */
  async ParseAsync() {
    return this.Parse();
  }

  /** @returns {Promise<Array<*>>} `TryParse()` result; never rejects. */
  async TryParseAsync() {
    return this.TryParse();
  }
}