@ranchonyx/nanoparse
Version:
A super simple basic parser / tokenizer for easier processing of various configuration files
168 lines (167 loc) • 4.72 kB
JavaScript
/**
 * Guard that rejects `null` and `undefined`.
 *
 * @param {*} target - Value to check; every other value (including `0`, `""` and `false`) passes.
 * @param {string} [info] - Optional detail appended to the error message after a space.
 * @throws {Error} "Assertion failed." (followed by ` ${info}` when given) if `target` is nullish.
 */
export function assertNotNull(target, info) {
    const isPresent = target !== null && target !== undefined;
    if (isPresent) {
        return;
    }
    const detail = info ? " " + info : "";
    throw new Error(`Assertion failed.${detail}`);
}
/**
 * Character-level cursor over a source string that tracks the current
 * line, column and absolute offset while characters are consumed.
 */
export class InputStream {
    /** Full source text being read. */
    input;
    /** Line terminator sequence; may be longer than one character (e.g. "\r\n"). */
    END_OF_LINE = "\n";
    /** Comment start marker, exposed for consumers of this stream. */
    COMMENT_START = "#";
    /** Offset of the next character to be read. */
    streamPosition = 0;
    /** Number of characters consumed since the last line terminator. */
    sourceColumn = 0;
    /** Current line number, starting at 1. */
    sourceLine = 1;

    /**
     * @param {string} pStreamInput - Source text to read.
     * @param {string} [pCommentStart] - Overrides the default "#" when truthy.
     * @param {string} [pSourceEOL] - Overrides the default "\n" when truthy.
     */
    constructor(pStreamInput, pCommentStart, pSourceEOL) {
        this.input = pStreamInput;
        if (pCommentStart) {
            this.COMMENT_START = pCommentStart;
        }
        if (pSourceEOL) {
            this.END_OF_LINE = pSourceEOL;
        }
    }

    /**
     * Consumes and returns the next character.
     *
     * @returns {string} The consumed character, or "" at end of input
     *   (in which case the cursor and position counters are left untouched).
     */
    next() {
        const ch = this.input.charAt(this.streamPosition);
        if (ch === "") {
            // End of input: do not advance, so reported positions never
            // drift past the end of the text.
            return ch;
        }
        this.streamPosition++;
        // A new line starts once the LAST character of the terminator has been
        // consumed. Comparing the whole sequence (instead of a single
        // character) makes multi-character terminators such as "\r\n" work.
        const terminatorStart = this.streamPosition - this.END_OF_LINE.length;
        const endsLine = terminatorStart >= 0
            && this.input.startsWith(this.END_OF_LINE, terminatorStart);
        if (endsLine) {
            this.sourceLine++;
            this.sourceColumn = 0;
        } else {
            this.sourceColumn++;
        }
        return ch;
    }

    /**
     * @returns {string} The next character without consuming it, or "" at end of input.
     */
    peek() {
        return this.input.charAt(this.streamPosition);
    }

    /**
     * @returns {boolean} True when no characters are left to read.
     */
    eof() {
        return this.peek() === "";
    }

    /**
     * @returns {{line: number, col: number, pos: number}} Snapshot of the
     *   current line, column and absolute offset.
     */
    getPosition() {
        return {
            line: this.sourceLine,
            col: this.sourceColumn,
            pos: this.streamPosition,
        };
    }
}
/**
 * Turns a character stream into a stream of "identifier" tokens, skipping
 * whitespace and comments. Offers one-token lookahead via `peek()`.
 *
 * The character stream must provide `peek()`, `next()`, `eof()`,
 * `getPosition()` and the `COMMENT_START` / `END_OF_LINE` properties.
 */
export class TokenStream {
    /** Underlying character stream. */
    input;
    /** Token cached by `peek()`, or null when nothing is cached. */
    current = null;

    /**
     * @param {object} streamInput - Character stream to tokenize.
     */
    constructor(streamInput) {
        this.input = streamInput;
    }

    /** True for space, tab, line feed, carriage return and backspace. */
    is_whitespace = (ch) => " \t\n\r\b".includes(ch);

    /**
     * True when `ch` is NOT in Unicode general category "Other"
     * (control, format, surrogate, private-use, unassigned).
     * NOTE(review): this also accepts the space character, so spaces inside or
     * after an identifier become part of its value — confirm this is intended.
     */
    is_ident_start = (ch) => /\P{C}/u.test(ch);

    /** True when `ch` may continue an identifier; the comment marker always ends one. */
    is_ident = (ch) => {
        if (ch === this.input.COMMENT_START) {
            return false;
        }
        return this.is_ident_start(ch) || "?!-<>()[]{}=0123456789".includes(ch);
    };

    /**
     * Consumes characters while `predicate` holds for the upcoming character.
     *
     * @param {(ch: string) => boolean} predicate
     * @returns {string} The consumed characters (possibly empty).
     */
    read_while = (predicate) => {
        let consumed = "";
        while (!this.input.eof() && predicate(this.input.peek())) {
            consumed += this.input.next();
        }
        return consumed;
    };

    /**
     * Reads one identifier token starting at the current position.
     *
     * @returns {{type: string, value: string, position: object}} Token whose
     *   `position` is the stream position captured before the first character.
     */
    read_ident = () => {
        const startPosition = this.input.getPosition();
        const value = this.read_while(this.is_ident);
        return {
            type: "identifier",
            value,
            position: startPosition,
        };
    };

    /**
     * Consumes the rest of the current line, including its terminator.
     * The terminator is matched as a whole sequence so that multi-character
     * terminators (e.g. "\r\n") end the comment; at end of input nothing
     * further is read.
     */
    skip_comment = () => {
        const terminator = this.input.END_OF_LINE;
        // Sliding window holding the most recent `terminator.length` characters.
        let tail = "";
        while (!this.input.eof()) {
            tail = (tail + this.input.next()).slice(-terminator.length);
            if (tail === terminator) {
                return;
            }
        }
    };

    /**
     * Reads the next token directly from the character stream.
     *
     * @returns {object|null} The next token, or null at end of input.
     * @throws {Error} If the upcoming character cannot start any token.
     */
    read_next() {
        // Loop (rather than recurse) over skipped comments so that a long run
        // of consecutive comment lines cannot exhaust the call stack.
        for (;;) {
            this.read_while(this.is_whitespace);
            if (this.input.eof()) {
                return null;
            }
            const ch = this.input.peek();
            if (ch === this.input.COMMENT_START) {
                this.skip_comment();
                continue;
            }
            if (this.is_ident_start(ch)) {
                return this.read_ident();
            }
            throw new Error(`Can't handle character ${ch} [0x${ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, "0")}]`);
        }
    }

    /**
     * @returns {object|null} The next token without consuming it, or null at end of input.
     */
    peek() {
        return this.current || (this.current = this.read_next());
    }

    /**
     * @returns {boolean} True when no tokens are left.
     */
    eof() {
        return this.peek() === null;
    }

    /**
     * @returns {object|null} The next token (consumed), or null at end of input.
     */
    next() {
        const cached = this.current;
        this.current = null;
        return cached || this.read_next();
    }
}
/**
 * Front end that wires an InputStream and a TokenStream together and
 * optionally passes every token through a user-supplied transformer.
 */
export default class BasicParser {
    /** Character stream built from the configured source text. */
    inputStream;
    /** Token stream reading from `inputStream`. */
    tokenStream;
    /** Optional per-token transformer taken from `forEachToken`. */
    tokenTransformer;
    /** True when the configuration object carries a `forEachToken` key. */
    hasTokenTransformer = false;
    /** Token cached by `PeekRaw()`, or null when nothing is cached. */
    current = null;

    /**
     * @param {object} pTokenizerConfig
     * @param {string} pTokenizerConfig.sourceText - Text to tokenize.
     * @param {string} [pTokenizerConfig.commentStart] - Passed to InputStream.
     * @param {string} [pTokenizerConfig.lineTerminator] - Passed to InputStream.
     * @param {Function} [pTokenizerConfig.forEachToken] - Applied to each token
     *   returned by `ParseNext()` / `PeekTransformed()`.
     */
    constructor(pTokenizerConfig) {
        this.inputStream = new InputStream(pTokenizerConfig.sourceText, pTokenizerConfig.commentStart, pTokenizerConfig.lineTerminator);
        this.tokenStream = new TokenStream(this.inputStream);
        if ("forEachToken" in pTokenizerConfig) {
            this.tokenTransformer = pTokenizerConfig.forEachToken;
            this.hasTokenTransformer = true;
        }
    }

    /**
     * Consumes the next raw token, preferring one cached by `PeekRaw()`.
     *
     * @returns {object|null} The token, or null at end of input.
     */
    next() {
        const cached = this.current;
        this.current = null;
        return cached || this.tokenStream.next();
    }

    /**
     * @returns {object|null} The next raw token without consuming it.
     */
    PeekRaw() {
        return (this.current || (this.current = this.next()));
    }

    /**
     * @returns {*} The transformer's result for the next raw token, without consuming it.
     * @throws {Error} If no transformer is configured.
     */
    PeekTransformed() {
        assertNotNull(this.tokenTransformer);
        return this.tokenTransformer(this.PeekRaw());
    }

    /**
     * Consumes the next token and returns it, transformed when a transformer
     * is configured.
     *
     * @returns {*} The (possibly transformed) token.
     * @throws {Error} If there is no token left, or the configured transformer is nullish.
     */
    ParseNext() {
        const tok = this.next();
        assertNotNull(tok);
        if (!this.hasTokenTransformer) {
            return tok;
        }
        assertNotNull(this.tokenTransformer);
        return this.tokenTransformer(tok);
    }

    /**
     * @returns {boolean} True while another token can be read.
     */
    HasNext() {
        // A token already pulled out of the token stream by PeekRaw() lives in
        // `this.current`; it must count as pending even if the token stream
        // itself is exhausted.
        return this.current != null || !this.tokenStream.eof();
    }

    /**
     * Consumes all remaining tokens.
     *
     * @returns {Array} The (possibly transformed) tokens in source order.
     * @throws {Error} Whatever the tokenizer or transformer throws.
     */
    Parse() {
        const tokens = [];
        while (this.HasNext()) {
            tokens.push(this.ParseNext());
        }
        return tokens;
    }

    /**
     * Like `Parse()`, but never throws: on failure it returns an empty array,
     * logging the failure via `console.warn` when it is an `Error`.
     *
     * @returns {Array}
     */
    TryParse() {
        try {
            return this.Parse();
        }
        catch (ex) {
            if (ex instanceof Error) {
                console.warn(ex);
            }
            return [];
        }
    }

    /**
     * @returns {Promise<Array>} Resolves with `Parse()`'s result; rejects if it throws.
     */
    async ParseAsync() {
        return this.Parse();
    }

    /**
     * @returns {Promise<Array>} Resolves with `TryParse()`'s result.
     */
    async TryParseAsync() {
        return this.TryParse();
    }
}