@ranchonyx/nanoparse
Version:
A super simple basic parser / tokenizer for easier processing of various configuration files
354 lines (298 loc) • 11.1 kB
text/typescript
/**
 * Guard that narrows `target` to a non-nullish type.
 *
 * Only `null` and `undefined` are rejected; other falsy values such as `0`,
 * `""` or `false` pass.
 *
 * @param target The value to check
 * @param info Optional detail appended to the error message
 * @throws Error When `target` is `null` or `undefined`
 */
export function assertNotNull<T>(target: T | undefined | null, info?: string): asserts target is NonNullable<T> {
    const isMissing = target === undefined || target === null;
    if (!isMissing)
        return;

    const detail = info ? " " + info : "";
    throw new Error(`Assertion failed.${detail}`);
}
/**
 * Snapshot of an {@link InputStream}'s internal 'cursor'
 */
export type InputStreamPosition = {
    /** Line counter of the cursor */
    line: number;
    /** Column counter of the cursor within the current line */
    col: number;
    /** Offset of the cursor into the source text */
    pos: number;
};
/**
 * This class consumes and iterates over a piece of source text, one character
 * (UTF-16 code unit) at a time, while keeping track of the line, column and
 * absolute offset of its cursor.
 */
export class InputStream {
    private readonly input: string;
    /** Sequence that terminates a line; defaults to `\n` */
    public END_OF_LINE: string = "\n";
    /** Character that starts a comment; defaults to `#` */
    public COMMENT_START: string = "#";
    private streamPosition = 0;
    private sourceColumn = 0;
    private sourceLine = 1;

    /**
     * @param pStreamInput The source text to iterate over
     * @param pCommentStart Overrides {@link COMMENT_START}; ignored when omitted or empty
     * @param pSourceEOL Overrides {@link END_OF_LINE}; ignored when omitted or empty
     */
    constructor(pStreamInput: string, pCommentStart?: string, pSourceEOL?: string) {
        this.input = pStreamInput;
        if (pCommentStart)
            this.COMMENT_START = pCommentStart;
        if (pSourceEOL)
            this.END_OF_LINE = pSourceEOL;
    }

    /**
     * Gets the next character and advances the cursor.
     *
     * At the end of the source text an empty string is returned and the cursor
     * is left untouched, so reported positions never run past the input.
     */
    public next(): string {
        if (this.eof())
            return "";

        const ch = this.input.charAt(this.streamPosition++);
        const terminator = this.END_OF_LINE;
        // Test whether the text consumed so far ends with the terminator rather
        // than comparing a single character, so that multi-character
        // terminators such as "\r\n" advance the line counter as well.
        const lineEnded = terminator.length > 0 && this.input.endsWith(terminator, this.streamPosition);
        if (lineEnded) {
            this.sourceLine++;
            this.sourceColumn = 0;
        } else {
            this.sourceColumn++;
        }
        return ch;
    }

    /**
     * Looks up the next character without consuming it.
     * Returns an empty string at the end of the source text.
     */
    public peek(): string {
        return this.input.charAt(this.streamPosition);
    }

    /**
     * Checks if we have reached the end of the source text
     */
    public eof(): boolean {
        return this.streamPosition >= this.input.length;
    }

    /**
     * Returns a snapshot of the current internal cursor position
     */
    public getPosition(): InputStreamPosition {
        return {
            line: this.sourceLine,
            col: this.sourceColumn,
            pos: this.streamPosition
        };
    }
}
/**
 * A single lexical unit as emitted by {@link TokenStream}s
 */
export type Token = {
    /** Category of the token */
    type: string;
    /** The text that makes up the token */
    value: string;
    /** Cursor position associated with the token */
    position: InputStreamPosition;
};
/**
 * Either a {@link Token} or `null`
 */
export type TokenOrNull =
    | Token
    | null;
/**
 * Type-level check that resolves to `true` when `T` and {@link Token} extend
 * each other, and to `false` otherwise
 */
export type IsExactlyToken<T> =
    T extends Token
        ? (Token extends T ? true : false)
        : false;
/**
 * Signature of a callback that converts a {@link Token} into a value of the
 * type given by `TransformTo`
 */
export type TokenTransformFunction<TransformTo> =
    (token: Token) => TransformTo;
/**
 * This class consumes an {@link InputStream} to generate {@link Token}s.
 *
 * Whitespace between tokens and comments are skipped; everything else is
 * emitted as tokens of type `"identifier"`.
 */
export class TokenStream {
    /** Matches any code point whose Unicode general category is not "Other" (C) */
    private static readonly NOT_CATEGORY_OTHER = /\P{C}/u;

    private input: InputStream;
    /** Look-ahead buffer filled by {@link peek} and drained by {@link next} */
    private current: TokenOrNull = null;

    constructor(streamInput: InputStream) {
        this.input = streamInput;
    }

    /**
     * Checks if `ch` is whitespace
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_whitespace = (ch: string) => {
        return " \t\n\r\b".indexOf(ch) >= 0;
    }

    /**
     * Checks if `ch` is one half of a UTF-16 surrogate pair
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_surrogate = (ch: string) => {
        const code = ch.charCodeAt(0);
        return code >= 0xD800 && code <= 0xDFFF;
    }

    /**
     * Checks if `ch` is the start of an identifier.
     *
     * Any character outside the Unicode "Other" category qualifies; note that
     * this includes the space character, so identifiers may contain spaces.
     * Surrogate code units are accepted as well: the {@link InputStream} hands
     * out astral characters (e.g. emoji) one UTF-16 code unit at a time, and
     * each half on its own belongs to the "Other" category.
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_ident_start = (ch: string) => {
        return TokenStream.NOT_CATEGORY_OTHER.test(ch) || this.is_surrogate(ch);
    }

    /**
     * Checks if `ch` may be part of an identifier, i.e. it is a valid
     * identifier character and not the comment start character
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_ident = (ch: string) => {
        return (this.is_ident_start(ch) || "?!-<>()[]{}=0123456789".indexOf(ch) >= 0) && ch !== this.input.COMMENT_START;
    }

    /**
     * Reads from its internal {@link InputStream} until `predicate` returns false
     * or the end of the input is reached
     * @param predicate A function, which the character produced by the internal {@link InputStream} is passed, which controls the reading of new characters
     * @returns The characters that were consumed
     */
    private read_while = (predicate: (ch: string) => boolean) => {
        let str = "";
        while (!this.input.eof() && predicate(this.input.peek()))
            str += this.input.next();
        return str;
    }

    /**
     * Reads an entire identifier from the internal {@link InputStream}.
     * The token's position is the cursor position before the first character was consumed.
     */
    private read_ident = (): Token => {
        const beforeRead: InputStreamPosition = this.input.getPosition();
        const ident = this.read_while(this.is_ident);
        return {
            type: "identifier",
            value: ident,
            position: beforeRead
        }
    }

    /**
     * Skips a comment, the start character of which is determined by the internal {@link InputStream}'s `COMMENT_START` property.
     *
     * Consumes characters up to and including the stream's `END_OF_LINE`
     * sequence, or up to the end of the input if no terminator follows.
     */
    private skip_comment = () => {
        const terminator = this.input.END_OF_LINE;
        // Holds the most recently consumed characters (at most one terminator's
        // worth) so that multi-character terminators like "\r\n" are recognised.
        let tail = "";
        while (!this.input.eof()) {
            const ch = this.input.next();
            if (terminator.length === 0)
                continue; // no terminator configured: the comment runs to the end of the input
            tail = (tail + ch).slice(-terminator.length);
            if (tail === terminator)
                break;
        }
    }

    /**
     * Produce the next available {@link TokenOrNull} from the internal {@link InputStream}
     * @returns The next token, or `null` when the input is exhausted
     * @throws Error When unable to handle a character
     */
    private read_next(): TokenOrNull {
        // Loop instead of recursing after each comment so that long runs of
        // comment lines cannot exhaust the call stack.
        for (;;) {
            this.read_while(this.is_whitespace);
            if (this.input.eof()) return null;
            const ch = this.input.peek();
            if (ch === this.input.COMMENT_START) {
                this.skip_comment();
                continue;
            }
            if (this.is_ident_start(ch)) {
                return this.read_ident();
            }
            throw new Error(`Can't handle character ${ch} [0x${ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, "0")}]`);
        }
    }

    /**
     * Looks up the next token without consuming it
     */
    private peek(): TokenOrNull {
        if (this.current === null)
            this.current = this.read_next();
        return this.current;
    }

    /**
     * Checks if we have reached the end of the available tokens
     */
    public eof(): boolean {
        return this.peek() === null;
    }

    /**
     * Gets the next token, or `null` when no tokens are left
     */
    public next(): TokenOrNull {
        const tok = this.current;
        this.current = null;
        return tok ?? this.read_next();
    }
}
/**
 * The non-generic options accepted by a {@link BasicParser}
 * - If `lineTerminator` is left blank, it will be set to `\n`
 * - If `commentStart` is left blank, it will be set to `#`
 */
export type BasicParserOptions = {
    /** The text to be parsed */
    sourceText: string;
    /** Sequence that ends a line */
    lineTerminator?: string;
    /** Character that starts a comment */
    commentStart?: string;
};
/**
 * Optionally generic configuration for a {@link BasicParser}
 * - If *not* typed (or typed as {@link Token}), this type is equal to {@link BasicParserOptions}
 * - If it *is* typed, this type additionally *requires* a `forEachToken` function of type {@link TokenTransformFunction}, which transforms the produced tokens into the desired format
 */
export type BasicParserConfig<TransformTo = Token> =
    IsExactlyToken<TransformTo> extends true
        ? BasicParserOptions
        : BasicParserOptions & {
            forEachToken: TokenTransformFunction<TransformTo>;
        };
/**
 * This class consumes and wraps a {@link TokenStream}, providing methods for
 * normal, safe and promise-based parsing of a source text.
 *
 * Without a `forEachToken` transformer the parser yields raw {@link Token}s;
 * with one, every token is passed through the transformer first.
 */
export default class BasicParser<TransformTo = Token> {
    private readonly inputStream: InputStream;
    private readonly tokenStream: TokenStream;
    private readonly tokenTransformer?: TokenTransformFunction<TransformTo>;
    /** Look-ahead buffer filled by {@link PeekRaw} and drained by {@link next} */
    private current: TokenOrNull = null;

    /**
     * @param pTokenizerConfig The source text plus optional comment / line
     * terminator settings and, for typed parsers, the `forEachToken` transformer
     */
    constructor(pTokenizerConfig: BasicParserConfig<TransformTo>) {
        this.inputStream = new InputStream(pTokenizerConfig.sourceText, pTokenizerConfig.commentStart, pTokenizerConfig.lineTerminator);
        this.tokenStream = new TokenStream(this.inputStream);
        if ("forEachToken" in pTokenizerConfig) {
            // Only accept an actual function, so that an explicit
            // `forEachToken: undefined` behaves like an omitted transformer.
            const candidate: unknown = pTokenizerConfig.forEachToken;
            if (typeof candidate === "function") {
                this.tokenTransformer = candidate as TokenTransformFunction<TransformTo>;
            }
        }
    }

    /**
     * Produces the next token, preferring a previously peeked one
     * @private
     */
    private next(): TokenOrNull {
        const buffered = this.current;
        this.current = null;
        return buffered ?? this.tokenStream.next();
    }

    /**
     * Applies the transformer if one was configured, otherwise passes the token through
     * @private
     */
    private transform(tok: Token): TransformTo {
        return this.tokenTransformer ? this.tokenTransformer(tok) : (tok as TransformTo);
    }

    /**
     * Returns the current token without consuming it.
     *
     * NOTE: when no tokens are left the underlying stream yields `null`, which
     * is returned as-is despite the declared return type; check
     * {@link HasNext} first.
     */
    public PeekRaw(): Token {
        if (this.current === null)
            this.current = this.tokenStream.next();
        return this.current as Token;
    }

    /**
     * Returns the current token in its transformed state without consuming it.
     * If no transformer was configured, the raw token is returned, matching {@link ParseNext}.
     */
    public PeekTransformed(): TransformTo {
        return this.transform(this.PeekRaw());
    }

    /**
     * Parses tokens iteratively: consumes and returns the next (transformed) token
     * @throws Error When no tokens are left, or on invalid source text
     */
    public ParseNext(): TransformTo {
        const tok = this.next();
        assertNotNull<Token>(tok);
        return this.transform(tok);
    }

    /**
     * Checks whether another token is available, either already peeked and
     * buffered by this parser or still pending in the underlying token stream
     */
    public HasNext(): boolean {
        return this.current !== null || !this.tokenStream.eof();
    }

    /**
     * Parses the remaining source text and returns an Array of {@link Token}s or, if this class was constructed with a type parameter, an Array of that type.
     * A token that was previously peeked but not consumed is included.
     * @throws Error On invalid source text
     */
    public Parse(): Array<TransformTo> {
        const tokens: Array<TransformTo> = [];
        while (this.HasNext()) {
            tokens.push(this.ParseNext());
        }
        return tokens;
    }

    /**
     * Parses the remaining source text and returns an Array of {@link Token}s or, if this class was constructed with a type parameter, an Array of that type.
     * - If an exception was thrown during parsing, it is logged as a warning and an empty array is returned
     * - Does not throw exceptions on invalid source text
     */
    public TryParse(): Array<TransformTo> | [] {
        try {
            return this.Parse();
        } catch (ex: unknown) {
            // Log whatever was thrown (a transformer may throw non-Error values)
            // instead of swallowing it silently.
            console.warn(ex);
            return [];
        }
    }

    /**
     * Promise-returning variant of {@link Parse}. The parsing itself runs
     * synchronously; only the result (or the error, as a rejection) is
     * delivered through the returned Promise.
     */
    public async ParseAsync(): Promise<Array<TransformTo>> {
        return this.Parse();
    }

    /**
     * Promise-returning variant of {@link TryParse}.
     * - If an exception was thrown during parsing, the Promise resolves to an empty array
     * - Does not reject on invalid source text
     */
    public async TryParseAsync(): Promise<Array<TransformTo> | []> {
        return this.TryParse();
    }
}