UNPKG

@ranchonyx/nanoparse

Version:

A super simple basic parser / tokenizer for easier processing of various configuration files

354 lines (298 loc) 11.1 kB
/**
 * Asserts that `target` is not `null` or `undefined`
 * @param target The value to check
 * @param info Optional text appended to the error message
 * @throws Error When `target` is `null` or `undefined`
 */
export function assertNotNull<T>(target: T | undefined | null, info?: string): asserts target is NonNullable<T> {
    if (target === null || target === undefined)
        throw new Error(`Assertion failed.${info ? " " + info : ""}`);
}

/**
 * Represents an {@link InputStream}'s internal 'cursor'
 * - `line` starts at 1, `col` and `pos` start at 0
 */
export type InputStreamPosition = {
    line: number,
    col: number,
    pos: number
};

/**
 * This class consumes and iterates over a piece of source text
 */
export class InputStream {
    private readonly input: string;
    public END_OF_LINE: string = "\n";
    public COMMENT_START: string = "#";

    private streamPosition = 0;
    private sourceColumn = 0;
    private sourceLine = 1;

    /**
     * @param pStreamInput The source text to iterate over
     * @param pCommentStart Overrides {@link COMMENT_START} when non-empty
     * @param pSourceEOL Overrides {@link END_OF_LINE} when non-empty
     */
    constructor(pStreamInput: string, pCommentStart?: string, pSourceEOL?: string) {
        this.input = pStreamInput;

        if (pCommentStart)
            this.COMMENT_START = pCommentStart;

        if (pSourceEOL)
            this.END_OF_LINE = pSourceEOL;
    }

    /**
     * Gets the next character and advances the cursor
     * - Returns an empty string, without moving the cursor, once the end of the input is reached
     */
    public next(): string {
        // Do not let the cursor drift past the end of the input
        if (this.streamPosition >= this.input.length)
            return "";

        const ch = this.input.charAt(this.streamPosition++);

        // A line ends once the last character of END_OF_LINE has been consumed.
        // For a single-character terminator this is the same as `ch === END_OF_LINE`,
        // but it also handles terminators such as "\r\n".
        const completedLine = this.END_OF_LINE.length > 0
            && this.input.endsWith(this.END_OF_LINE, this.streamPosition);

        if (completedLine) {
            this.sourceLine++;
            this.sourceColumn = 0;
        } else {
            this.sourceColumn++;
        }

        return ch;
    }

    /**
     * Looks up the next character without consuming it
     * - Returns an empty string at the end of the input
     */
    public peek(): string {
        return this.input.charAt(this.streamPosition);
    }

    /**
     * Checks if the unread part of the input begins with `sequence`
     * - An empty `sequence` never matches
     * @param sequence The text to look for at the current cursor position
     */
    public lookingAt(sequence: string): boolean {
        return sequence.length > 0 && this.input.startsWith(sequence, this.streamPosition);
    }

    /**
     * Checks if we have reached the end of the source code
     */
    public eof(): boolean {
        return this.peek() === "";
    }

    /**
     * Returns a snapshot of the current internal cursor position
     */
    public getPosition(): InputStreamPosition {
        return {
            line: this.sourceLine,
            col: this.sourceColumn,
            pos: this.streamPosition
        };
    }
}

/**
 * Represents a token as emitted by {@link TokenStream}s
 */
export type Token = {
    type: string;
    value: string;
    position: InputStreamPosition;
};

/**
 * Represents the union of a {@link Token} and `null`
 */
export type TokenOrNull = Token | null;

/**
 * Checks if `T` is exactly equal to {@link Token}
 */
export type IsExactlyToken<T> = T extends Token ? (Token extends T ? true : false) : false;

/**
 * Describes the signature of a function which transforms {@link Token}s into the type specified by `TransformTo`
 */
export type TokenTransformFunction<TransformTo> = (token: Token) => TransformTo;

/**
 * This class consumes an {@link InputStream} to generate {@link Token}s
 */
export class TokenStream {
    private input: InputStream;
    private current: TokenOrNull = null;

    /**
     * @param streamInput The character source to tokenize
     */
    constructor(streamInput: InputStream) {
        this.input = streamInput;
    }

    /**
     * Checks if `ch` is whitespace (space, tab, line feed, carriage return or backspace)
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_whitespace = (ch: string): boolean => {
        return " \t\n\r\b".indexOf(ch) >= 0;
    };

    /**
     * Checks if `ch` is the start of an identifier, i.e. is not in the Unicode "Other" (`C`) category
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_ident_start = (ch: string): boolean => {
        // NOTE(review): a plain space is not in category `C`, so it is accepted here and
        // identifiers run up to the next tab / line break — confirm this is intended.
        return /\P{C}/u.test(ch);
    };

    /**
     * Checks if `ch` represents an identifier
     * @param ch The character produced by the internal {@link InputStream} to be checked
     */
    private is_ident = (ch: string): boolean => {
        return (this.is_ident_start(ch) || "?!-<>()[]{}=0123456789".indexOf(ch) >= 0)
            && ch !== this.input.COMMENT_START;
    };

    /**
     * Reads from its internal {@link InputStream} until `predicate` returns false
     * @param predicate A function, which the character produced by the internal {@link InputStream} is passed, which controls the reading of new characters
     */
    private read_while = (predicate: (ch: string) => boolean): string => {
        let str = "";
        while (!this.input.eof() && predicate(this.input.peek()))
            str += this.input.next();

        return str;
    };

    /**
     * Reads an entire identifier from the internal {@link InputStream}
     * - The token's `position` is the cursor position before the first character was read
     */
    private read_ident = (): Token => {
        const beforeRead: InputStreamPosition = this.input.getPosition();
        // The extra look-ahead stops the identifier in front of a multi-character comment start
        const ident = this.read_while(
            (ch) => this.is_ident(ch) && !this.input.lookingAt(this.input.COMMENT_START)
        );

        return {
            type: "identifier",
            value: ident,
            position: beforeRead
        };
    };

    /**
     * Skips a comment, the start of which is determined by the internal {@link InputStream}'s `COMMENT_START` property
     * - Consumes everything up to and including the next `END_OF_LINE`, or up to the end of the input
     */
    private skip_comment = (): void => {
        while (!this.input.eof() && !this.input.lookingAt(this.input.END_OF_LINE))
            this.input.next();

        // Consume the whole terminator, which may be longer than one character
        for (let i = 0; i < this.input.END_OF_LINE.length && !this.input.eof(); i++)
            this.input.next();
    };

    /**
     * Produce the next available {@link TokenOrNull} from the internal {@link InputStream}
     * @throws Error When unable to handle a character
     */
    private read_next(): TokenOrNull {
        // A loop instead of recursion, so long runs of comment lines cannot exhaust the call stack
        for (;;) {
            this.read_while(this.is_whitespace);

            if (this.input.eof())
                return null;

            if (this.input.lookingAt(this.input.COMMENT_START)) {
                this.skip_comment();
                continue;
            }

            const ch = this.input.peek();

            if (this.is_ident_start(ch))
                return this.read_ident();

            throw new Error(`Can't handle character ${ch} [0x${ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, "0")}]`);
        }
    }

    /**
     * Looks up the next token without consuming it
     */
    private peek(): TokenOrNull {
        if (this.current === null)
            this.current = this.read_next();

        return this.current;
    }

    /**
     * Checks if we have reached the end of the available tokens
     */
    public eof(): boolean {
        return this.peek() === null;
    }

    /**
     * Gets the next token, or `null` when no tokens are left
     */
    public next(): TokenOrNull {
        const tok = this.current;
        this.current = null;
        return tok || this.read_next();
    }
}

/**
 * Represents non-generic options for a {@link BasicParser}
 * - If `lineTerminator` is left blank, it will be set to `\n`
 * - If `commentStart` is left blank, it will be set to `#`
 */
export type BasicParserOptions = {
    sourceText: string;
    lineTerminator?: string;
    commentStart?: string;
};

/**
 * Represents an optionally generic configuration for a {@link BasicParser}
 * - If *not* typed, this type is equal to {@link BasicParserOptions}
 * - If it *is* typed, this type will *require* a `forEachToken` function of type {@link TokenTransformFunction} which serves to transform the produced tokens into the desired format
 */
export type BasicParserConfig<TransformTo = Token> = IsExactlyToken<TransformTo> extends true
    ? BasicParserOptions
    : BasicParserOptions & {
        forEachToken: TokenTransformFunction<TransformTo>;
    };

/**
 * This class consumes and wraps a {@link TokenStream}, providing methods for normal, safe and asynchronous parsing of a source text
 */
export default class BasicParser<TransformTo = Token> {
    private readonly inputStream: InputStream;
    private readonly tokenStream: TokenStream;
    private readonly tokenTransformer?: TokenTransformFunction<TransformTo>;
    private readonly hasTokenTransformer: boolean = false;

    // Token fetched by a peek that has not been consumed yet
    private current: TokenOrNull = null;

    /**
     * @param pTokenizerConfig The source text plus optional line terminator, comment start and token transformer
     */
    constructor(pTokenizerConfig: BasicParserConfig<TransformTo>) {
        this.inputStream = new InputStream(pTokenizerConfig.sourceText, pTokenizerConfig.commentStart, pTokenizerConfig.lineTerminator);
        this.tokenStream = new TokenStream(this.inputStream);

        if ("forEachToken" in pTokenizerConfig) {
            this.tokenTransformer = pTokenizerConfig.forEachToken as TokenTransformFunction<TransformTo>;
            this.hasTokenTransformer = true;
        }
    }

    /**
     * Produces the next token, preferring a token that was already fetched by a peek
     * @private
     */
    private next(): TokenOrNull {
        const tok = this.current;
        this.current = null;
        return tok || this.tokenStream.next();
    }

    /**
     * Returns the current token without consuming it
     * - When no tokens are left the returned value is actually `null`; check {@link HasNext} first
     */
    public PeekRaw(): Token {
        if (this.current === null)
            this.current = this.tokenStream.next();

        return this.current as Token;
    }

    /**
     * Returns the current token in its transformed state
     * @throws Error When this parser was constructed without a `forEachToken` function
     */
    public PeekTransformed(): TransformTo {
        assertNotNull<TokenTransformFunction<TransformTo>>(this.tokenTransformer);
        return this.tokenTransformer(this.PeekRaw());
    }

    /**
     * Parses tokens iteratively
     * - Returns the transformed token if a `forEachToken` function was configured, the raw {@link Token} otherwise
     * @throws Error When no tokens are left, or on invalid source text
     */
    public ParseNext(): TransformTo {
        const tok = this.next() as Token;
        assertNotNull<Token>(tok);

        if (this.hasTokenTransformer) {
            assertNotNull<TokenTransformFunction<TransformTo>>(this.tokenTransformer);
            return this.tokenTransformer(tok);
        }

        return tok as TransformTo;
    }

    /**
     * Checks if {@link ParseNext} can produce another token
     * @throws Error On invalid source text
     */
    public HasNext(): boolean {
        // A token held back by PeekRaw() / PeekTransformed() still counts as available
        return this.current !== null || !this.tokenStream.eof();
    }

    /**
     * Parses the internal source text and returns an Array of {@link Token}s or, if this class was constructed with a type parameter, an Array of that type.
     * @throws Error On invalid source text
     */
    public Parse(): Array<TransformTo> {
        const tokens: Array<TransformTo> = [];

        while (this.HasNext())
            tokens.push(this.ParseNext());

        return tokens;
    }

    /**
     * Parses the internal source text and returns an Array of {@link Token}s or, if this class was constructed with a type parameter, an Array of that type.
     * - If an exception was thrown during parsing, it is logged with `console.warn` and an empty array is returned
     * - Does not throw exceptions on invalid source text
     */
    public TryParse(): Array<TransformTo> | [] {
        try {
            return this.Parse();
        } catch (ex: unknown) {
            console.warn(ex);
            return [];
        }
    }

    /**
     * Asynchronously Parses the internal source text and returns a Promise of an Array of {@link Token}s or, if this class was constructed with a type parameter, an Array of that type.
     * - The returned Promise rejects on invalid source text
     */
    public async ParseAsync(): Promise<Array<TransformTo>> {
        return this.Parse();
    }

    /**
     * Asynchronously Parses the internal source text and returns a Promise of an Array of {@link Token}s or, if this class was constructed with a type parameter, an Array of that type.
     * - If an exception was thrown during parsing, it resolves to an empty array
     * - Does not reject on invalid source text
     */
    public async TryParseAsync(): Promise<Array<TransformTo> | []> {
        return this.TryParse();
    }
}