UNPKG

crazy-parser

Version:

A light-weight parser combinator

855 lines (699 loc) 22.1 kB
export const Nothing: unique symbol = Symbol("Nothing") export type State = { index: number } type Vector<A, Count extends number, TmpResult extends Array<A> = []> = TmpResult["length"] extends Count ? TmpResult : Vector<A, Count, [A, ...TmpResult]> type MaybeJoined<T> = T | [T] export class Parser<A, E extends Error = Error> { constructor( public readonly f: (input: Uint32Array, state: State) => A | E ) {} /* Replace the result of parser with a constant. The same as `<$` in Haskell. @param c - The constant @return A new parser that always returns that constant. */ cmap<B>(c: B): Parser<B, E> { return new Parser<B, E>((input, state) => { const result = this.f(input, state) if (result instanceof Error) return result return c }) } /* Modify the result of parser. Because every parser is a functor, similar to `map()` of lists. The same as `fmap` or `<$>` in Haskell. @param f - The modifier function, which takes parser's result and gives a new value. @return A new parser that returns the return value of the modifier function. */ map<B>(f: (a: A) => B): Parser<B, E> { return new Parser<B, E>((input, state) => { const result = this.f(input, state) if (result instanceof Error) return result return f(result) }) } /* If this parser failed, try the other one. Because every parser is an alternative. The same as `<|>` in Haskell. @param p - The other parser to try. @return A new parser that tries parser `p` if it failed. */ or<B, E2 extends Error>(p: Parser<B, E2>): Parser<A | B, E2> { return new Parser<A | B, E2>((input, state) => { const result = this.f(input, state) if (result instanceof Error) return p.f(input, state) return result }) } orPure<B>(c: B): Parser<A | B, any> { return new Parser<A | B, any>((input, state) => { const result = this.f(input, state) if (result instanceof Error) return c return result }) } /* Continue parsing with the result from the previous parser. Because every parser is a monad, similar to `flatMap()` of lists, `then()` of `Promise`s. The same as `>>=` in haskell. @param gf - The generator function that takes the result of the previous parser, and gives the next parser. @return A new parser that firstly tries the first one, and if it succeeded, the result is passed to `gf`, and the second parser will be what `gf` returns. */ bind<B, E2 extends Error>(gf: (a: A) => Parser<B, E2>): Parser<B, E | E2> { return new Parser<B, E | E2>((input, state) => { const result = this.f(input, state) if (result instanceof Error) return result return gf(result).f(input, state) }) } /* Combine two parsers into one. Because every parser is an applicative. The same as `<~>` in Parsec (a Haskell parser library). @param p - The other parser to be combined with. @return A new parser that firstly tries the first one, and if it succeeded, tries the second one. */ and<B, E2 extends Error>(p: Parser<B, E2>): Parser<[A, B], E | E2> { return this.bind(a => p.map(b => [a, b])) } /* Combine two parsers into one, **keep the result from the left**, reject the result from the right. The same as `<*` in Haskell. @param p - The other parser to be combined with. @return A new parser that only **keeps the result from the **left**. */ left<B, E2 extends Error>(p: Parser<B, E2>): Parser<A, E | E2> { return this.bind(a => p.map(_ => a)) } /* Combine two parsers into one, **keep the result from the right**, reject the result from the left. The same as `*>` in Haskell. @param p - The other parser to be combined with. @return A nwe parser that only **keeps the result from the right**. */ right<B, E2 extends Error>(p: Parser<B, E2>): Parser<B, E | E2> { return this.bind(_ => p) } /* Try to use this parser as many as it can. **It won't fail** but gives an empty list if it didn't parse anything. The same as `many` in Haskell. @return A new parser that gives a list of results. */ many(): Parser<Array<A>, E> { return new Parser<Array<A>, E>((input, state) => { const results: Array<A> = [] while (true) { const result = this.f(input, state) if (result instanceof Error) break results.push(result) } return results }) } /* Try to use this parser as many as it can. **It will fail** if it didn't parse anything. The same as `some` in Haskell. @return A new parser that gives a list of results. */ some(): Parser<Array<A>, E> { return this.many().bind(rs => rs.length > 0 ? pure(rs) : empty); } /* Haskell-style infix operator of `and()`. */ _$_<B, E2 extends Error>(p: Parser<B, E2>): Parser<[A, B], E | E2> { return this.and(p) } /* Haskell-style infix operator of `left()`. */ _$<B, E2 extends Error>(p: Parser<B, E2>): Parser<A, E | E2> { return this.left(p) } /* Haskell-style infix operator of `right()`. */ $_<B, E2 extends Error>(p: Parser<B, E2>): Parser<B, E | E2> { return this.right(p) } /* Let this parser fail if the condition is false. @param c - The condition @return A parser that immediately fails if the condition is false. */ if(c: boolean): Parser<A> { return c ? this : empty } /* Check if the result meets the requirement. @param c - The condition function that takes the result from the parser. If it returned true, the parsing succeeded. @return A new parser that fails if the result doesn't meet the requirement, even the parsing succeeded. */ where<E2 extends Error = Error>(c: (a: A) => boolean, e2: E2 = new Error() as E2): Parser<A, E | E2> { return new Parser<A, E | E2>((input, state) => { const oldIndex = state.index const result = this.f(input, state) if (result instanceof Error) return result if (!c(result)) { state.index = oldIndex return e2 } return result }) } /* Make a parser optional by allowing it to fail. The same as `optional` in Haskell. */ optional(): Parser<A | typeof Nothing, E> { return this.or(pure(Nothing)) } /* Repeat the parser several times. @param n - The number of repetition. @return A new parser that gives a list of results based on how many times it repeated. If any of it failed, the whole one failed. */ x<N extends number>(n: N): Parser<Vector<A, N>, E> { if (n < 1) throw Error("Number of repetition must be greater than 1") const ps: Array<Parser<A, E>> = Array.from({length: n}, _ => this) return sequence(ps) as Parser<Vector<A, N>, E> } /* Append the index range to the result. This is useful in creating source-map. @return A new parser that gives a bi-tuple (result & range). */ withRange(): Parser<[A, [number, number]], E> { return new Parser<[A, [number, number]], E>((input, state) => { const start = state.index const result = this.f(input, state) if (result instanceof Error) return result const end = state.index return [result, [start, end]] }) } /* When this parser failed, rewind the cursor back to where it started. This function doesn't have a prefix version because `try` is a JavaScript keyword. @return A new parser that will rewind the cursor back to where it started if the parsing failed. */ try(): Parser<A, E> { return new Parser<A, E>((input, state) => { const oldState = structuredClone(state) const result = this.f(input, state) if (result instanceof Error) state.index = oldState.index return result }) } /* Override the error message of this parser. @param e - The new error message. @return A new parser that will throw this new error message. */ error<E2 extends Error>(e: E2): Parser<A, E2> { return new Parser<A, E2>((input, state) => { const result = this.f(input, state) if (result instanceof Error) return e return result }) } /* Try to parse the given string. @param input - The string to be parsed. @return A tuple where the first one is the parsed result, if it succeeded. Or you'll get a `Fail` constant. The second one is the state that could reveal where the parser stopped. You should import `Fail` to check if it was successful. */ run(input: string): [A | E, State] { const buffer = Uint32Array.from(Array.from(input).map(c => c.codePointAt(0))) const state: State = {index: 0} const result = this.f(buffer, state) return [result, state] } /* Try to parse the given string. @param input - The string to be parsed. @return The parsed result if it succeeded. Or you'll get a `Fail` constant. */ eval(input: string): A | E { return this.run(input)[0] } /* Turn this parser into a promise. This is useful when you don't want to deal with our `Fail` constant. @param input - The string to be parsed. @return The promise version of `run()`. */ runPromise(input: string): Promise<[A, State]> { return new Promise((resolve, reject) => { const [result, state] = this.run(input) if (result instanceof Error) reject(result) else resolve([result, state]) }) } /* Turn this parser into a promise. This is useful when you don't want to deal with our `Fail` constant. @param input - The string to be parsed. @return The promise version of `eval()`. */ evalPromise(input: string): Promise<A> { return new Promise((resolve, reject) => { const result = this.eval(input) if (result instanceof Error) reject(result) else resolve(result) }) } /* Show a message when the parser succeeded (WIN) or failed (BAD). Don't use is for dumping syntax tree or anything. Only use this for debugging purpose. @param message - The message to be shown. @return A new parser that will show the message when it succeeded or failed. */ trace(...message: Array<any>): Parser<A, E> { return new Parser<A, E>((input, state) => { const result = this.f(input, state) if (result instanceof Error) console.error(`[crazy-parser] BAD`, result, ...message) else console.log(`[crazy-parser] WIN`, result, ...message) return result }) } } /* Lift a constant to a parser. The same as `pure` or `return` in Haskell. @param c - The constant @return A parser that always gives that constant, without consuming any input string. */ export function pure<A>(c: A): Parser<A, any> { return new Parser<A, any>(_ => c) } /* The parser that always fails. The same as `empty` in Haskell. */ export const empty: Parser<any> = new Parser<any, Error>(() => new Error()) /* A convenient way to write a list of chained `or()`. Similar to `race()` of Promise, but this is deterministic. The same as `asum` in Haskell. Before: ```ts a.or(b).or(c).or(d) ``` Now: ```ts asum([a, b, c, d]) ``` @param ps - A list of parsers. @return A new parser from combining parsers by `or()`. */ export function asum<A> (...args: [Array<Parser<A, any>>]): Parser<A> export function asum<A> (...args: Array<Parser<A, any>>): Parser<A> export function asum<A> (...args: [Array<Parser<A, any>>] | Array<Parser<A, any>>): Parser<A> { const ps = (args.length == 1 && args[0] instanceof Array) ? args[0] : args as Array<Parser<A, any>> if (ps.length == 0) return empty return ps.reduce((sum, p) => sum.or(p)).or(empty) } /* A convenient way to write a list of chained `and()`, without being shocked by the nested bi-tuples. Similar to `all()` of Promise. The same as `sequence` in Haskell, but with more precise types thanks to the chaos of JavaScript. @param ps - A list of parsers. @return A new parser that tries the parsers one by one from left to right, and gives a list of their results. If any of it failed, the whole one failed. */ export function sequence< A1, E1 extends Error, A2, E2 extends Error, > (args: [Parser<A1, E1>, Parser<A2, E2>,]): Parser<[A1, A2], E1 | E2> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, > (args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, ]): Parser<[A1, A2, A3], E1 | E2 | E3> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, A4, E4 extends Error, > (args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, Parser<A4, E4>, ]): Parser<[A1, A2, A3, A4], E1 | E2 | E3 | E4> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, A4, E4 extends Error, A5, E5 extends Error, > (args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, Parser<A4, E4>, Parser<A5, E5>, ]): Parser<[A1, A2, A3, A4, A5], E1 | E2 | E3 | E4 | E5> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, A4, E4 extends Error, A5, E5 extends Error, A6, E6 extends Error, > (args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, Parser<A4, E4>, Parser<A5, E5>, Parser<A6, E6>, ]): Parser<[A1, A2, A3, A4, A5, A6], E1 | E2 | E3 | E4 | E5 | E6> export function sequence< A1, E1 extends Error, A2, E2 extends Error, > (...args: [Parser<A1, E1>, Parser<A2, E2>,]): Parser<[A1, A2], E1 | E2> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, > (...args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, ]): Parser<[A1, A2, A3], E1 | E2 | E3> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, A4, E4 extends Error, > (...args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, Parser<A4, E4>, ]): Parser<[A1, A2, A3, A4], E1 | E2 | E3 | E4> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, A4, E4 extends Error, A5, E5 extends Error, > (...args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, Parser<A4, E4>, Parser<A5, E5>, ]): Parser<[A1, A2, A3, A4, A5], E1 | E2 | E3 | E4 | E5> export function sequence< A1, E1 extends Error, A2, E2 extends Error, A3, E3 extends Error, A4, E4 extends Error, A5, E5 extends Error, A6, E6 extends Error, > (...args: [ Parser<A1, E1>, Parser<A2, E2>, Parser<A3, E3>, Parser<A4, E4>, Parser<A5, E5>, Parser<A6, E6>, ]): Parser<[A1, A2, A3, A4, A5, A6], E1 | E2 | E3 | E4 | E5 | E6> export function sequence<Ts extends Array<[any, Error]>> (ps: { [I in keyof Ts]: Parser<Ts[I][0], Ts[I][1]> }): Parser<{ [I in keyof Ts]: Ts[I][0] }, Ts[number][1]> export function sequence<Ts extends Array<[any, Error]>> (...ps: { [I in keyof Ts]: Parser<Ts[I][0], Ts[I][1]> }): Parser<{ [I in keyof Ts]: Ts[I][0] }, Ts[number][1]> export function sequence<Ts extends Array<[any, Error]>> (...args: MaybeJoined<{ [I in keyof Ts]: Parser<Ts[I][0], Ts[I][1]> }>): Parser<{ [I in keyof Ts]: Ts[I][0] }, Ts[number][1]> { const ps = args[0] instanceof Array ? args[0] : args return new Parser<{ [I in keyof Ts]: Ts[I][0] }, Ts[number][1]>((input, state) => { const results: any[] = [] for (const p of ps) { const result = p.f(input, state) if (result instanceof Error) return result results.push(result) } return results as Ts }) } /* Test if the parser has reached the end of file (string). */ export const eof = new Parser<unknown, Error>((input, state) => { if (state.index < input.length) return new Error() }) /* Consume one character from the input. Fail if it has reached the end of string. */ export const one = new Parser<string, Error>((input, state) => { if (state.index >= input.length) return new Error() const char = String.fromCodePoint(input[state.index]) state.index += 1 return char }) /* Parse a specific characters. @params c - The character to be parsed. */ export function char<C extends string>(c: C): Parser<C> { return new Parser<C, Error>((input, state) => { if (state.index >= input.length) return new Error() const char = String.fromCodePoint(input[state.index]) if (char != c) return new Error() state.index += 1 return <C>char }) } // Parse a digit (0~9). export const digit = one.where(c => "0" <= c && c <= "9") as Parser<"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"> // Parse an uppercase Latin letter without diacritics. export const upper = one.where(c => "A" <= c && c <= "Z") as Parser< "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"> // Parse a lowercase Latin letter without diacritics. export const lower = one.where(c => "a" <= c && c <= "z") as Parser< "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"> // Parse a case-insensitive hexadecimal digit. export const hex = digit.or(one.where(c => "A" <= c && c <= "F" || "a" <= c && c <= "f")) as Parser< "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f"> // Parse a Latin letter without diacritics. export const alpha = upper.or(lower) // Parse a space. export const space = char(" ") // Parse a tab. export const tab = char("\t") // Parse a carriage return. export const cr = char("\r") // Parser a linefeed. export const lf = char("\n") // Parse the index which the parser is looking at. export const index = new Parser<number, any>((_, state) => state.index) /* Parse a specific string. @param s - The string to be parsed. */ export function str<S extends string>(s: S): Parser<S> { const buffer = Uint32Array.from(Array.from(s).map(c => c.codePointAt(0))) return new Parser<S, Error>((input, state) => { if (state.index + buffer.length > input.length) return new Error() for (let i = 0; i < buffer.length; i += 1) if (input[state.index + i] != buffer[i]) return new Error() state.index += buffer.length return s }) } export function span(f: (c: string) => boolean): Parser<Array<string>, any> { return new Parser<Array<string>, any>((input, state) => { let oldIndex = state.index while (state.index < input.length && f(String.fromCodePoint(input[state.index]))) state.index += 1 return Array.from(input.slice(oldIndex, state.index)).map(c => String.fromCodePoint(c)).join("") }) } export function anyChar<Cs extends Array<string>>(cs: Cs): Parser<Cs[number]> { return asum(cs.map(char)) } export function anyStr<Ss extends Array<string>>(ss: Ss): Parser<Ss[number]> { return asum(ss.map(str)) } /* Since JavaScript is a strictly evaluated language, this makes it impossible for constants to refer to itself. So all parsers refer to itself must be wrapped in a function that takes no arguments (in Haskell, a function takes no arguments is totally the same as a constant). But referring to itself will cause infinite recursion, so you must use our `lazy()` wrapper to only evaluate the parser when it's needed. So here's the full workaround, assume there's a self-referring parser `srp`: Before: ```ts const srp = f(srp) ``` After: ```ts const srp = () => // 1st WRAP! lazy(() => // 2nd WRAP! f(srp()) // Call it! ) ``` */ export function lazy<P extends Parser<any, any>>(pg: () => P): P { return new Parser((input, state) => pg().f(input, state)) as P } /* A convenient way to intersperse parsers among string parsers. Before: ```ts sequence([str("123").right(a), str("456").right(b).left("789")]) ``` After: ```ts template`123${a}456${b}789` ``` */ export function template<Ts extends Array<[any, any]>> (ss: TemplateStringsArray, ...ps: { [I in keyof Ts]: Parser<Ts[I][0], Ts[I][1]> }): Parser<{ [I in keyof Ts]: Ts[I][0] }, Ts[number][1]> { return sequence(ps.map((p, i) => str(ss[i]).right(p)) as { [I in keyof Ts]: Parser<Ts[I][0], Ts[I][1]> }) .left(str(ss[ss.length - 1])) }