UNPKG

crazy-parser

Version:

A light-weight parser combinator

485 lines (452 loc) 14.9 kB
/** Sentinel produced by `Parser#optional()` when the wrapped parser fails. */
export const Nothing = Symbol("Nothing");

/**
 * A parser wraps a function `f(input, state)`.
 *
 * - `input` is the buffer of code points being parsed (see `run()`).
 * - `state` is a mutable object; `state.index` is the cursor into `input`.
 * - `f` returns the parsed value on success, or an `Error` instance on failure.
 *
 * Unless stated otherwise, combinators do NOT rewind the cursor when a
 * sub-parser fails after consuming input; wrap it in `try()` for that.
 */
export class Parser {
  /**
   * @param f - The parsing function `(input, state) => result | Error`.
   */
  constructor(f) {
    this.f = f;
  }

  /**
   * Replace the result of the parser with a constant.
   * The same as `<$` in Haskell.
   * @param c - The constant.
   * @return A new parser that gives `c` whenever this parser succeeds.
   */
  cmap(c) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      return result instanceof Error ? result : c;
    });
  }

  /**
   * Modify the result of the parser.
   * The same as `fmap` or `<$>` in Haskell.
   * @param f - The modifier function; it receives the parser's result.
   * @return A new parser that gives the return value of `f`.
   */
  map(f) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      return result instanceof Error ? result : f(result);
    });
  }

  /**
   * If this parser fails, try the other one.
   * The same as `<|>` in Haskell.
   * @param p - The other parser to try.
   * @return A new parser that runs `p` when this parser failed.
   */
  or(p) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      return result instanceof Error ? p.f(input, state) : result;
    });
  }

  /**
   * If this parser fails, succeed with a constant instead.
   * @param c - The fallback value.
   * @return A new parser that gives `c` when this parser failed.
   */
  orPure(c) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      return result instanceof Error ? c : result;
    });
  }

  /**
   * Continue parsing with the result from this parser.
   * The same as `>>=` in Haskell.
   * @param gf - Takes the result of this parser and gives the next parser.
   * @return A new parser that runs this parser and, if it succeeded,
   *   runs the parser returned by `gf`.
   */
  bind(gf) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      if (result instanceof Error) return result;
      return gf(result).f(input, state);
    });
  }

  /**
   * Combine two parsers into one, keeping both results.
   * @param p - The parser to run after this one.
   * @return A new parser that gives the pair `[thisResult, pResult]`.
   */
  and(p) {
    return this.bind((a) => p.map((b) => [a, b]));
  }

  /**
   * Combine two parsers into one, **keeping the result from the left**.
   * The same as `<*` in Haskell.
   * @param p - The parser to run after this one; its result is dropped.
   * @return A new parser that gives only this parser's result.
   */
  left(p) {
    return this.bind((a) => p.map(() => a));
  }

  /**
   * Combine two parsers into one, **keeping the result from the right**.
   * The same as `*>` in Haskell.
   * @param p - The parser to run after this one.
   * @return A new parser that gives only `p`'s result.
   */
  right(p) {
    return this.bind(() => p);
  }

  /**
   * Apply this parser as many times as it succeeds.
   * **It won't fail**; it gives an empty list if nothing was parsed.
   * The same as `many` in Haskell.
   * @return A new parser that gives a list of results.
   */
  many() {
    return new Parser((input, state) => {
      const results = [];
      while (true) {
        const before = state.index;
        const result = this.f(input, state);
        if (result instanceof Error) break;
        results.push(result);
        // A success that consumed nothing would succeed again at the same
        // position forever; keep that one result and stop.
        if (state.index === before) break;
      }
      return results;
    });
  }

  /**
   * Apply this parser as many times as it succeeds.
   * **It will fail** if nothing was parsed.
   * The same as `some` in Haskell.
   * @return A new parser that gives a non-empty list of results.
   */
  some() {
    return this.many().bind((rs) => (rs.length > 0 ? pure(rs) : empty));
  }

  /** Haskell-style infix operator of `and()`. */
  _$_(p) {
    return this.and(p);
  }

  /** Haskell-style infix operator of `left()`. */
  _$(p) {
    return this.left(p);
  }

  /** Haskell-style infix operator of `right()`. */
  $_(p) {
    return this.right(p);
  }

  /**
   * Let this parser fail if the condition is false.
   * @param c - The condition, evaluated once when the parser is built.
   * @return This parser if `c` is truthy, otherwise `empty`.
   */
  if(c) {
    return c ? this : empty;
  }

  /**
   * Check that the result meets a requirement.
   * @param c - Predicate that receives the result; truthy means accept.
   * @param e2 - The error to give when the predicate rejects the result.
   * @return A new parser that fails with `e2`, and rewinds the cursor to
   *   where it started, when the predicate rejects the result.
   */
  where(c, e2 = new Error()) {
    return new Parser((input, state) => {
      const oldIndex = state.index;
      const result = this.f(input, state);
      if (result instanceof Error) return result;
      if (!c(result)) {
        state.index = oldIndex;
        return e2;
      }
      return result;
    });
  }

  /**
   * Make the parser optional by allowing it to fail.
   * The same as `optional` in Haskell.
   * @return A new parser that gives `Nothing` when this parser failed.
   */
  optional() {
    return this.or(pure(Nothing));
  }

  /**
   * Repeat the parser a fixed number of times.
   * @param n - The number of repetitions; must be at least 1.
   * @return A new parser that gives a list of `n` results and fails as soon
   *   as any repetition fails.
   */
  x(n) {
    if (n < 1) throw new Error("Number of repetition must be at least 1");
    return sequence(Array.from({ length: n }, () => this));
  }

  /**
   * Append the index range to the result. Useful for building source maps.
   * @return A new parser that gives `[result, [start, end]]`.
   */
  withRange() {
    return new Parser((input, state) => {
      const start = state.index;
      const result = this.f(input, state);
      if (result instanceof Error) return result;
      return [result, [start, state.index]];
    });
  }

  /**
   * When this parser fails, rewind the cursor back to where it started.
   * @return A new parser that restores `state.index` on failure.
   */
  try() {
    return new Parser((input, state) => {
      const oldIndex = state.index;
      const result = this.f(input, state);
      if (result instanceof Error) state.index = oldIndex;
      return result;
    });
  }

  /**
   * Override the error of this parser.
   * @param e - The error to give instead.
   * @return A new parser that gives `e` whenever this parser fails.
   */
  error(e) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      return result instanceof Error ? e : result;
    });
  }

  /**
   * Parse the given string.
   * @param input - The string to be parsed.
   * @return A pair `[result, state]`. `result` is the parsed value, or an
   *   `Error` instance if parsing failed. `state.index` is the position
   *   (counted in code points) where the parser stopped.
   */
  run(input) {
    // Work on code points so characters outside the BMP count as one unit.
    const buffer = Uint32Array.from(Array.from(input, (c) => c.codePointAt(0)));
    const state = { index: 0 };
    const result = this.f(buffer, state);
    return [result, state];
  }

  /**
   * Parse the given string.
   * @param input - The string to be parsed.
   * @return The parsed value, or an `Error` instance if parsing failed.
   */
  eval(input) {
    return this.run(input)[0];
  }

  /**
   * Promise version of `run()`.
   * @param input - The string to be parsed.
   * @return A promise that resolves with `[result, state]`, or rejects with
   *   the `Error` when parsing failed.
   */
  runPromise(input) {
    return new Promise((resolve, reject) => {
      const [result, state] = this.run(input);
      if (result instanceof Error) reject(result);
      else resolve([result, state]);
    });
  }

  /**
   * Promise version of `eval()`.
   * @param input - The string to be parsed.
   * @return A promise that resolves with the parsed value, or rejects with
   *   the `Error` when parsing failed.
   */
  evalPromise(input) {
    return new Promise((resolve, reject) => {
      const result = this.eval(input);
      if (result instanceof Error) reject(result);
      else resolve(result);
    });
  }

  /**
   * Log a message when the parser succeeded (WIN) or failed (BAD).
   * Only use this for debugging purposes.
   * @param message - Extra values to be logged after the result.
   * @return A new parser that logs and then gives the unchanged result.
   */
  trace(...message) {
    return new Parser((input, state) => {
      const result = this.f(input, state);
      if (result instanceof Error) console.error(`[crazy-parser] BAD`, result, ...message);
      else console.log(`[crazy-parser] WIN`, result, ...message);
      return result;
    });
  }
}

/**
 * Lift a constant to a parser.
 * The same as `pure` or `return` in Haskell.
 * @param c - The constant.
 * @return A parser that always gives `c` without consuming any input.
 */
export function pure(c) {
  return new Parser(() => c);
}

/** The parser that always fails. The same as `empty` in Haskell. */
export const empty = new Parser(() => new Error());

/**
 * Try each parser in order and give the first success.
 * @param args - Either a single array of parsers, or the parsers themselves.
 * @return A parser that fails if every alternative failed (or none was given).
 */
export function asum(...args) {
  const ps = args.length === 1 && Array.isArray(args[0]) ? args[0] : args;
  if (ps.length === 0) return empty;
  return ps.reduce((sum, p) => sum.or(p)).or(empty);
}

/**
 * Run the parsers one after another.
 * @param args - Either an array of parsers as the first argument, or the
 *   parsers themselves.
 * @return A parser that gives the list of results, or the first error met.
 */
export function sequence(...args) {
  const ps = Array.isArray(args[0]) ? args[0] : args;
  return new Parser((input, state) => {
    const results = [];
    for (const p of ps) {
      const result = p.f(input, state);
      if (result instanceof Error) return result;
      results.push(result);
    }
    return results;
  });
}

/** Succeed (with `undefined`) only at the end of the input. */
export const eof = new Parser((input, state) => {
  if (state.index < input.length) return new Error();
  return undefined;
});

/** Consume one character from the input. Fail at the end of the input. */
export const one = new Parser((input, state) => {
  if (state.index >= input.length) return new Error();
  const current = String.fromCodePoint(input[state.index]);
  state.index += 1;
  return current;
});

/**
 * Parse a specific character.
 * @param c - The character to be parsed.
 * @return A parser that consumes and gives `c`, or fails without consuming.
 */
export function char(c) {
  return new Parser((input, state) => {
    if (state.index >= input.length) return new Error();
    const current = String.fromCodePoint(input[state.index]);
    if (current !== c) return new Error();
    state.index += 1;
    return current;
  });
}

// Parse a digit (0~9).
export const digit = one.where((c) => "0" <= c && c <= "9");

// Parse an uppercase Latin letter without diacritics.
export const upper = one.where((c) => "A" <= c && c <= "Z");

// Parse a lowercase Latin letter without diacritics.
export const lower = one.where((c) => "a" <= c && c <= "z");

// Parse a case-insensitive hexadecimal digit.
export const hex = digit.or(
  one.where((c) => ("A" <= c && c <= "F") || ("a" <= c && c <= "f")),
);

// Parse a Latin letter without diacritics.
export const alpha = upper.or(lower);

// Parse a space.
export const space = char(" ");

// Parse a tab.
export const tab = char("\t");

// Parse a carriage return.
export const cr = char("\r");

// Parse a linefeed.
export const lf = char("\n");

// Give the index the parser is currently looking at, consuming nothing.
export const index = new Parser((_, state) => state.index);

/**
 * Parse a specific string.
 * @param s - The string to be parsed.
 * @return A parser that consumes and gives `s`, or fails without consuming.
 */
export function str(s) {
  const buffer = Uint32Array.from(Array.from(s, (c) => c.codePointAt(0)));
  return new Parser((input, state) => {
    if (state.index + buffer.length > input.length) return new Error();
    for (let i = 0; i < buffer.length; i += 1) {
      if (input[state.index + i] !== buffer[i]) return new Error();
    }
    state.index += buffer.length;
    return s;
  });
}

/**
 * Consume characters while the predicate holds.
 * @param f - Predicate that receives one character at a time.
 * @return A parser that gives the consumed characters joined into a string
 *   (possibly empty); it never fails.
 */
export function span(f) {
  return new Parser((input, state) => {
    const oldIndex = state.index;
    while (state.index < input.length && f(String.fromCodePoint(input[state.index]))) {
      state.index += 1;
    }
    const consumed = input.slice(oldIndex, state.index);
    return Array.from(consumed, (cp) => String.fromCodePoint(cp)).join("");
  });
}

/**
 * Parse any one of the given characters.
 * @param cs - The array of accepted characters.
 */
export function anyChar(cs) {
  return asum(cs.map((c) => char(c)));
}

/**
 * Parse any one of the given strings, tried in order.
 * @param ss - The array of accepted strings.
 */
export function anyStr(ss) {
  return asum(ss.map((s) => str(s)));
}

/*
  Since JavaScript is a strictly evaluated language, a constant cannot refer
  to itself while it is being defined. So every parser that refers to itself
  must be wrapped in a function that takes no arguments. Calling that function
  directly would still recurse forever, so use `lazy()` to evaluate the parser
  only when it is actually run.

  Assume there's a self-referring parser `srp`:

  Before:
  ```ts
  const srp = f(srp)
  ```

  After:
  ```ts
  const srp = () =>   // 1st WRAP!
    lazy(() =>        // 2nd WRAP!
      f(srp())        // Call it!
    )
  ```

  @param pg - A function that gives the parser; called each time the
    resulting parser runs.
*/
export function lazy(pg) {
  return new Parser((input, state) => pg().f(input, state));
}

/*
  A convenient way to intersperse parsers among string parsers.
  Meant to be used as a template-literal tag.

  Before:
  ```ts
  sequence([str("123").right(a), str("456").right(b).left(str("789"))])
  ```

  After:
  ```ts
  template`123${a}456${b}789`
  ```

  @param ss - The literal string pieces.
  @param ps - The parsers placed between the string pieces.
  @return A parser that gives the list of results of `ps`.
*/
export function template(ss, ...ps) {
  const pieces = ps.map((p, i) => str(ss[i]).right(p));
  return sequence(pieces).left(str(ss[ss.length - 1]));
}