/*
crazy-parser
Version: (unspecified)
A light-weight parser combinator
485 lines (452 loc) • 14.9 kB
JavaScript
*/
// Unique sentinel meaning "no value". `Parser.prototype.optional()` yields it when the wrapped parser fails.
export const Nothing = Symbol("Nothing");
export class Parser {
    /*
    Wrap a raw parsing function.
    @param f - A function `(input, state) => result`.
               `run()` calls it with the input as a `Uint32Array` of code points and a mutable state `{ index }`.
               Returning an `Error` instance signals failure; any other value is the parsed result.
    */
    constructor(f) {
        this.f = f;
    }
    /*
    Replace the result of parser with a constant.
    The same as `<$` in Haskell.
    @param c - The constant
    @return A new parser that always returns that constant (failures are passed through unchanged).
    */
    cmap(c) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                return result;
            }
            return c;
        });
    }
    /*
    Modify the result of parser.
    Because every parser is a functor, similar to `map()` of lists.
    The same as `fmap` or `<$>` in Haskell.
    @param f - The modifier function, which takes parser's result and gives a new value.
    @return A new parser that returns the return value of the modifier function.
    */
    map(f) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                return result;
            }
            return f(result);
        });
    }
    /*
    If this parser failed, try the other one.
    Because every parser is an alternative.
    The same as `<|>` in Haskell.
    Note: the cursor is NOT rewound before `p` runs; `p` starts wherever the failed parser left it.
    Wrap this parser with `try()` first if it may consume input before failing.
    @param p - The other parser to try.
    @return A new parser that tries parser `p` if it failed.
    */
    or(p) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                return p.f(input, state);
            }
            return result;
        });
    }
    /*
    If this parser failed, give a constant instead.
    The cursor is not rewound on failure (same as `or()`).
    @param c - The fallback value.
    @return A new parser that returns `c` whenever this parser fails.
    */
    orPure(c) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                return c;
            }
            return result;
        });
    }
    /*
    Continue parsing with the result from the previous parser.
    Because every parser is a monad, similar to `flatMap()` of lists, `then()` of `Promise`s.
    The same as `>>=` in Haskell.
    @param gf - The generator function that takes the result of the previous parser, and gives the next parser.
    @return A new parser that firstly tries the first one, and if it succeeded,
    the result is passed to `gf`, and the second parser will be what `gf` returns.
    */
    bind(gf) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                return result;
            }
            return gf(result).f(input, state);
        });
    }
    /*
    Combine two parsers into one.
    Because every parser is an applicative.
    The same as `<~>` in Parsec (a Haskell parser library).
    @param p - The other parser to be combined with.
    @return A new parser that firstly tries the first one, and if it succeeded, tries the second one.
    Its result is the pair `[a, b]` of both results.
    */
    and(p) {
        return this.bind(a => p.map(b => [a, b]));
    }
    /*
    Combine two parsers into one, **keep the result from the left**, reject the result from the right.
    The same as `<*` in Haskell.
    @param p - The other parser to be combined with.
    @return A new parser that only **keeps the result from the left**.
    */
    left(p) {
        return this.bind(a => p.map(_ => a));
    }
    /*
    Combine two parsers into one, **keep the result from the right**, reject the result from the left.
    The same as `*>` in Haskell.
    @param p - The other parser to be combined with.
    @return A new parser that only **keeps the result from the right**.
    */
    right(p) {
        return this.bind(_ => p);
    }
    /*
    Try to use this parser as many as it can. **It won't fail** but gives an empty list if it didn't parse anything.
    The same as `many` in Haskell.
    Repetition stops at the first failure, or at the first success that did not advance the cursor
    (such a result is dropped); otherwise a parser that accepts empty input would loop forever.
    @return A new parser that gives a list of results.
    */
    many() {
        return new Parser((input, state) => {
            const results = [];
            while (true) {
                const before = state.index;
                const result = this.f(input, state);
                if (result instanceof Error) {
                    break;
                }
                // A success without progress would repeat identically forever, so stop here.
                if (state.index <= before) {
                    break;
                }
                results.push(result);
            }
            return results;
        });
    }
    /*
    Try to use this parser as many as it can. **It will fail** if it didn't parse anything.
    The same as `some` in Haskell.
    @return A new parser that gives a non-empty list of results.
    */
    some() {
        return this.many().bind(rs => rs.length > 0 ? pure(rs) : empty);
    }
    /*
    Haskell-style infix operator of `and()`.
    */
    _$_(p) {
        return this.and(p);
    }
    /*
    Haskell-style infix operator of `left()`.
    */
    _$(p) {
        return this.left(p);
    }
    /*
    Haskell-style infix operator of `right()`.
    */
    $_(p) {
        return this.right(p);
    }
    /*
    Let this parser fail if the condition is false.
    @param c - The condition
    @return This parser if the condition is truthy, otherwise the always-failing `empty` parser.
    */
    if(c) {
        return c ? this : empty;
    }
    /*
    Check if the result meets the requirement.
    @param c - The condition function that takes the result from the parser. If it returned true, the parsing succeeded.
    @param e2 - The error returned when the condition is not met. Defaults to one `Error` created per `where()` call.
    @return A new parser that fails if the result doesn't meet the requirement, even the parsing succeeded.
    In that case the cursor is moved back to where this parser started.
    */
    where(c, e2 = new Error()) {
        return new Parser((input, state) => {
            const oldIndex = state.index;
            const result = this.f(input, state);
            if (result instanceof Error) {
                return result;
            }
            if (!c(result)) {
                // Un-consume what the parser read, since the result is rejected.
                state.index = oldIndex;
                return e2;
            }
            return result;
        });
    }
    /*
    Make a parser optional by allowing it to fail.
    The same as `optional` in Haskell.
    @return A new parser that gives `Nothing` when this parser fails.
    */
    optional() {
        return this.or(pure(Nothing));
    }
    /*
    Repeat the parser several times.
    @param n - The number of repetition, at least 1.
    @return A new parser that gives a list of results based on how many times it repeated.
    If any of it failed, the whole one failed.
    @throws Error if `n` is less than 1.
    */
    x(n) {
        if (n < 1) {
            throw new Error("Number of repetition must be at least 1");
        }
        const ps = Array.from({ length: n }, _ => this);
        return sequence(ps);
    }
    /*
    Append the index range to the result. This is useful in creating source-map.
    @return A new parser that gives a bi-tuple `[result, [start, end]]`,
    where `start` and `end` are the cursor positions before and after parsing.
    */
    withRange() {
        return new Parser((input, state) => {
            const start = state.index;
            const result = this.f(input, state);
            if (result instanceof Error) {
                return result;
            }
            const end = state.index;
            return [result, [start, end]];
        });
    }
    /*
    When this parser failed, rewind the cursor back to where it started.
    This function doesn't have a prefix version because `try` is a JavaScript keyword.
    @return A new parser that will rewind the cursor back to where it started if the parsing failed.
    */
    try() {
        return new Parser((input, state) => {
            // Only the cursor is restored, so remembering the index is enough.
            const oldIndex = state.index;
            const result = this.f(input, state);
            if (result instanceof Error) {
                state.index = oldIndex;
            }
            return result;
        });
    }
    /*
    Override the error of this parser.
    @param e - The value to return in place of this parser's own error.
    @return A new parser that gives `e` whenever this parser fails.
    */
    error(e) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                return e;
            }
            return result;
        });
    }
    /*
    Try to parse the given string.
    The string is converted to an array of code points first, so `state.index` counts code points
    rather than UTF-16 code units.
    @param input - The string to be parsed.
    @return A tuple where the first one is the parsed result, if it succeeded. Or you'll get an `Error` instance.
    The second one is the state that could reveal where the parser stopped.
    Use `result instanceof Error` to check if it was successful.
    */
    run(input) {
        const codePoints = Array.from(input, c => c.codePointAt(0));
        const buffer = Uint32Array.from(codePoints);
        const state = { index: 0 };
        const result = this.f(buffer, state);
        return [result, state];
    }
    /*
    Try to parse the given string.
    @param input - The string to be parsed.
    @return The parsed result if it succeeded. Or you'll get an `Error` instance.
    */
    eval(input) {
        return this.run(input)[0];
    }
    /*
    Turn this parser into a promise.
    This is useful when you don't want to check for `Error` results yourself.
    @param input - The string to be parsed.
    @return The promise version of `run()`: resolves with `[result, state]`, rejects with the error.
    */
    runPromise(input) {
        return new Promise((resolve, reject) => {
            const [result, state] = this.run(input);
            if (result instanceof Error) {
                reject(result);
            } else {
                resolve([result, state]);
            }
        });
    }
    /*
    Turn this parser into a promise.
    This is useful when you don't want to check for `Error` results yourself.
    @param input - The string to be parsed.
    @return The promise version of `eval()`: resolves with the result, rejects with the error.
    */
    evalPromise(input) {
        return new Promise((resolve, reject) => {
            const result = this.eval(input);
            if (result instanceof Error) {
                reject(result);
            } else {
                resolve(result);
            }
        });
    }
    /*
    Show a message when the parser succeeded (WIN) or failed (BAD).
    Don't use it for dumping syntax tree or anything. Only use this for debugging purpose.
    @param message - The message to be shown.
    @return A new parser that will show the message when it succeeded or failed.
    */
    trace(...message) {
        return new Parser((input, state) => {
            const result = this.f(input, state);
            if (result instanceof Error) {
                console.error(`[crazy-parser] BAD`, result, ...message);
            } else {
                console.log(`[crazy-parser] WIN`, result, ...message);
            }
            return result;
        });
    }
}
/*
Turn a plain value into a parser.
The same as `pure` or `return` in Haskell.
@param c - The value to lift.
@return A parser that gives `c` every time and never touches the input or the cursor.
*/
export function pure(c) {
    const constant = () => c;
    return new Parser(constant);
}
/*
A parser that fails unconditionally: every invocation gives a fresh `Error`.
The same as `empty` in Haskell.
*/
export const empty = new Parser(() => {
    return new Error();
});
/*
Fold several parsers into one with `or()`, trying them from left to right.
@param args - Either the parsers as separate arguments, or a single array of parsers.
@return A parser that gives the first successful result; `empty` itself when no parser is given.
*/
export function asum(...args) {
    const isSingleList = args.length === 1 && args[0] instanceof Array;
    const ps = isSingleList ? args[0] : args;
    if (ps.length === 0) {
        return empty;
    }
    const [head, ...tail] = ps;
    const combined = tail.reduce((acc, p) => acc.or(p), head);
    return combined.or(empty);
}
/*
Run several parsers one after another and collect their results.
@param args - Either the parsers as separate arguments, or an array of parsers as the first argument.
@return A parser that gives the list of results in order, or the first error encountered.
The cursor is not rewound when one of the parsers fails.
*/
export function sequence(...args) {
    const [first] = args;
    const parsers = first instanceof Array ? first : args;
    return new Parser((input, state) => {
        const collected = [];
        for (let i = 0; i < parsers.length; i += 1) {
            const outcome = parsers[i].f(input, state);
            if (outcome instanceof Error) {
                return outcome;
            }
            collected.push(outcome);
        }
        return collected;
    });
}
/*
Succeed only at the end of the input.
Gives `undefined` on success, an `Error` while input remains; the cursor never moves.
*/
export const eof = new Parser((input, state) => {
    const hasMore = state.index < input.length;
    return hasMore ? new Error() : undefined;
});
/*
Read a single character (one code point) and advance the cursor by one.
Gives an `Error` when the cursor is already at the end of the input.
*/
export const one = new Parser((input, state) => {
    const { index: position } = state;
    if (position >= input.length) {
        return new Error();
    }
    const consumed = String.fromCodePoint(input[position]);
    state.index = position + 1;
    return consumed;
});
/*
Match one specific character.
@param c - The character expected at the cursor.
@return A parser that gives the character and advances by one when it matches,
or gives an `Error` (cursor untouched) on a mismatch or at the end of the input.
*/
export function char(c) {
    return new Parser((input, state) => {
        const { index: position } = state;
        if (position >= input.length) {
            return new Error();
        }
        const actual = String.fromCodePoint(input[position]);
        // Loose comparison, as in the original implementation.
        if (actual == c) {
            state.index = position + 1;
            return actual;
        }
        return new Error();
    });
}
// Parse a decimal digit ("0" through "9").
export const digit = one.where(ch => ch >= "0" && ch <= "9");
// Parse an uppercase Latin letter ("A" through "Z"), no diacritics.
export const upper = one.where(ch => ch >= "A" && ch <= "Z");
// Parse a lowercase Latin letter ("a" through "z"), no diacritics.
export const lower = one.where(ch => ch >= "a" && ch <= "z");
// Parse a hexadecimal digit in either case: a decimal digit, "A"-"F" or "a"-"f".
export const hex = digit.or(one.where(ch => (ch >= "A" && ch <= "F") || (ch >= "a" && ch <= "f")));
// Parse a Latin letter of either case, no diacritics.
export const alpha = upper.or(lower);
// Parse a single space character.
export const space = char(" ");
// Parse a tab character.
export const tab = char("\t");
// Parse a carriage return.
export const cr = char("\r");
// Parse a linefeed.
export const lf = char("\n");
// Give the current cursor position without consuming anything.
export const index = new Parser((_input, state) => state.index);
/*
Match an exact string.
@param s - The string expected at the cursor.
@return A parser that gives `s` and advances past it when the input matches,
or gives an `Error` (cursor untouched) when it doesn't or when too little input remains.
*/
export function str(s) {
    // Encode once, up front, so each run only compares code points.
    const expected = Uint32Array.from(Array.from(s, ch => ch.codePointAt(0)));
    return new Parser((input, state) => {
        const start = state.index;
        if (start + expected.length > input.length) {
            return new Error();
        }
        const matches = expected.every((codePoint, offset) => input[start + offset] == codePoint);
        if (!matches) {
            return new Error();
        }
        state.index = start + expected.length;
        return s;
    });
}
/*
Consume the longest run of characters accepted by a predicate.
@param f - Called with each character (as a string); consumption stops at the first falsy return.
@return A parser that gives the consumed text, which is the empty string when nothing matched.
It never gives an `Error`.
*/
export function span(f) {
    return new Parser((input, state) => {
        const start = state.index;
        for (;;) {
            if (!(state.index < input.length)) {
                break;
            }
            if (!f(String.fromCodePoint(input[state.index]))) {
                break;
            }
            state.index += 1;
        }
        const consumed = Array.from(input.slice(start, state.index));
        return consumed.map(codePoint => String.fromCodePoint(codePoint)).join("");
    });
}
/*
Match any one of the given characters.
@param cs - An array of candidate characters, tried in order.
@return A parser built by combining one `char()` parser per candidate with `asum()`.
*/
export function anyChar(cs) {
    const candidates = cs.map(c => char(c));
    return asum(candidates);
}
/*
Match any one of the given strings.
@param ss - An array of candidate strings, tried in order.
@return A parser built by combining one `str()` parser per candidate with `asum()`.
*/
export function anyStr(ss) {
    const candidates = ss.map(s => str(s));
    return asum(candidates);
}
/*
Defer building a parser until it actually runs.
JavaScript evaluates eagerly, so a constant cannot mention itself in its own definition.
A self-referring parser therefore has to be wrapped in a zero-argument function
(in Haskell, a function without arguments is the same thing as a constant).
Calling that function directly inside its own body would still recurse forever,
so the body is wrapped in `lazy()`, which only calls the generator when the parser is used.
Given a self-referring parser `srp`:
Before:
```ts
const srp = f(srp)
```
After:
```ts
const srp =
() => // 1st WRAP!
lazy(() => // 2nd WRAP!
f(srp()) // Call it!
)
```
@param pg - A zero-argument function that gives the real parser. It is called on every run.
@return A parser that delegates to whatever `pg()` gives.
*/
export function lazy(pg) {
    const deferred = (input, state) => {
        const parser = pg();
        return parser.f(input, state);
    };
    return new Parser(deferred);
}
/*
Template-literal tag for interleaving parsers with literal text.
Each literal chunk becomes a `str()` parser that must match but whose result is dropped;
the results of the interpolated parsers are collected into a list.
Before:
```ts
sequence([str("123").right(a), str("456").right(b)]).left(str("789"))
```
After:
```ts
template`123${a}456${b}789`
```
@param ss - The literal chunks of the template.
@param ps - The interpolated parsers.
@return A parser that gives the list of results of `ps`.
*/
export function template(ss, ...ps) {
    const segments = ps.map((p, i) => str(ss[i]).right(p));
    const body = sequence(segments);
    const trailing = str(ss[ss.length - 1]);
    return body.left(trailing);
}