UNPKG

parserator

Version:

An elegant parser combinator library for TypeScript

1,037 lines (1,028 loc) 29.5 kB
"use strict";

// ---------------------------------------------------------------------------
// CommonJS interop helpers (bundler-generated).
// ---------------------------------------------------------------------------
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/** Defines every entry of `all` on `target` as an enumerable getter. */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/** Copies the own properties of `from` onto `to` as forwarding getters, skipping `except`. */
const __copyProps = (to, from, except, desc) => {
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(to, key) && key !== except) {
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable,
        });
      }
    }
  }
  return to;
};

/** Wraps a namespace object in a fresh object flagged with `__esModule`. */
const __toCommonJS = (mod) =>
  __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
const src_exports = {};
// The getters are lazy, so referring to bindings declared further down is safe.
__export(src_exports, {
  Either: () => Either,
  Left: () => Left,
  Parser: () => Parser,
  ParserError: () => ParserError,
  Right: () => Right,
  State: () => State,
  alphabet: () => alphabet,
  anyChar: () => anyChar,
  benchmark: () => benchmark,
  between: () => between,
  breakpoint: () => breakpoint,
  chain: () => chain,
  char: () => char,
  debug: () => debug,
  debugState: () => debugState,
  digit: () => digit,
  getErrorLine: () => getErrorLine,
  lookAhead: () => lookAhead,
  many0: () => many0,
  many1: () => many1,
  manyN: () => manyN,
  manyNExact: () => manyNExact,
  narrowedString: () => narrowedString,
  notFollowedBy: () => notFollowedBy,
  optional: () => optional,
  or: () => or,
  parseUntilChar: () => parseUntilChar,
  parser: () => parser,
  peekAhead: () => peekAhead,
  peekLine: () => peekLine,
  peekRemaining: () => peekRemaining,
  peekState: () => peekState,
  peekUntil: () => peekUntil,
  printArrow: () => printArrow,
  printErrorContext: () => printErrorContext,
  printErrorLine: () => printErrorLine,
  printPosition: () => printPosition,
  printPositionWithOffset: () => printPositionWithOffset,
  regex: () => regex,
  sepBy: () => sepBy,
  sequence: () => sequence,
  skipMany0: () => skipMany0,
  skipMany1: () => skipMany1,
  skipManyN: () => skipManyN,
  skipSpaces: () => skipSpaces,
  skipUntil: () => skipUntil,
  string: () => string,
  takeUntil: () => takeUntil,
  takeUpto: () => takeUpto,
  then: () => then,
  thenDiscard: () => thenDiscard,
  trace: () => trace,
  zip: () => zip,
  zipLeft: () => zipLeft,
  zipRight: () => zipRight,
});
// `module` only exists under CommonJS. The guard keeps CommonJS behaviour
// unchanged while letting the bundle be evaluated where no CommonJS wrapper
// is present (for example when inlined into an ES module).
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(src_exports);
}

// ---------------------------------------------------------------------------
// src/either.ts
// ---------------------------------------------------------------------------

/** Failure branch of an Either; carries the value in `left`. */
const Left = class {
  constructor(left) {
    this.left = left;
  }
  _tag = "Left";
  // Yields itself once and returns whatever the driver sends back (see `Either.gen`).
  *[Symbol.iterator]() {
    return yield this;
  }
};

/** Success branch of an Either; carries the value in `right`. */
const Right = class {
  constructor(right) {
    this.right = right;
  }
  _tag = "Right";
  // Yields itself once and returns whatever the driver sends back (see `Either.gen`).
  *[Symbol.iterator]() {
    return yield this;
  }
};

/** Constructors and helpers for `Left` / `Right` values. */
const Either = {
  /** Wraps `l` in a `Left`. */
  left(l) {
    return new Left(l);
  },
  /** Wraps `r` in a `Right`. */
  right(r) {
    return new Right(r);
  },
  /** True when `either` is tagged `"Left"`. */
  isLeft(either) {
    return either._tag === "Left";
  },
  /** True when `either` is tagged `"Right"`. */
  isRight(either) {
    return either._tag === "Right";
  },
  /** Calls `patterns.onLeft` or `patterns.onRight` with the wrapped value. */
  match(either, patterns) {
    if (Either.isLeft(either)) {
      return patterns.onLeft(either.left);
    }
    return patterns.onRight(either.right);
  },
  /**
   * Drives a generator that yields Either values: each `Right` payload is sent
   * back into the generator, the first `Left` is returned as-is, and the
   * generator's return value is wrapped in a `Right`.
   */
  gen(f) {
    const iterator = f();
    let current = iterator.next();
    while (!current.done) {
      const either = current.value;
      if (Either.isLeft(either)) {
        return either;
      }
      current = iterator.next(either.right);
    }
    return Either.right(current.value);
  },
};

// ---------------------------------------------------------------------------
// src/state.ts
// ---------------------------------------------------------------------------

/** Error payload produced by failing parsers. */
const ParserError = class {
  constructor(message, expected, found) {
    this.message = message;
    this.expected = expected;
    this.found = found;
  }
};

const State = {
  /**
   * Creates a new parser state from an input string.
   *
   * @param input - The input string to parse
   * @param context - Context object stored on the state unchanged
   * @returns A state positioned at line 1, column 1, offset 0
   */
  fromInput(input, context) {
    return {
      remaining: input,
      pos: { line: 1, column: 1, offset: 0 },
      context,
    };
  },

  /**
   * Creates a new state by consuming n characters from the current state.
   *
   * `n` counts UTF-16 code units (it is applied with `String.prototype.slice`).
   * `offset` advances by the same unit so it stays usable as an index into the
   * source string; line and column advance once per code point.
   *
   * @param state - The current parser state
   * @param n - Number of characters to consume
   * @returns A new state with n characters consumed and position updated
   * @throws Error if attempting to consume more characters than remaining
   */
  consume(state, n) {
    if (n === 0) return state;
    if (n > state.remaining.length) {
      throw new Error("Cannot consume more characters than remaining");
    }
    const consumed = state.remaining.slice(0, n);
    let { line, column, offset } = state.pos;
    for (const char2 of consumed) {
      if (char2 === "\n") {
        line++;
        column = 1;
      } else {
        column++;
      }
      // for...of iterates code points; add the code-unit length so that
      // `offset` matches the index used by `slice` / `indexOf` on the source.
      offset += char2.length;
    }
    return {
      remaining: state.remaining.slice(n),
      pos: { line, column, offset },
      context: state.context,
    };
  },

  /**
   * Creates a new state by consuming a specific string from the current state.
   *
   * @param state - The current parser state
   * @param str - The string to consume
   * @returns A new state with the string consumed and position updated
   * @throws Error if the input doesn't start with the specified string
   */
  consumeString(state, str) {
    if (!state.remaining.startsWith(str)) {
      throw new Error(
        `Cannot consume "${str}" - input "${state.remaining}" doesn't start with it`
      );
    }
    return State.consume(state, str.length);
  },

  /**
   * Repositions the state relative to its current offset by re-consuming
   * `state.context.source` from the start up to `state.pos.offset + moveBy`.
   *
   * @param state - The current parser state
   * @param moveBy - Offset delta to apply
   * @returns A new state positioned at the target offset
   */
  move(state, moveBy) {
    return State.consume(
      {
        ...state,
        remaining: state.context.source,
        pos: { line: 1, column: 1, offset: 0 },
      },
      state.pos.offset + moveBy
    );
  },

  /**
   * Creates a new state by consuming characters while a predicate is true.
   *
   * @param state - The current parser state
   * @param predicate - Function that tests each character
   * @returns A new state with matching characters consumed
   */
  consumeWhile(state, predicate) {
    let i = 0;
    while (i < state.remaining.length && predicate(state.remaining[i])) {
      i++;
    }
    return State.consume(state, i);
  },

  /**
   * Gets the next n characters from the input without consuming them.
   *
   * @param state - The current parser state
   * @param n - Number of characters to peek (default: 1)
   * @returns The next n characters as a string
   */
  peek(state, n = 1) {
    return state.remaining.slice(0, n);
  },

  /**
   * Checks if the parser has reached the end of input.
   *
   * @param state - The current parser state
   * @returns True if at end of input, false otherwise
   */
  isAtEnd(state) {
    return state.remaining.length === 0;
  },

  /** Formats the state's position as "line L, column C, offset O". */
  printPosition(state) {
    return `line ${state.pos.line}, column ${state.pos.column}, offset ${state.pos.offset}`;
  },
};

// ---------------------------------------------------------------------------
// src/debug.ts
// ---------------------------------------------------------------------------

/**
 * Logs a labelled snapshot of a parser step to the console.
 *
 * @param label - Heading printed above the snapshot
 * @param state - State the parser was run against
 * @param result - Parser output (`{ state, result }`)
 * @param options - `inputPreviewLength` (default 20) and `separator` (default 40 "=")
 */
function debugState(label, state, result, options = {}) {
  const { inputPreviewLength = 20, separator = "=".repeat(40) } = options;
  console.log(` === ${label} ===`);
  console.log("Position:", State.printPosition(state));
  console.log(
    "Input:",
    JSON.stringify(
      state.remaining.slice(0, inputPreviewLength) +
        (state.remaining.length > inputPreviewLength ? "..." : "")
    )
  );
  console.log(
    "Result:",
    Either.isRight(result.result)
      ? // `Parser.succeed` stores the parsed value directly in `right`.
        `Success: ${JSON.stringify(result.result.right)}`
      : `Error: ${result.result.left.message}`
  );
  console.log(separator);
}

/** Returns `parser2` with a tap that logs every run via `debugState`. */
function debug(parser2, label) {
  return parser2.tap(({ state, result }) => debugState(label, state, result));
}

/** Parser that logs the label, position and remaining input, consuming nothing. */
function trace(label) {
  return new Parser((state) => {
    console.log(` [TRACE] ${label}`);
    console.log("Position:", State.printPosition(state));
    console.log("Remaining:", JSON.stringify(state.remaining));
    return Parser.succeed(void 0, state);
  });
}

/** Like `debug`, but also executes a `debugger` statement after logging. */
function breakpoint(parser2, label) {
  return parser2.tap(({ state, result }) => {
    debugState(label, state, result);
    debugger;
  });
}

/** Wraps `parser2` so that each run logs its wall-clock duration. */
function benchmark(parser2, label) {
  return new Parser((state) => {
    const start = performance.now();
    const result = parser2.run(state);
    const end = performance.now();
    console.log(` [BENCHMARK] ${label}: ${(end - start).toFixed(2)}ms`);
    return result;
  });
}

// ---------------------------------------------------------------------------
// src/errors.ts
// ---------------------------------------------------------------------------

/** Formats a position as "line L, column C". */
function printPosition(position) {
  return `line ${position.line}, column ${position.column}`;
}

/** Returns a "^" indented to sit under `position.column` of a "N | text" line. */
function printArrow(position) {
  const lineNumberDigits = position.line.toString().length;
  // +3 accounts for the " | " gutter emitted by printErrorLine.
  return " ".repeat(lineNumberDigits + 3 + position.column - 1) + "^";
}

/** Builds the full "Parser Error:" message: source excerpt, arrow, optional message. */
function printErrorContext(state, message) {
  return (
    "Parser Error:\n" +
    printErrorLine(state) +
    "\n" +
    printArrow(state.pos) +
    `${message ? ` ${message}` : ""}`
  );
}

/**
 * Renders source lines around the current position with a line-number gutter.
 *
 * NOTE(review): the slice covers the current line and the line after it (when
 * one exists), so the arrow printed by `printErrorContext` lands under the
 * following line for multi-line input. Confirm whether that is intended.
 */
function printErrorLine(state) {
  const lines = state.context.source.split("\n");
  const lineNum = state.pos.line;
  const startLine = Math.max(0, lineNum - 1);
  const endLine = lineNum;
  const relevantLines = lines.slice(startLine, endLine + 1);
  const padding = lineNum.toString().length;
  return relevantLines
    .map((line, i) => {
      const num = startLine + i + 1;
      const paddedNum = num.toString().padStart(padding, " ");
      return `${paddedNum} | ${line}`;
    })
    .join("\n");
}

/** Formats a position as "line L, column C, offset O". */
function printPositionWithOffset(position) {
  return `line ${position.line}, column ${position.column}, offset ${position.offset}`;
}

/**
 * Returns the source text from the current offset up to the next newline,
 * or to the end of the source when no newline follows.
 *
 * @param error - Unused; kept for signature compatibility
 * @param state - State whose `pos.offset` and `context.source` are read
 */
function getErrorLine(error, state) {
  const { source } = state.context;
  const lineEnd = source.indexOf("\n", state.pos.offset);
  // indexOf yields -1 on the last line; slicing to -1 would drop a character.
  return source.slice(state.pos.offset, lineEnd === -1 ? source.length : lineEnd);
}

// ---------------------------------------------------------------------------
// src/parser.ts
// ---------------------------------------------------------------------------

/**
 * A parser wraps a `run(state)` function returning `{ state, result }`, where
 * `result` is an Either of `ParserError` (left) or the parsed value (right).
 */
const Parser = class _Parser {
  constructor(run, options) {
    this.run = run;
    this.options = options;
  }

  /** Sets `options.name` on this parser (mutates and returns `this`). */
  name(name) {
    this.options = { ...this.options, name };
    return this;
  }

  /** Builds a successful output carrying `value` and `state`. */
  static succeed(value, state) {
    return { state, result: Either.right(value) };
  }

  /**
   * Builds a failed output. Messages not already starting with
   * "Parser Error:" are wrapped with the source context of `state`.
   */
  static fail(error, state) {
    const errorMessage = error.message.startsWith("Parser Error:")
      ? error.message
      : printErrorContext(state, error.message);
    return {
      state,
      result: Either.left(
        new ParserError(errorMessage, error.expected ?? [], error.found)
      ),
    };
  }

  /**
   * Parser that always fails with `message`.
   *
   * @param message - Error message
   * @param expected - Expected tokens (default: empty list)
   * @param stateCallback - Optional mapper applied to the state before failing
   */
  static error(message, expected = [], stateCallback) {
    return new _Parser((state) => {
      return _Parser.fail(
        { message, expected },
        stateCallback ? stateCallback(state) : state
      );
    });
  }

  /**
   * Adds an error message to the parser
   * @param makeMessage - A function that returns an error message
   * @returns A new parser with the error message added
   */
  withError(makeMessage) {
    return new _Parser((state) => {
      const output = this.run(state);
      if (Either.isLeft(output.result)) {
        return _Parser.fail(
          {
            message: makeMessage({
              error: output.result.left,
              state: output.state,
            }),
            expected: output.result.left.expected,
          },
          output.state
        );
      }
      return output;
    }, this.options);
  }

  /** Runs the parser on `input` and returns the `{ state, result }` output. */
  parse(input, context = { source: input }) {
    const { result, state } = this.run(State.fromInput(input, context));
    if (Either.isLeft(result)) {
      return _Parser.fail(result.left, state);
    }
    return _Parser.succeed(result.right, state);
  }

  /** Logs each run through `debug` when `state.context.debug` is truthy. */
  withTrace(label) {
    return new _Parser((state) => {
      if (!state.context?.debug) {
        return this.run(state);
      }
      return debug(this, label).run(state);
    }, this.options);
  }

  /** Runs the parser and returns either the parsed value or the `ParserError`. */
  parseOrError(input, context = { source: input }) {
    const { result } = this.run(State.fromInput(input, context));
    if (Either.isRight(result)) {
      return result.right;
    }
    return result.left;
  }

  /**
   * Runs the parser and returns the parsed value.
   * @throws Error carrying the parser error message on failure
   */
  parseOrThrow(input, context = { source: input }) {
    const { result } = this.parse(input, context ?? { source: input });
    if (Either.isLeft(result)) {
      throw new Error(result.left.message);
    }
    return result.right;
  }

  /**
   * Transforms the parsed value with `f`.
   * On failure the output carries the state this parser was *started* from.
   */
  map(f) {
    return new _Parser((state) => {
      const { result, state: newState } = this.run(state);
      if (Either.isLeft(result)) {
        return _Parser.fail(result.left, state);
      }
      return _Parser.succeed(f(result.right), newState);
    });
  }

  /** Runs this parser, then the parser returned by `f(value)`. */
  flatMap(f) {
    return new _Parser((state) => {
      const { result, state: newState } = this.run(state);
      if (Either.isLeft(result)) {
        return _Parser.fail(result.left, newState);
      }
      const nextParser = f(result.right);
      return nextParser.run(newState);
    });
  }

  /** Parser that succeeds with `a` without consuming input. */
  static pure = (a) => new _Parser((state) => _Parser.succeed(a, state));

  /** Starting point for `bind` chains: succeeds with an empty object. */
  static Do = _Parser.pure({});

  /**
   * Creates a new parser that lazily evaluates the given function.
   * This is useful for creating recursive parsers.
   *
   * @param fn - A function that returns a parser
   * @returns A new parser that evaluates the function when parsing
   * @template T The type of value produced by the parser
   *
   * @example
   * ```ts
   * // Create a recursive parser for nested parentheses
   * const parens: Parser<string> = Parser.lazy(() =>
   *   between(
   *     char('('),
   *     char(')'),
   *     parens
   *   )
   * )
   * ```
   */
  static lazy(fn) {
    return new _Parser((state) => {
      const parser2 = fn();
      return parser2.run(state);
    });
  }

  /** Runs this parser then `parserB`, succeeding with `[a, b]`. */
  zip(parserB) {
    return new _Parser((state) => {
      const { result: a, state: stateA } = this.run(state);
      if (Either.isLeft(a)) {
        return _Parser.fail(a.left, stateA);
      }
      const { result: b, state: stateB } = parserB.run(stateA);
      if (Either.isLeft(b)) {
        return _Parser.fail(b.left, stateB);
      }
      return _Parser.succeed([a.right, b.right], stateB);
    });
  }

  /** Runs both parsers in sequence and keeps the second value. */
  then(parserB) {
    return this.zip(parserB).map(([_, b]) => b);
  }

  zipRight = this.then;

  /** Runs both parsers in sequence and keeps the first value. */
  thenDiscard(parserB) {
    return this.zip(parserB).map(([a, _]) => a);
  }

  zipLeft = this.thenDiscard;

  /**
   * Runs this parser, then `other` (a parser, or a function of the accumulated
   * value returning one), and stores the second value under key `k` on a copy
   * of the accumulated object.
   */
  bind(k, other) {
    return new _Parser((state) => {
      const { result: resultA, state: stateA } = this.run(state);
      if (Either.isLeft(resultA)) {
        return _Parser.fail(resultA.left, stateA);
      }
      const nextParser =
        other instanceof _Parser ? other : other(resultA.right);
      const { result: resultB, state: stateB } = nextParser.run(stateA);
      if (Either.isLeft(resultB)) {
        return _Parser.fail(resultB.left, stateB);
      }
      return _Parser.succeed(
        { ...resultA.right, [k]: resultB.right },
        stateB
      );
    }, this.options);
  }

  // Lets `yield* parser` inside `Parser.gen` hand the parser to the driver
  // and evaluate to the value the driver sends back.
  *[Symbol.iterator]() {
    return yield this;
  }

  /**
   * Adds a tap point to observe the current state and result during parsing.
   * Useful for debugging parser behavior.
   *
   * @param callback - Function called with `{ state, result }`, where `state`
   *   is the input state and `result` is the full parser output
   * @returns A new parser that behaves like this one and invokes the callback
   */
  tap(callback) {
    return new _Parser((state) => {
      const result = this.run(state);
      callback({ state, result });
      return result;
    }, this.options);
  }

  /**
   * Builds a parser from a generator function: every yielded parser is run
   * against the current state and its value is sent back into the generator.
   * The first failure aborts; the generator's return value is the result.
   */
  static gen(f) {
    return new _Parser((state) => {
      const iterator = f();
      let current = iterator.next();
      let currentState = state;
      while (!current.done) {
        const { result, state: updatedState } = current.value.run(currentState);
        if (Either.isLeft(result)) {
          return _Parser.fail(result.left, updatedState);
        }
        currentState = updatedState;
        current = iterator.next(result.right);
      }
      return _Parser.succeed(current.value, currentState);
    });
  }

  /** Runs `parser2` before and after this parser, keeping this parser's value. */
  trim(parser2) {
    return parser2.then(this).thenDiscard(parser2);
  }

  /** Runs `parser2` before this parser, keeping this parser's value. */
  trimLeft(parser2) {
    return parser2.then(this);
  }

  /** Runs `parser2` after this parser, keeping this parser's value. */
  trimRight(parser2) {
    return this.thenDiscard(parser2);
  }
};

/** Function form of `Parser.gen`. */
function parser(f) {
  return Parser.gen(f);
}

// ---------------------------------------------------------------------------
// src/chain.ts
// ---------------------------------------------------------------------------

/**
 * Runs `parser2`, then feeds each successive value through `fns`, where every
 * function returns the next parser to run. Stops at the first failure.
 */
const chain = (parser2, ...fns) => {
  return new Parser((state) => {
    let result = parser2.run(state);
    for (const fn of fns) {
      const { result: parserResult, state: newState } = result;
      if (Either.isLeft(parserResult)) {
        return Parser.fail(parserResult.left, newState);
      }
      const value = parserResult.right;
      result = fn(value).run(newState);
    }
    return result;
  });
};

// ---------------------------------------------------------------------------
// src/combinators.ts
// ---------------------------------------------------------------------------

/**
 * Runs `parser2` without consuming input. Succeeds with its value, or with
 * `undefined` when it fails; the state is always the original one.
 */
function lookAhead(parser2) {
  return new Parser((state) => {
    const { result } = parser2.run(state);
    if (Either.isRight(result)) {
      return Parser.succeed(result.right, state);
    }
    return Parser.succeed(void 0, state);
  });
}

/**
 * Succeeds with `true` when `parser2` fails, and fails when it succeeds.
 * The returned state is the one produced by `parser2` in both cases.
 */
function notFollowedBy(parser2) {
  return new Parser((state) => {
    const { result, state: newState } = parser2.run(state);
    if (Either.isRight(result)) {
      if (parser2.options?.name) {
        const message = `Found ${parser2.options.name} when it should not appear here`;
        return Parser.fail({ message, expected: [] }, newState);
      }
      return Parser.fail(
        {
          message: "Expected not to follow",
          expected: [],
          found: state.remaining.at(0),
        },
        newState
      );
    }
    return Parser.succeed(true, newState);
  });
}

/** Parser matching the exact string `str` at the current position. */
const string = (str) =>
  new Parser(
    (state) => {
      if (state.remaining.startsWith(str)) {
        return Parser.succeed(str, State.consume(state, str.length));
      }
      const message = `Expected '${str}', but found '${state.remaining.slice(0, str.length)}'`;
      return Parser.fail(
        {
          message,
          expected: [str],
          found: state.remaining.slice(0, str.length),
        },
        state
      );
    },
    { name: str }
  );

/** Alias of `string`; at runtime it simply delegates. */
function narrowedString(str) {
  return string(str);
}

/** Parser matching the single character `ch`; fails if `ch.length !== 1`. */
const char = (ch) => {
  return new Parser(
    (state) => {
      if (ch.length !== 1) {
        return Parser.fail(
          { message: "Incorrect usage of char parser.", expected: [ch] },
          state
        );
      }
      if (state.remaining[0] === ch) {
        return Parser.succeed(ch, State.consume(state, 1));
      }
      const message = `Expected ${ch} but found ${state.remaining.at(0)}.`;
      return Parser.fail(
        { message, expected: [ch], found: state.remaining.at(0) },
        state
      );
    },
    { name: ch }
  );
};

/** Parser matching one ASCII letter (`a-z` or `A-Z`). */
const alphabet = new Parser(
  (state) => {
    if (State.isAtEnd(state)) {
      return Parser.fail(
        { message: "Unexpected end of input", expected: [] },
        state
      );
    }
    const first = state.remaining[0];
    if (first && /^[a-zA-Z]$/.test(first)) {
      return Parser.succeed(first, State.consume(state, 1));
    }
    const message = `Expected alphabetic character, but got '${first}'`;
    return Parser.fail(
      { message, expected: [], found: state.remaining[0] },
      state
    );
  },
  { name: "alphabet" }
);

/** Parser matching one ASCII digit (`0-9`). */
const digit = new Parser(
  (state) => {
    if (State.isAtEnd(state)) {
      return Parser.fail(
        { message: "Unexpected end of input", expected: [] },
        state
      );
    }
    const first = state.remaining[0];
    if (first && /^[0-9]$/.test(first)) {
      return Parser.succeed(first, State.consume(state, 1));
    }
    const message = `Expected digit, but got '${first}'`;
    return Parser.fail(
      { message, expected: [], found: state.remaining[0] },
      state
    );
  },
  { name: "digit" }
);

/**
 * Parses one or more `parser2` items separated by `sepParser`.
 * Fails if the first item fails, or if an item fails after a separator.
 */
function sepBy(sepParser, parser2) {
  return new Parser((state) => {
    const results = [];
    let currentState = state;
    const { result: firstResult, state: firstState } = parser2.run(currentState);
    if (Either.isLeft(firstResult)) {
      return Parser.fail(firstResult.left, firstState);
    }
    results.push(firstResult.right);
    currentState = firstState;
    while (true) {
      const { result: sepResult, state: sepState } = sepParser.run(currentState);
      if (Either.isLeft(sepResult)) {
        break;
      }
      currentState = sepState;
      const { result: itemResult, state: itemResultState } = parser2.run(currentState);
      if (Either.isLeft(itemResult)) {
        return Parser.fail(itemResult.left, itemResultState);
      }
      results.push(itemResult.right);
      currentState = itemResultState;
    }
    return Parser.succeed(results, currentState);
  });
}

/** Runs `start`, `parser2`, `end` in order and succeeds with `parser2`'s value. */
function between(start, end, parser2) {
  return new Parser((state) => {
    const startResult = start.run(state);
    if (Either.isLeft(startResult.result)) {
      return startResult;
    }
    const contentResult = parser2.run(startResult.state);
    if (Either.isLeft(contentResult.result)) {
      return contentResult;
    }
    const endResult = end.run(contentResult.state);
    if (Either.isLeft(endResult.result)) {
      return endResult;
    }
    return Parser.succeed(contentResult.result.right, endResult.state);
  });
}

/** Parser consuming any single character; fails only at end of input. */
function anyChar() {
  return new Parser((state) => {
    if (State.isAtEnd(state)) {
      return Parser.fail(
        { message: "Unexpected end of input", expected: [] },
        state
      );
    }
    return Parser.succeed(state.remaining[0], State.consume(state, 1));
  });
}

/**
 * Shared implementation of the `many*` combinators: repeats `parser2`
 * (optionally interleaved with `separator`) and requires at least `count`
 * results.
 *
 * The loop only ends when `parser2` (or the separator) fails, so a parser
 * that keeps succeeding without consuming input will not terminate.
 */
function many_(count) {
  return (parser2, separator) => {
    return new Parser((state) => {
      const results = [];
      let currentState = state;
      while (true) {
        const itemResult = parser2.run(currentState);
        if (Either.isLeft(itemResult.result)) {
          if (results.length >= count) {
            return Parser.succeed(results, currentState);
          }
          const message2 = `Expected at least ${count} occurrences, but only found ${results.length}`;
          return Parser.fail({ message: message2, expected: [] }, itemResult.state);
        }
        const { result: value, state: newState } = itemResult;
        results.push(value.right);
        currentState = newState;
        if (separator) {
          const { result: sepResult, state: state2 } = separator.run(currentState);
          if (Either.isLeft(sepResult)) {
            break;
          }
          currentState = state2;
        }
      }
      if (results.length >= count) {
        return Parser.succeed(results, currentState);
      }
      const message = `Expected at least ${count} occurrences, but only found ${results.length}`;
      return Parser.fail({ message, expected: [] }, currentState);
    });
  };
}

/** Zero or more occurrences of `parser2`. */
const many0 = (parser2, separator) => many_(0)(parser2, separator);
/** One or more occurrences of `parser2`. */
const many1 = (parser2, separator) => many_(1)(parser2, separator);
/** At least `n` occurrences of `parser2`. */
const manyN = (parser2, n, separator) => many_(n)(parser2, separator);
/** Runs `manyN` and then fails unless exactly `n` results were collected. */
const manyNExact = (parser2, n, separator) =>
  Parser.gen(function* () {
    const results = yield* manyN(parser2, n, separator);
    if (results.length !== n) {
      const message = `Expected exactly ${n} occurrences, but found ${results.length}`;
      return yield* Parser.error(message);
    }
    return results;
  });

/**
 * Shared implementation of the `skipMany*` combinators: repeats `parser2`
 * until it fails, discarding values, and requires at least `count` successes.
 *
 * As with `many_`, a parser that succeeds without consuming input will not
 * terminate.
 */
function skipMany_(count) {
  return (parser2) => {
    return new Parser((state) => {
      let currentState = state;
      let successes = 0;
      while (true) {
        const { result, state: newState } = parser2.run(currentState);
        if (Either.isLeft(result)) {
          break;
        }
        successes++;
        currentState = newState;
      }
      if (successes >= count) {
        return Parser.succeed(void 0, currentState);
      }
      const message = `Expected at least ${count} occurrences, but only found ${successes}`;
      return Parser.fail({ message, expected: [] }, state);
    });
  };
}

/** Skips zero or more occurrences of `parser2`. */
const skipMany0 = (parser2) => skipMany_(0)(parser2);
/** Skips one or more occurrences of `parser2`. */
const skipMany1 = (parser2) => skipMany_(1)(parser2);
/** Skips at least `n` occurrences of `parser2`. */
const skipManyN = (parser2, n) => skipMany_(n)(parser2);

/**
 * Advances one character at a time until `parser2` succeeds, then returns the
 * state *after* that match. Succeeds at end of input if it never matches.
 */
function skipUntil(parser2) {
  return new Parser((state) => {
    let currentState = state;
    while (!State.isAtEnd(currentState)) {
      const { result, state: newState } = parser2.run(currentState);
      if (Either.isRight(result)) {
        return Parser.succeed(void 0, newState);
      }
      currentState = State.consume(currentState, 1);
    }
    return Parser.succeed(void 0, currentState);
  });
}

/**
 * Collects characters until `parser2` succeeds; the terminator is consumed
 * but not included. Returns everything collected if it never matches.
 */
function takeUntil(parser2) {
  return new Parser((state) => {
    let currentState = state;
    let collected = "";
    while (!State.isAtEnd(currentState)) {
      const { result, state: newState } = parser2.run(currentState);
      if (Either.isRight(result)) {
        return Parser.succeed(collected, newState);
      }
      collected += currentState.remaining[0];
      currentState = State.consume(currentState, 1);
    }
    return Parser.succeed(collected, currentState);
  });
}

/**
 * Collects characters up to (not including, not consuming) `char2`.
 * Fails if `char2` is not a single character or never appears.
 */
function parseUntilChar(char2) {
  return new Parser((state) => {
    if (char2.length !== 1) {
      return Parser.fail(
        {
          message: "Incorrect usage of parseUntilChar parser.",
          expected: [char2],
        },
        state
      );
    }
    let currentState = state;
    let collected = "";
    while (!State.isAtEnd(currentState)) {
      if (currentState.remaining[0] === char2) {
        return Parser.succeed(collected, currentState);
      }
      collected += currentState.remaining[0];
      currentState = State.consume(currentState, 1);
    }
    const message = `Expected character ${char2} but found ${collected}`;
    return Parser.fail({ message, expected: [char2] }, currentState);
  });
}

/** Parser that consumes any run of space characters (" " only). */
const skipSpaces = new Parser(
  (state) =>
    Parser.succeed(
      void 0,
      State.consumeWhile(state, (char2) => char2 === " ")
    ),
  { name: "skipSpaces" }
);

/** Tries each parser against the same state and returns the first success. */
function or(...parsers) {
  return new Parser((state) => {
    for (const parser2 of parsers) {
      const { result, state: newState } = parser2.run(state);
      if (Either.isRight(result)) {
        return Parser.succeed(result.right, newState);
      }
    }
    const message = `None of the ${parsers.length} choices could be satisfied`;
    return Parser.fail({ message }, state);
  });
}

/**
 * Succeeds with `parser2`'s value, or with `undefined` when it fails.
 * In both cases the returned state is the one produced by `parser2`.
 */
function optional(parser2) {
  return new Parser((state) => {
    const { result, state: newState } = parser2.run(state);
    if (Either.isLeft(result)) {
      return Parser.succeed(void 0, newState);
    }
    return Parser.succeed(result.right, newState);
  });
}

/** Runs `parsers` in order and succeeds with the value of the *last* one. */
function sequence(parsers) {
  return new Parser((state) => {
    const results = [];
    let currentState = state;
    for (const parser2 of parsers) {
      const { result, state: newState } = parser2.run(currentState);
      if (Either.isLeft(result)) {
        return Parser.fail(result.left, newState);
      }
      results.push(result.right);
      currentState = newState;
    }
    return Parser.succeed(results.at(-1), currentState);
  });
}

/**
 * Parser matching `re` at the current position and consuming the matched text.
 *
 * @param re - Pattern to match; its `g` and `y` flags are ignored
 * @returns Parser succeeding with the matched string
 */
const regex = (re) => {
  // Work on a private sticky copy: `y` anchors the match at `lastIndex`
  // (reset to 0 before every run) so only a match at the very start of the
  // remaining input counts, and the caller's regex is never mutated.
  const anchoredRe = new RegExp(
    re.source,
    `${re.flags.replace(/[gy]/g, "")}y`
  );
  return new Parser(
    (state) => {
      anchoredRe.lastIndex = 0;
      const match = anchoredRe.exec(state.remaining);
      if (match) {
        const value = match[0];
        // Advance past the match; returning the untouched state would make
        // any repetition over this parser spin forever.
        return Parser.succeed(value, State.consume(state, value.length));
      }
      const message = `Expected ${re} but found ${state.remaining.slice(0, 10)}...`;
      return Parser.fail({ message, expected: [re.toString()] }, state);
    },
    { name: re.toString() }
  );
};

/** Function form of `parserA.zip(parserB)`. */
function zip(parserA, parserB) {
  return parserA.zip(parserB);
}

/** Function form of `parserA.then(parserB)`. */
function then(parserA, parserB) {
  return parserA.then(parserB);
}
const zipRight = then;

/** Function form of `parserA.thenDiscard(parserB)`. */
function thenDiscard(parserA, parserB) {
  return parserA.thenDiscard(parserB);
}
const zipLeft = thenDiscard;

/**
 * Collects characters until `parser2` would succeed; unlike `takeUntil` the
 * terminator is left unconsumed.
 */
function takeUpto(parser2) {
  return new Parser((state) => {
    let currentState = state;
    let collected = "";
    while (!State.isAtEnd(currentState)) {
      const { result } = parser2.run(currentState);
      if (Either.isRight(result)) {
        return Parser.succeed(collected, currentState);
      }
      collected += currentState.remaining[0];
      currentState = State.consume(currentState, 1);
    }
    return Parser.succeed(collected, currentState);
  });
}

// ---------------------------------------------------------------------------
// src/utils.ts
// ---------------------------------------------------------------------------

/** Succeeds with the current state, consuming nothing. */
const peekState = new Parser((s) => {
  return Parser.succeed(s, s);
});

/**
 * Succeeds with the remaining input, consuming nothing.
 * NOTE(review): also logs the remaining input to the console.
 */
const peekRemaining = new Parser((s) => {
  console.log(s.remaining);
  return Parser.succeed(s.remaining, s);
});

/** Succeeds with the next `n` characters, consuming nothing. */
const peekAhead = (n) =>
  new Parser((s) => {
    return Parser.succeed(s.remaining.slice(0, n), s);
  });

/**
 * Succeeds with the text up to the next newline (or all remaining input when
 * there is none), consuming nothing.
 * NOTE(review): also logs that text to the console.
 */
const peekLine = new Parser((s) => {
  const newlineIndex = s.remaining.indexOf("\n");
  // -1 means "no newline": take everything rather than dropping the last char.
  const restOfLine =
    newlineIndex === -1 ? s.remaining : s.remaining.slice(0, newlineIndex);
  console.log(restOfLine);
  return Parser.succeed(restOfLine, s);
});

/**
 * Succeeds with the text up to the first occurrence of `ch` (or all remaining
 * input when `ch` does not occur), consuming nothing.
 */
const peekUntil = (ch) =>
  new Parser((s) => {
    const index = s.remaining.indexOf(ch);
    return Parser.succeed(
      index === -1 ? s.remaining : s.remaining.slice(0, index),
      s
    );
  });

// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  Either,
  Left,
  Parser,
  ParserError,
  Right,
  State,
  alphabet,
  anyChar,
  benchmark,
  between,
  breakpoint,
  chain,
  char,
  debug,
  debugState,
  digit,
  getErrorLine,
  lookAhead,
  many0,
  many1,
  manyN,
  manyNExact,
  narrowedString,
  notFollowedBy,
  optional,
  or,
  parseUntilChar,
  parser,
  peekAhead,
  peekLine,
  peekRemaining,
  peekState,
  peekUntil,
  printArrow,
  printErrorContext,
  printErrorLine,
  printPosition,
  printPositionWithOffset,
  regex,
  sepBy,
  sequence,
  skipMany0,
  skipMany1,
  skipManyN,
  skipSpaces,
  skipUntil,
  string,
  takeUntil,
  takeUpto,
  then,
  thenDiscard,
  trace,
  zip,
  zipLeft,
  zipRight
});
//# sourceMappingURL=index.cjs.map