UNPKG

parserator

Version:

An elegant parser combinator library for TypeScript

1,530 lines (1,523 loc) 67 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __esm = (fn, res) => function __init() { return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res; }; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // src/error-formatter.ts var error_formatter_exports = {}; __export(error_formatter_exports, { ErrorFormatter: () => ErrorFormatter, formatError: () => formatError }); var ErrorFormatter, formatError; var init_error_formatter = __esm({ "src/error-formatter.ts"() { "use strict"; ErrorFormatter = class _ErrorFormatter { _format; options; constructor(format = "plain", options = {}) { this._format = format; this.options = { maxContextLines: 3, showHints: true, colorize: true, showContext: true, tabSize: 2, ...options }; } /** * Format a ParseErrorBundle into a string based on the configured format. * * @param bundle - The error bundle to format * @returns Formatted error message string */ format(bundle) { switch (this._format) { case "ansi": return this.formatAnsi(bundle); case "html": return this.formatHtml(bundle); case "json": return this.formatJson(bundle); default: return this.formatPlain(bundle); } } /** * Format error with ANSI color codes for terminal output. 
*/ formatAnsi(bundle) { const primary = bundle.primary; const lines = bundle.source.split("\n"); const errorLine = lines[primary.span.line - 1] || ""; const parts = []; parts.push( `\x1B[31mError\x1B[0m at line ${primary.span.line}, column ${primary.span.column}:` ); if (this.options.showContext && this.options.maxContextLines > 0) { const contextLines = this.getContextLines( lines, primary.span.line - 1, this.options.maxContextLines ); parts.push(...contextLines.map((line) => ` ${line}`)); } else { parts.push(` ${errorLine}`); } const linePrefix = ` > ${primary.span.line.toString().padStart(0, " ")} | `; const adjustedColumn = primary.span.column + linePrefix.length - 2; const pointer = this.createPointer(adjustedColumn, primary.span.length); parts.push(` ${pointer}`); parts.push(this.formatErrorMessage(primary)); const hints = this.getHints(primary); if (this.options.showHints && hints.length > 0) { parts.push(""); for (const hint of hints) { parts.push(` \x1B[36mDid you mean: ${hint}?\x1B[0m`); } } if (this.options.showContext && primary.context && primary.context.length > 0) { parts.push(""); parts.push(` \x1B[90mContext: ${primary.context.join(" > ")}\x1B[0m`); } return parts.join("\n"); } /** * Format error as plain text without colors. 
*/ formatPlain(bundle) { const primary = bundle.primary; const lines = bundle.source.split("\n"); const errorLine = lines[primary.span.line - 1] || ""; const parts = []; parts.push( `Error at line ${primary.span.line}, column ${primary.span.column}:` ); if (this.options.showContext && this.options.maxContextLines > 0) { const contextLines = this.getContextLines( lines, primary.span.line - 1, this.options.maxContextLines ); parts.push(...contextLines.map((line) => ` ${line}`)); } else { parts.push(` ${errorLine}`); } const linePrefix = ` > ${primary.span.line.toString()} | `; const adjustedColumn = primary.span.column + linePrefix.length - 2; const pointer = this.createPointer( adjustedColumn, primary.span.length, false ); parts.push(` ${pointer}`); parts.push(this.formatErrorMessage(primary, false)); const hints = this.getHints(primary); if (this.options.showHints && hints.length > 0) { parts.push(""); for (const hint of hints) { parts.push(` Did you mean: ${hint}?`); } } if (this.options.showContext && primary.context && primary.context.length > 0) { parts.push(""); parts.push(` Context: ${primary.context.join(" > ")}`); } return parts.join("\n"); } /** * Format error as HTML with styling. 
*/ formatHtml(bundle) { const primary = bundle.primary; const lines = bundle.source.split("\n"); const errorLine = lines[primary.span.line - 1] || ""; const parts = []; parts.push('<div class="parse-error">'); parts.push( ` <div class="error-header">Error at line ${primary.span.line}, column ${primary.span.column}:</div>` ); parts.push(' <div class="error-context">'); if (this.options.showContext && this.options.maxContextLines > 0) { const contextLines = this.getContextLines( lines, primary.span.line - 1, this.options.maxContextLines ); for (const line of contextLines) { parts.push( ` <div class="context-line">${this.escapeHtml(line)}</div>` ); } } else { parts.push( ` <div class="error-line">${this.escapeHtml(errorLine)}</div>` ); } const pointer = this.createPointer( primary.span.column, primary.span.length, false ); parts.push(` <div class="error-pointer">${pointer}</div>`); parts.push(" </div>"); parts.push( ` <div class="error-message">${this.escapeHtml(this.formatErrorMessage(primary, false))}</div>` ); const hints = this.getHints(primary); if (this.options.showHints && hints.length > 0) { parts.push(' <div class="error-hints">'); for (const hint of hints) { parts.push( ` <div class="hint">Did you mean: <span class="suggestion">${this.escapeHtml(hint)}</span>?</div>` ); } parts.push(" </div>"); } if (this.options.showContext && primary.context && primary.context.length > 0) { parts.push( ` <div class="error-context-stack">Context: ${primary.context.map((c) => `<span class="context-item">${this.escapeHtml(c)}</span>`).join(" &gt; ")}</div>` ); } parts.push("</div>"); return parts.join("\n"); } /** * Format error as JSON for programmatic consumption. */ formatJson(bundle) { const primary = bundle.primary; const lines = bundle.source.split("\n"); const contextLines = this.options.showContext ? 
this.getContextLines( lines, primary.span.line - 1, this.options.maxContextLines ) : [lines[primary.span.line - 1] || ""]; return JSON.stringify( { error: { type: primary.tag, message: this.getPlainErrorMessage(primary), location: { line: primary.span.line, column: primary.span.column, offset: primary.span.offset, length: primary.span.length }, context: { lines: contextLines, stack: primary.context || [] }, hints: this.getHints(primary), source: bundle.source }, allErrors: bundle.errors.map((err) => ({ type: err.tag, location: { line: err.span.line, column: err.span.column, offset: err.span.offset, length: err.span.length }, context: err.context || [], ...err.tag === "Expected" && { items: err.items, found: err.found }, ...err.tag === "Unexpected" && { found: err.found }, ...err.tag === "Fatal" && { message: err.message } })) }, null, this.options.tabSize ); } /** * Format the error message based on error type. */ formatErrorMessage(error, useColors = true) { const red = useColors ? "\x1B[31m" : ""; const yellow = useColors ? "\x1B[33m" : ""; const reset = useColors ? "\x1B[0m" : ""; switch (error.tag) { case "Expected": const foundText = error.found ? `, found ${error.found}` : ""; return ` ${yellow}Expected:${reset} ${error.items.join(" or ")}${foundText}`; case "Unexpected": return ` ${red}Unexpected:${reset} ${error.found}`; case "Custom": return ` ${error.message}`; case "Fatal": return ` ${red}Fatal:${reset} ${error.message}`; } } /** * Get plain error message without formatting. */ getPlainErrorMessage(error) { switch (error.tag) { case "Expected": const foundText = error.found ? `, found ${error.found}` : ""; return `Expected: ${error.items.join(" or ")}${foundText}`; case "Unexpected": return `Unexpected: ${error.found}`; case "Custom": return error.message; case "Fatal": return `Fatal: ${error.message}`; } } /** * Create a pointer/caret pointing to the error location. 
*/ createPointer(column, length = 1, useColors = true) { const spaces = " ".repeat(Math.max(0, column - 1)); const carets = "^".repeat(Math.max(1, length)); const red = useColors ? "\x1B[31m" : ""; const reset = useColors ? "\x1B[0m" : ""; return `${spaces}${red}${carets}${reset}`; } /** * Get context lines around the error location. */ getContextLines(allLines, errorLineIndex, maxLines) { const contextRadius = Math.floor(maxLines / 2); const startLine = Math.max(0, errorLineIndex - contextRadius); const endLine = Math.min( allLines.length - 1, errorLineIndex + contextRadius ); const contextLines = []; for (let i = startLine; i <= endLine; i++) { const lineNum = i + 1; const lineContent = allLines[i] || ""; const isErrorLine = i === errorLineIndex; const prefix = isErrorLine ? ">" : " "; const paddedLineNum = lineNum.toString().padStart(3, " "); contextLines.push(`${prefix} ${paddedLineNum} | ${lineContent}`); } return contextLines; } /** * Escape HTML entities. */ escapeHtml(text) { return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;"); } /** * Create a new formatter with different options. */ withOptions(options) { return new _ErrorFormatter(this._format, { ...this.options, ...options }); } /** * Create a new formatter with a different format. */ withFormat(format) { return new _ErrorFormatter(format, this.options); } /** * Get hints from an error, handling the union type safely. 
*/ getHints(error) { if (error.tag === "Unexpected" && error.hints) { return error.hints; } return []; } }; formatError = { plain: (bundle, options) => new ErrorFormatter("plain", options).format(bundle), ansi: (bundle, options) => new ErrorFormatter("ansi", options).format(bundle), html: (bundle, options) => new ErrorFormatter("html", options).format(bundle), json: (bundle, options) => new ErrorFormatter("json", options).format(bundle) }; } }); // src/index.ts var src_exports = {}; __export(src_exports, { Either: () => Either, ErrorFormatter: () => ErrorFormatter, Left: () => Left, ParseError: () => ParseError, ParseErrorBundle: () => ParseErrorBundle, Parser: () => Parser, ParserOutput: () => ParserOutput, Right: () => Right, SourcePosition_: () => SourcePosition_, Span: () => Span, State: () => State, alphabet: () => alphabet, anyChar: () => anyChar, anyKeywordWithHints: () => anyKeywordWithHints, atomic: () => atomic, between: () => between, char: () => char, commit: () => commit, count: () => count, cut: () => cut, digit: () => digit, eof: () => eof, formatError: () => formatError, generateHints: () => generateHints, keywordWithHints: () => keywordWithHints, levenshteinDistance: () => levenshteinDistance, lookahead: () => lookahead, many: () => many, many0: () => many0, many1: () => many1, manyN: () => manyN, manyNExact: () => manyNExact, narrow: () => narrow, narrowedString: () => narrowedString, notChar: () => notChar, notFollowedBy: () => notFollowedBy, optional: () => optional, or: () => or, parseUntilChar: () => parseUntilChar, parser: () => parser, peekAhead: () => peekAhead, peekLine: () => peekLine, peekRemaining: () => peekRemaining, peekState: () => peekState, peekUntil: () => peekUntil, position: () => position, regex: () => regex, sepBy: () => sepBy, sepBy1: () => sepBy1, sepEndBy: () => sepEndBy, sequence: () => sequence, skipMany0: () => skipMany0, skipMany1: () => skipMany1, skipManyN: () => skipManyN, skipSpaces: () => skipSpaces, skipUntil: () 
=> skipUntil, string: () => string, stringWithHints: () => stringWithHints, takeUntil: () => takeUntil, takeUpto: () => takeUpto, then: () => then, thenDiscard: () => thenDiscard, zip: () => zip, zipLeft: () => zipLeft, zipRight: () => zipRight }); module.exports = __toCommonJS(src_exports); // src/either.ts var Left = class { constructor(left) { this.left = left; } _tag = "Left"; *[Symbol.iterator]() { return yield this; } }; var Right = class { constructor(right) { this.right = right; } _tag = "Right"; *[Symbol.iterator]() { return yield this; } }; var Either = { left(l) { return new Left(l); }, right(r) { return new Right(r); }, isLeft(either) { return either._tag === "Left"; }, isRight(either) { return either._tag === "Right"; }, match(onLeft, onRight) { return (either) => { if (Either.isLeft(either)) { return onLeft(either.left); } return onRight(either.right); }; }, gen(f) { const iterator = f(); let current = iterator.next(); while (!current.done) { const either = current.value; if (Either.isLeft(either)) { return either; } current = iterator.next(either.right); } return Either.right(current.value); } }; // src/errors.ts function Span(state, length = 0) { return { offset: state.pos.offset, length, line: state.pos.line, column: state.pos.column }; } var ParseError = { /** Creates an ExpectedParseError for when specific tokens were expected */ expected: (params) => ({ tag: "Expected", ...params }), /** Creates an UnexpectedParseError for when an unexpected token was found */ unexpected: (params) => ({ tag: "Unexpected", ...params }), /** Creates a CustomParseError with a custom message */ custom: (params) => ({ tag: "Custom", ...params }), /** Creates a FatalParseError that cannot be recovered from */ fatal: (params) => ({ tag: "Fatal", ...params }) }; var ParseErrorBundle = class { /** * Creates a new ParseErrorBundle. 
* @param errors - Array of parsing errors * @param source - The original source code being parsed * @returns {ParseErrorBundle} A new ParseErrorBundle instance containing the errors and source */ constructor(errors, source) { this.errors = errors; this.source = source; } /** * Gets the primary error (the one that occurred furthest in the input). * This is typically the most relevant error to show to the user. * @returns {ParseError} The error with the highest offset position */ get primary() { return this.errors.reduce( (furthest, current) => current.span.offset > furthest.span.offset ? current : furthest ); } /** * Gets all errors that occurred at the same position as the primary error. * Useful when multiple parse attempts failed at the same location. * @returns {ParseError[]} Array of errors at the furthest position */ get primaryErrors() { const maxOffset = this.primary.span.offset; return this.errors.filter((err) => err.span.offset === maxOffset); } /** * Converts the primary error to a simple string representation. * @returns {string} A human-readable error message */ toString() { const err = this.primary; switch (err.tag) { case "Expected": return `Expected ${err.items.join(" or ")}${err.found ? `, found ${err.found}` : ""}`; case "Unexpected": return `Unexpected ${err.found}`; case "Custom": return err.message; case "Fatal": return `Fatal: ${err.message}`; } } /** * Formats the error bundle using the specified formatter. 
* @param format - The output format ("plain", "ansi", "html", or "json") * @returns {string} Formatted error message with context and highlighting */ format(format = "plain") { const { ErrorFormatter: ErrorFormatter2 } = (init_error_formatter(), __toCommonJS(error_formatter_exports)); return new ErrorFormatter2(format).format(this); } }; // src/state.ts var ParserOutput = (state, result) => ({ state, result }); var SourcePosition_ = class { constructor(line, column, offset) { this.line = line; this.column = column; this.offset = offset; } }; var State = { /** * Creates a new parser state from an input string. * * @param input - The input string to parse * @returns A new parser state initialized at the start of the input */ fromInput(input) { return { remaining: input, source: input, pos: { line: 1, column: 1, offset: 0 } }; }, /** * Creates a new state by consuming n characters from the current state. * * @param state - The current parser state * @param n - Number of characters to consume * @returns A new state with n characters consumed and position updated * @throws Error if attempting to consume more characters than remaining */ consume(state, n) { if (n === 0) return state; if (n > state.remaining.length) { throw new Error("Cannot consume more characters than remaining"); } const consumed = state.remaining.slice(0, n); let { line, column, offset } = state.pos; for (const char2 of consumed) { if (char2 === "\n") { line++; column = 1; } else { column++; } offset++; } return { ...state, remaining: state.remaining.slice(n), pos: { line, column, offset } }; }, /** * Creates a new state by consuming a specific string from the current state. 
* * @param state - The current parser state * @param str - The string to consume * @returns A new state with the string consumed and position updated * @throws Error if the input doesn't start with the specified string */ consumeString(state, str) { if (!state.remaining.startsWith(str)) { throw new Error( `Cannot consume "${str}" - input "${state.remaining}" doesn't start with it` ); } return State.consume(state, str.length); }, /** * Creates a new state by moving to a specific offset position in the source. * Resets to the beginning and then consumes to the target position. * @param state - The current parser state * @param moveBy - Number of characters to move forward from current position * @returns A new state at the target position */ move(state, moveBy) { return State.consume( { ...state, remaining: state.source, pos: { line: 1, column: 1, offset: 0 } }, state.pos.offset + moveBy ); }, /** * Creates a new state by consuming characters while a predicate is true. * * @param state - The current parser state * @param predicate - Function that tests each character * @returns A new state with matching characters consumed */ consumeWhile(state, predicate) { let i = 0; while (i < state.remaining.length && predicate(state.remaining[i])) { i++; } return State.consume(state, i); }, /** * Gets the next n characters from the input without consuming them. * * @param state - The current parser state * @param n - Number of characters to peek (default: 1) * @returns The next n characters as a string */ peek(state, n = 1) { return state.remaining.slice(0, n); }, /** * Checks if the parser has reached the end of input. * * @param state - The current parser state * @returns True if at end of input, false otherwise */ isAtEnd(state) { return state.remaining.length === 0; }, /** * Creates a human-readable string representation of the current parser position. 
* @param state - The current parser state * @returns A formatted string showing line, column, and offset * @example * ```typescript * const posStr = State.printPosition(state); * // Returns: "line 5, column 12, offset 89" * ``` */ printPosition(state) { return `line ${state.pos.line}, column ${state.pos.column}, offset ${state.pos.offset}`; } }; // src/parser.ts var Parser = class _Parser { /** * Creates a new Parser instance. * * @param run - The parsing function that takes a parser state and returns a parse result */ constructor(run) { this.run = run; } // Monad/Applicative /** * Creates a successful parser output with the given value and state. * * This is a low-level helper used internally to construct successful parse results. * It doesn't consume any input and returns the value with the current state unchanged. * * @param value - The value to wrap in a successful result * @param state - The current parser state * @returns {ParserOutput<T>} A successful parser output containing the value * @template T The type of the successful value * @internal */ static succeed(value, state) { return ParserOutput(state, Either.right(value)); } /** * Creates a parser that always succeeds with the given value without consuming any input. * * This is the basic way to inject a value into the parser context. The parser will * succeed immediately with the provided value and won't advance the parser state. 
* * @param a - The value to lift into the parser context * @returns {Parser<A>} A parser that always succeeds with the given value * @template A The type of the value being lifted * * @example * ```ts * const always42 = Parser.lift(42) * always42.parse("any input") // succeeds with 42 * * // Useful for providing default values * const parseNumberOrDefault = number.or(Parser.lift(0)) * * // Can be used to inject values in parser chains * const parser = parser(function* () { * const name = yield* identifier * const separator = yield* Parser.lift(":") * const value = yield* number * return { name, separator, value } * }) * ``` */ static lift = (a) => new _Parser((state) => _Parser.succeed(a, state)); /** * Lifts a binary function into the parser context, applying it to the results of two parsers. * * This is the applicative functor's version of `map` for functions of two arguments. * It runs both parsers in sequence and applies the function to their results if both succeed. * * @param ma - The first parser * @param mb - The second parser * @param f - A function that takes the results of both parsers and produces a new value * @returns {Parser<C>} A parser that applies the function to the results of both input parsers * @template A The type of value produced by the first parser * @template B The type of value produced by the second parser * @template C The type of value produced by applying the function * * @example * ```ts * // Combine two parsed values with a function * const parsePoint = Parser.liftA2( * number, * number.trimLeft(comma), * (x, y) => ({ x, y }) * ) * parsePoint.parse("10, 20") // succeeds with { x: 10, y: 20 } * * // Build a data structure from multiple parsers * const parsePerson = Parser.liftA2( * identifier, * number.trimLeft(colon), * (name, age) => ({ name, age }) * ) * parsePerson.parse("John:30") // succeeds with { name: "John", age: 30 } * ``` */ static liftA2 = (ma, mb, f) => ma.zip(mb).map((args) => f(...args)); /** * Applies a parser that 
produces a function to a parser that produces a value. * * This is the applicative functor's application operator. It allows you to apply * functions within the parser context, enabling powerful composition patterns. * * @param ma - A parser that produces a value * @param mf - A parser that produces a function from that value type to another type * @returns {Parser<B>} A parser that applies the parsed function to the parsed value * @template A The type of the input value * @template B The type of the output value after function application * * @example * ```ts * // Parse a function name and apply it * const parseFn = choice([ * string("double").map(() => (x: number) => x * 2), * string("square").map(() => (x: number) => x * x) * ]) * const result = Parser.ap(number, parseFn.trimLeft(space)) * result.parse("5 double") // succeeds with 10 * result.parse("5 square") // succeeds with 25 * * // Chain multiple applications * const add = (x: number) => (y: number) => x + y * const parseAdd = Parser.lift(add) * const addParser = Parser.ap( * number, * Parser.ap(number.trimLeft(plus), parseAdd) * ) * addParser.parse("3 + 4") // succeeds with 7 * ``` */ static ap = (ma, mf) => mf.zip(ma).map(([f, a]) => f(a)); // Error handling /** * Creates a failed parser output with the given error information. * * This is a low-level helper for constructing parse errors. It creates a custom * error with the provided message and optional expected/found information. * * @param error - Error details including message and optional expected/found values * @param state - The parser state where the error occurred * @returns {ParserOutput<never>} A failed parser output containing the error * @internal */ static fail(error, state) { const span = Span({ pos: { offset: state.pos.offset, line: state.pos.line, column: state.pos.column } }); const parseErr = ParseError.custom({ span, message: error.message, context: state?.labelStack ?? 
[], hints: [] }); const bundle = new ParseErrorBundle( [parseErr], // state?.source ?? state.remaining state.source ); return ParserOutput(state, Either.left(bundle)); } /** * Creates a parser that always fails with a fatal error. * * Fatal errors are non-recoverable and prevent backtracking in choice combinators. * Use this when you've determined that the input is definitely malformed and trying * other alternatives would be meaningless. * * @param message - The error message to display * @returns {Parser<never>} A parser that always fails with a fatal error * * @example * ```ts * const number = regex(/-?[0-9]+/).map(Number); * const parsePositive = number.flatMap(n => * n > 0 ? Parser.lift(n) : Parser.fatal("Expected positive number") * ) * ``` */ static fatal = (message) => new _Parser( (state) => ParserOutput( state, Either.left( new ParseErrorBundle( [ ParseError.fatal({ span: Span(state), message, context: state?.labelStack ?? [] }) ], // state?.source ?? state.remaining state.source ) ) ) ); /** * Runs the parser on the given input string and returns the full parser output. * * This method provides access to both the parse result and the final parser state, * which includes information about the remaining unparsed input and position. * * @param input - The string to parse * @returns {ParserOutput<T>} A parser output containing both the result (success or error) and final state * * @example * ```ts * const parser = string("hello"); * const output = parser.parse("hello world"); * // output.result contains Either.right("hello") * // output.state contains remaining input " world" and position info * ``` */ parse(input) { const { result, state } = this.run(State.fromInput(input)); return ParserOutput(state, result); } /** * Runs the parser on the given input and returns either the parsed value or error bundle. 
* * This is a convenience method that unwraps the Either result, making it easier * to handle the common case where you just need the value or error without the * full parser state information. * * @param input - The string to parse * @returns {T | ParseErrorBundle} The successfully parsed value of type T, or a ParseErrorBundle on failure * * @example * ```ts * const parser = number(); * const result = parser.parseOrError("42"); * if (result instanceof ParseErrorBundle) { * console.error(result.format()); * } else { * console.log(result); // 42 * } * ``` */ parseOrError(input) { const { result } = this.run(State.fromInput(input)); if (Either.isRight(result)) { return result.right; } return result.left; } /** * Runs the parser on the given input and returns the parsed value or throws an error. * * This method is useful when you're confident the parse will succeed or want to * handle parse errors as exceptions. The thrown error is a ParseErrorBundle which * contains detailed information about what went wrong. * * @param input - The string to parse * @returns {T} The successfully parsed value of type T * @throws {ParseErrorBundle} Thrown when parsing fails * * @example * ```ts * const parser = number(); * try { * const value = parser.parseOrThrow("42"); * console.log(value); // 42 * } catch (error) { * if (error instanceof ParseErrorBundle) { * console.error(error.format()); * } * } * ``` */ parseOrThrow(input) { const { result } = this.parse(input); if (Either.isLeft(result)) { throw result.left; } return result.right; } /** * Transforms the result of this parser by applying a function to the parsed value. * * This is the functor map operation. If the parser succeeds, the function is applied * to the result. If the parser fails, the error is propagated unchanged. The input * is not consumed if the transformation fails. 
* * @param f - A function that transforms the parsed value * @returns {Parser<B>} A new parser that produces the transformed value * @template B The type of the transformed value * * @example * ```ts * // Parse a number and double it * const doubled = number().map(n => n * 2); * doubled.parse("21") // succeeds with 42 * * // Parse a string and get its length * const stringLength = quoted('"').map(s => s.length); * stringLength.parse('"hello"') // succeeds with 5 * * // Chain multiple transformations * const parser = identifier() * .map(s => s.toUpperCase()) * .map(s => ({ name: s })); * parser.parse("hello") // succeeds with { name: "HELLO" } * ``` */ map(f) { return new _Parser((state) => { const { result, state: newState } = this.run(state); if (Either.isLeft(result)) { return ParserOutput( state, result ); } return ParserOutput(newState, Either.right(f(result.right))); }); } /** * Chains this parser with another parser that depends on the result of this one. * * This is the monadic bind operation (also known as chain or andThen). It allows * you to create a parser whose behavior depends on the result of a previous parse. * This is essential for context-sensitive parsing where later parsing decisions * depend on earlier results. 
* * @param f - A function that takes the parsed value and returns a new parser * @returns {Parser<B>} A new parser that runs the second parser after the first succeeds * @template B The type of value produced by the resulting parser * * @example * ```ts * // Parse a number and then that many 'a' characters * const parser = number().flatMap(n => * string('a'.repeat(n)) * ); * parser.parse("3aaa") // succeeds with "aaa" * * // Parse a type annotation and return appropriate parser * const typeParser = identifier().flatMap(type => { * switch(type) { * case "int": return number(); * case "string": return quoted('"'); * default: return Parser.fail({ message: `Unknown type: ${type}` }); * } * }); * * // Validate parsed values * const positiveNumber = number().flatMap(n => * n > 0 * ? Parser.lift(n) * : Parser.fail({ message: "Expected positive number" }) * ); * ``` */ flatMap(f) { return new _Parser((state) => { const { result, state: newState } = this.run(state); if (Either.isLeft(result)) { return { state: newState, result }; } const nextParser = f(result.right); return nextParser.run(newState); }); } /** * Creates a parser that always succeeds with the given value without consuming input. * * This is an alias for `Parser.lift` that follows the monadic naming convention. * It's the "return" or "pure" operation for the Parser monad, injecting a plain * value into the parser context. * * @param a - The value to wrap in a successful parser * @returns {Parser<A>} A parser that always succeeds with the given value * @template A The type of the value being lifted * * @example * ```ts * // Always succeed with a constant value * const always42 = Parser.pure(42); * always42.parse("any input") // succeeds with 42 * * // Use in flatMap to wrap values * const parser = number().flatMap(n => * n > 0 ? 
Parser.pure(n) : Parser.fail({ message: "Must be positive" }) * ); * ``` */ static pure = (a) => new _Parser((state) => _Parser.succeed(a, state)); /** * Creates a new parser that lazily evaluates the given function. * This is useful for creating recursive parsers. * * @param fn - A function that returns a parser * @returns {Parser<T>} A new parser that evaluates the function when parsing * @template T The type of value produced by the parser * * @example * ```ts * // Create a recursive parser for nested parentheses * const parens: Parser<string> = Parser.lazy(() => * between( * char('('), * char(')'), * parens * ) * ) * ``` */ static lazy(fn) { return new _Parser((state) => { const parser2 = fn(); return parser2.run(state); }); } /** * Combines this parser with another parser, returning both results as a tuple. * * This is a fundamental sequencing operation that runs two parsers in order. * If either parser fails, the entire operation fails. The results are returned * as a tuple containing both parsed values. 
* * @param parserB - The second parser to run after this one * @returns {Parser<[T, B]>} A parser that produces a tuple of both results * @template B The type of value produced by the second parser * * @example * ```ts * // Parse a coordinate pair * const coordinate = number().zip(number().trimLeft(comma)); * coordinate.parse("10, 20") // succeeds with [10, 20] * * // Parse a key-value pair * const keyValue = identifier().zip(number().trimLeft(colon)); * keyValue.parse("age:30") // succeeds with ["age", 30] * * // Combine multiple parsers * const triple = number() * .zip(number().trimLeft(comma)) * .zip(number().trimLeft(comma)) * .map(([[a, b], c]) => [a, b, c]); * triple.parse("1, 2, 3") // succeeds with [1, 2, 3] * ``` */ zip(parserB) { return new _Parser((state) => { const { result: a, state: stateA } = this.run(state); if (Either.isLeft(a)) { return { result: a, state: stateA }; } const { result: b, state: stateB } = parserB.run(stateA); if (Either.isLeft(b)) { return { result: b, state: stateB }; } return _Parser.succeed([a.right, b.right], stateB); }); } /** * Sequences this parser with another, keeping only the second result. * * This is useful when you need to parse something but only care about what * comes after it. The first parser must succeed for the second to run, but * its result is discarded. 
* * @param parserB - The parser whose result will be kept * @returns {Parser<B>} A parser that produces only the second result * @template B The type of value produced by the second parser * * @example * ```ts * // Parse a value after a label * const labeledValue = string("value:").then(number()); * labeledValue.parse("value:42") // succeeds with 42 * * // Skip whitespace before parsing * const trimmedNumber = whitespace().then(number()); * trimmedNumber.parse(" 123") // succeeds with 123 * * // Parse the body after a keyword * const functionBody = keyword("function").then(identifier()).then(block()); * ``` */ then(parserB) { return this.zip(parserB).map(([_, b]) => b); } /** * Alias for `then` - sequences parsers and keeps the right result. * * This alias follows the naming convention from applicative functors where * "zipRight" means to combine two values but keep only the right one. * * @see {@link then} for details and examples */ zipRight = this.then; /** * Sequences this parser with another, keeping only the first result. * * This is useful when you need to parse something that must be present but * whose value you don't need. Common uses include parsing required delimiters * or terminators. 
* * @param parserB - The parser to run but whose result will be discarded * @returns {Parser<T>} A parser that produces only the first result * @template B The type of value produced by the second parser (discarded) * * @example * ```ts * // Parse a statement and discard the semicolon * const statement = expression().thenDiscard(char(';')); * statement.parse("x + 1;") // succeeds with the expression, semicolon discarded * * // Parse a quoted string and discard the closing quote * const quotedContent = char('"').then(stringUntil('"')).thenDiscard(char('"')); * * // Parse array elements and discard separators * const element = number().thenDiscard(optional(char(','))); * ``` */ thenDiscard(parserB) { return this.zip(parserB).map(([a, _]) => a); } /** * Alias for `thenDiscard` - sequences parsers and keeps the left result. * * This alias follows the naming convention from applicative functors where * "zipLeft" means to combine two values but keep only the left one. * * @see {@link thenDiscard} for details and examples */ zipLeft = this.thenDiscard; /** * Makes this parser usable in generator syntax for cleaner sequential parsing. * * This iterator implementation allows parsers to be used with `yield*` in * generator functions, enabling a more imperative style of parser composition * that can be easier to read for complex sequential parsing. * * @returns {Generator<Parser<T>, T, any>} A generator that yields this parser and returns its result * @internal */ *[Symbol.iterator]() { return yield this; } /** * Adds a tap point to observe the current state and result during parsing. * Useful for debugging parser behavior. 
* * @example * ```ts * const parser = parser(function* () { * const name = yield* identifier(); * yield* char(':'); * const value = yield* number(); * return { name, value }; * }); * parser.tap(({ state, result }) => { * console.log(`Parsed ${result} at position ${state.pos}`); * }); * ``` * * @param callback - Function called with current state and result * @returns {Parser<T>} The same parser with the tap point added */ tap(callback) { return new _Parser((state) => { const result = this.run(state); callback({ state, result }); return result; }); } static gen = (f) => new _Parser((state) => { const iterator = f(); let current = iterator.next(); let currentState = state; while (!current.done) { const { result, state: updatedState } = current.value.run(currentState); if (Either.isLeft(result)) { return { result, state: updatedState }; } currentState = updatedState; current = iterator.next(result.right); } return _Parser.succeed(current.value, currentState); }); trim(parser2) { return parser2.then(this).thenDiscard(parser2); } trimLeft(parser2) { return parser2.then(this); } trimRight(parser2) { return this.thenDiscard(parser2); } /** * Adds a label to this parser for better error messages * @param name - The label name to add to the context stack * @returns {Parser<T>} A new parser with the label added */ label(name) { return new _Parser((state) => { const newState = { ...state, context: { ...state, labelStack: [name, ...state.labelStack || []] } }; const result = this.run(newState); if (Either.isLeft(result.result)) { return ParserOutput( state, Either.left( new ParseErrorBundle( [ // Convert generic errors to labeled expectations { tag: "Expected", span: Span(state), items: [name], context: newState.labelStack || [] } ], state.source ) ) ); } return result; }); } /** * Helper for creating semantic expectations with both label and error message * @param description - The description for both the label and error message * @returns {Parser<T>} A new parser with both 
labeling and error message */ expect(description) { return new _Parser((state) => { const output = this.run(state); if (Either.isLeft(output.result)) { return _Parser.fail( { message: `Expected ${description}` }, output.state ); } return output; }); } /** * Helper for creating semantic expectations with both label and error message * @param errorBundle - The error bundle containing the errors to be displayed * @param state - The current parser state * @returns {ParserOutput<never>} A parser output with the error bundle and the current state * @internal */ static failRich(errorBundle, state) { const bundle = new ParseErrorBundle( errorBundle.errors, // state?.source ?? state.remaining state.source ); return ParserOutput(state, Either.left(bundle)); } /** * Commits to the current parsing path, preventing backtracking beyond this point. * * Once a parser is committed, if it fails later in the sequence, the error won't * backtrack to try other alternatives in a `choice` or `or` combinator. This leads * to more specific error messages instead of generic "expected one of" errors. 
* * @returns {Parser<T>} A new parser that sets the commit flag after successful parsing * * @example * ```ts * // Use commit after matching a keyword to ensure specific error messages * const ifStatement = parser(function* () { * yield* keyword("if") * yield* commit() // After seeing "if", we know it's an if statement * yield* char('(').expect("opening parenthesis after 'if'") * const condition = yield* expression * yield* char(')').expect("closing parenthesis") * const body = yield* block * return { type: "if", condition, body } * }) * * // In a choice, commit prevents backtracking * const statement = choice([ * ifStatement, * whileStatement, * assignment * ]) * * // Input: "if x > 5 {}" (missing parentheses) * // Without commit: "Expected if, while, or assignment" * // With commit: "Expected opening parenthesis after 'if'" * ``` * * @example * ```ts * // Commit can be chained with other methods * const jsonObject = char('{') * .commit() // Once we see '{', it must be an object * .then(whitespace) * .then(objectContent) * .expect("valid JSON object") * ``` * * @see {@link commit} - Standalone function version * @see {@link cut} - Alias with Prolog-style naming */ commit = () => new _Parser((state) => { const result = this.run(state); if (Either.isRight(result.result)) { return ParserOutput( { ...result.state, committed: true }, result.result ); } return result; }); /** * Creates an atomic parser that either fully succeeds or resets to the original state. * * This is useful for "all-or-nothing" parsing where you want to try a complex * parser but not consume any input if it fails. The parser acts as a transaction - * if any part fails, the entire parse is rolled back. 
* * @returns {Parser<T>} A new parser that resets state on failure * * @example * ```ts * // Without atomic - partial consumption on failure * const badParser = parser(function* () { * yield* string("foo") * yield* string("bar") // If this fails, "foo" is already consumed * }) * * // With atomic - no consumption on failure * const goodParser = parser(function* () { * yield* string("foo") * yield* string("bar") // If this fails, we reset to before "foo" * }).atomic() * ``` * * @example * ```ts * // Useful for trying complex alternatives * const value = or( * // Try to parse as a complex expression * expression.atomic(), * // If that fails completely, try as a simple literal * literal * ) * ``` * * @example * ```ts * // Lookahead parsing without consumption * const startsWithKeyword = or( * string("function").atomic(), * string("const").atomic(), * string("let").atomic() * ).map(() => true).or(Parser.succeed(false)) * ``` * * @see {@link atomic} - Standalone function version */ atomic() { return new _Parser((state) => { const result = this.run(state); if (Either.isLeft(result.result)) { return ParserOutput(state, result.result); } return result; }); } spanned() { return new _Parser((state) => { const startState = state; const result = this.run(state); if (result.result._tag === "Right") { const span = Span( startState, result.state.pos.offset - startState.pos.offset ); return ParserOutput( result.state, Either.right([result.result.right, span]) ); } return result; }); } }; var parser = Parser.gen; // src/combinators.ts var lookahead = (par) => new Parser((state) => { const { result } = par.run(state); if (Either.isRight(result)) { return Parser.succeed(result.right, state); } return Parser.succeed(void 0, state); }); function notFollowedBy(par) { return new Parser((state) => { const { result, state: newState } = par.run(state); if (Either.isRight(result)) { return Parser.fail( { message: "Expected not to follow", expected: [], found: state.remaining.at(0) }, newState 
); } return Parser.succeed(true, state); }); } var string = (str) => new Parser((state) => { if (state.remaining.startsWith(str)) { return Parser.succeed(str, State.