UNPKG

@sanity/json-match

Version:

A lightweight and lazy implementation of JSONMatch made for JavaScript

419 lines (360 loc) 10.2 kB
import {tokenize, type Token} from './tokenize'
import {createCursor, type Cursor} from './cursor'

/**
 * The root type for all JSONMatch expression nodes.
 *
 * @public
 */
export type ExprNode = NumberNode | StringNode | BooleanNode | NullNode | PathNode

/**
 * Represents a path expression in the JSONMatch AST.
 * This is the most common type of expression, representing navigation through an object or array.
 *
 * @public
 */
export type PathNode = {
  type: 'Path'
  /** The preceding context (what the current segment is applied to). */
  base?: PathNode
  /** `true` for `..` recursive descent, `false`/`undefined` for normal `.` descent. */
  recursive?: boolean
  /** The current operation to apply. */
  segment: SegmentNode
}

/**
 * Represents different types of path segments in the JSONMatch AST.
 *
 * @public
 */
export type SegmentNode = ThisNode | IdentifierNode | WildcardNode | SubscriptNode

/**
 * Represents a subscript operation (bracket notation) in the JSONMatch AST.
 * Can contain multiple elements that are combined with union (OR) semantics.
 *
 * @public
 */
export type SubscriptNode = {
  type: 'Subscript'
  elements: SubscriptElementNode[]
}

/**
 * Represents elements that can appear inside subscript brackets.
 *
 * @public
 */
export type SubscriptElementNode = SliceNode | ComparisonNode | ExistenceNode | ExprNode

/**
 * Represents a comparison operation for filtering array/object elements.
 *
 * @public
 */
export type ComparisonNode = {
  type: 'Comparison'
  left: ExprNode
  operator: '==' | '!=' | '>' | '<' | '>=' | '<='
  right: ExprNode
}

/**
 * Represents an existence check (?) for filtering elements that have a specific property.
 *
 * @public
 */
export type ExistenceNode = {
  type: 'Existence'
  base: PathNode
}

/**
 * Represents an array slice operation.
 *
 * @public
 */
export type SliceNode = {type: 'Slice'; start?: number; end?: number}

/**
 * Represents an identifier (property name) in the JSONMatch AST.
 *
 * @public
 */
export type IdentifierNode = {type: 'Identifier'; name: string}

/**
 * Represents a string literal in the JSONMatch AST.
 *
 * @public
 */
export type StringNode = {type: 'String'; value: string}

/**
 * Represents a numeric literal in the JSONMatch AST, or an index, depending on
 * the execution context.
 *
 * @public
 */
export type NumberNode = {type: 'Number'; value: number}

/**
 * Represents a boolean literal in the JSONMatch AST.
 *
 * @public
 */
export type BooleanNode = {type: 'Boolean'; value: boolean}

/**
 * Represents a null literal in the JSONMatch AST.
 *
 * @public
 */
export type NullNode = {type: 'Null'}

/**
 * Represents a wildcard (*) operation in the JSONMatch AST.
 *
 * @public
 */
export type WildcardNode = {type: 'Wildcard'}

/**
 * Represents the current context (`@`/`$`) in the JSONMatch AST.
 *
 * @public
 */
export type ThisNode = {type: 'This'}

/**
 * Error thrown when the parser meets a token it cannot accept.
 *
 * The message names the offending token type and its position and, when
 * `expected` is given, what the parser was looking for instead.
 */
class UnexpectedTokenError extends SyntaxError {
  constructor(token: Token, expected?: string) {
    super(
      expected
        ? `Expected ${expected} at position ${token.position} but got ${token.type} instead`
        : `Unexpected token ${token.type} at position ${token.position}`,
    )
  }
}

/**
 * A token cursor whose `consume` return type is narrowed to the token variant
 * matching the requested token type.
 *
 * Throughout this module, calling the cursor (`cursor()`) reads the current
 * token without advancing and `cursor.consume()` advances past it.
 */
interface TokenCursor extends Cursor<Token, Token['type']> {
  consume<TTokenType extends Token['type']>(
    tokenType?: TTokenType,
  ): Extract<Token, {type: TTokenType}>
}

/**
 * Parses a JSONMatch expression string into an Abstract Syntax Tree (AST).
 *
 * This is the main parsing function that converts a JSONMatch string expression
 * into a structured AST that can be evaluated by the `jsonMatch` function or
 * manipulated using the path utilities.
 *
 * @param query - The JSONMatch expression string to parse
 * @returns The parsed AST representation of the expression
 * @throws SyntaxError if the expression is empty, contains an unexpected
 * token, or has trailing tokens after a complete expression
 *
 * @example
 * Basic parsing:
 * ```typescript
 * import { parse } from '@sanity/json-match'
 *
 * const ast = parse('users[age > 21].name')
 * console.log(ast.type) // 'Path'
 * ```
 *
 * @example
 * Parsing with complex expressions:
 * ```typescript
 * const complexAst = parse('data..items[price > 100, category == "electronics"].name')
 * // Returns a nested PathNode structure
 * ```
 *
 * @public
 */
export function parse(query: string): ExprNode {
  const tokens = tokenize(query)
  // a lone EOF token (or nothing at all) means there is nothing to parse
  if (tokens.length <= 1) throw new SyntaxError('Empty expression')

  const eof = tokens.at(-1)!

  // last token will always be EOF but we'll check anyway for the type assertion
  if (eof.type !== 'EOF') {
    throw new UnexpectedTokenError(eof)
  }

  const cursor = createCursor({
    values: tokens,
    fallback: eof,
    // presumably invoked by `consume(expectedTokenType)`; rejects a token of any other type
    validator: (expectedTokenType: Token['type'], token) => {
      if (token.type !== expectedTokenType) {
        throw new UnexpectedTokenError(token, expectedTokenType)
      }
    },
  }) as TokenCursor

  const ast = parseExpression(cursor)

  // the whole input must be consumed; anything left over is a syntax error
  cursor.consume('EOF')

  return ast
}

/**
 * Parses a single expression: either a path or a literal
 * (string, number, boolean, or null).
 *
 * @throws UnexpectedTokenError if the current token cannot start an expression
 */
function parseExpression(cursor: TokenCursor): ExprNode {
  switch (cursor().type) {
    // Path openers
    case 'This':
    case 'Identifier':
    case '*':
    case '[':
    case '.':
    case '..': {
      return parsePath(cursor)
    }

    case 'String': {
      const {value} = cursor.consume('String')
      return {type: 'String', value}
    }

    case 'Number': {
      const {value} = cursor.consume('Number')
      return {type: 'Number', value}
    }

    case 'Boolean': {
      const {value} = cursor.consume('Boolean')
      return {type: 'Boolean', value}
    }

    case 'Null': {
      cursor.consume('Null')
      return {type: 'Null'}
    }

    default: {
      throw new UnexpectedTokenError(cursor())
    }
  }
}

/**
 * Parses a path: an initial segment (optionally introduced by a leading `.` or
 * `..`, which implies `This` as the base) followed by any mix of subscripts and
 * dot-separated segments.
 *
 * @throws UnexpectedTokenError if a bare `.` is found or a segment is missing
 */
function parsePath(cursor: TokenCursor): PathNode {
  // Build up the path node in this variable. Each node carries a `base` to allow
  // for chaining; this shape prioritizes evaluation of the AST, since the base
  // can be evaluated first.
  let result: PathNode

  // handle implicit this
  if (cursor().type === '.' || cursor().type === '..') {
    const recursive = cursor().type === '..'
    cursor.consume()

    // Check if there's a segment following the dot(s)
    if (cursor().type === 'EOF' || cursor().type === ']' || cursor().type === ',') {
      // Only bare .. is valid - treat as wildcard. Bare . should be invalid
      if (recursive) {
        result = {
          type: 'Path',
          base: {
            type: 'Path',
            segment: {type: 'This'},
          },
          recursive,
          segment: {type: 'Wildcard'},
        }
      } else {
        // Bare . is invalid
        throw new UnexpectedTokenError(cursor(), 'Path Segment')
      }
    } else {
      // For implicit root, we need to parse the next segment and combine it
      const segment = parsePathSegment(cursor)
      result = {
        type: 'Path',
        base: {
          type: 'Path',
          segment: {type: 'This'},
        },
        recursive,
        segment,
      }
    }
  } else {
    // parse the initial segment
    const segment = parsePathSegment(cursor)
    result = {type: 'Path', segment}
  }

  // handle chaining: subscripts and dot notation can be mixed
  while (true) {
    // check for subscripts after identifiers, wildcards, or this
    if (cursor().type === '[') {
      const subscript = parseSubscript(cursor)
      result = {
        type: 'Path',
        base: result,
        recursive: false,
        segment: subscript,
      }
      continue
    }

    // check for dot notation continuation
    if (cursor().type === '.' || cursor().type === '..') {
      const recursive = cursor().type === '..'
      cursor.consume()
      const segment = parsePathSegment(cursor)
      result = {
        type: 'Path',
        base: result,
        recursive,
        segment,
      }
      continue
    }

    // no more chaining
    break
  }

  return result
}

/**
 * Parses one path segment.
 *
 * PathSegment ::= This | Identifier | Wildcard | Subscript
 *
 * @throws UnexpectedTokenError if the current token cannot start a segment
 */
function parsePathSegment(cursor: TokenCursor): SegmentNode {
  const next = cursor()

  if (next.type === 'This') {
    cursor.consume()
    return {type: 'This'}
  }

  if (next.type === 'Identifier') {
    cursor.consume()
    return {type: 'Identifier', name: next.value}
  }

  if (next.type === '*') {
    cursor.consume()
    return {type: 'Wildcard'}
  }

  if (next.type === '[') {
    return parseSubscript(cursor)
  }

  throw new UnexpectedTokenError(next, 'Path Segment')
}

/**
 * Parses a bracketed subscript holding one or more comma-separated elements.
 *
 * Subscript ::= '[' SubscriptContent ']'
 */
function parseSubscript(cursor: TokenCursor): SubscriptNode {
  const elements: SubscriptElementNode[] = []

  cursor.consume('[')

  // at least one element is required; further elements are comma-separated
  elements.push(parseSubscriptElement(cursor))
  while (cursor().type === ',') {
    cursor.consume()
    elements.push(parseSubscriptElement(cursor))
  }

  cursor.consume(']')

  return {type: 'Subscript', elements}
}

/**
 * Parses one element inside subscript brackets: an index, a slice, a
 * comparison, an existence check, or a plain expression.
 */
function parseSubscriptElement(cursor: TokenCursor): SubscriptElementNode {
  if (cursor().type === ':' || cursor().type === 'Number') {
    const indexOrSlice = parseIndexOrSlice(cursor)

    // A plain number followed by an operator is the left-hand side of a
    // comparison (e.g. `[21 < age]`), just like any other literal.
    if (indexOrSlice.type === 'Number' && cursor().type === 'Operator') {
      return parseComparison(cursor, indexOrSlice)
    }

    return indexOrSlice
  }

  const nestedExpression = parseExpression(cursor)

  if (cursor().type === 'Operator') {
    return parseComparison(cursor, nestedExpression)
  }

  // the existence check only applies to paths
  if (cursor().type === '?' && nestedExpression.type === 'Path') {
    cursor.consume()
    return {
      type: 'Existence',
      base: nestedExpression,
    }
  }

  return nestedExpression
}

/**
 * Parses the operator and right-hand side of a comparison whose left-hand side
 * has already been parsed. The cursor must be positioned on the operator.
 */
function parseComparison(cursor: TokenCursor, left: ExprNode): ComparisonNode {
  const {value: operator} = cursor.consume('Operator')
  return {
    type: 'Comparison',
    left,
    operator,
    right: parseExpression(cursor),
  }
}

/**
 * Parses an index (`n`), a slice (`n:`, `n:m`, `:m`), or a bare `:`, which is
 * treated as a wildcard path.
 *
 * @throws UnexpectedTokenError if the current token is neither a number nor `:`
 */
function parseIndexOrSlice(cursor: TokenCursor): SliceNode | NumberNode | PathNode {
  if (cursor().type === 'Number') {
    const {value: start} = cursor.consume('Number')

    if (cursor().type === ':') {
      cursor.consume()

      if (cursor().type === 'Number') {
        const {value: end} = cursor.consume('Number')
        return {type: 'Slice', start, end}
      }

      return {type: 'Slice', start}
    }

    return {type: 'Number', value: start}
  }

  if (cursor().type === ':') {
    cursor.consume()

    if (cursor().type === 'Number') {
      const {value: end} = cursor.consume('Number')
      return {type: 'Slice', end}
    }

    // bare `:` is parsed as a wildcard
    return {type: 'Path', segment: {type: 'Wildcard'}}
  }

  throw new UnexpectedTokenError(cursor(), 'Number or Slice')
}