UNPKG

@jahed/sparql-engine

Version:

SPARQL query engine for servers and web browsers.

473 lines (428 loc) 16.3 kB
// SPDX-License-Identifier: MIT
import { identity, isUndefined, uniqBy } from "lodash-es";

/**
 * The input of a {@link PipelineStage}: another {@link PipelineStage}, a
 * {@link StreamPipelineInput}, an iterable, a promise-like or an array-like.
 */
export type PipelineInput<T> =
  | PipelineStage<T>
  | StreamPipelineInput<T>
  | Iterable<T>
  | PromiseLike<T>
  | ArrayLike<T>;

/**
 * A (key, value) pair produced for each item while grouping in
 * {@link PipelineEngine.groupBy}.
 */
interface SubGroup<K, R> {
  key: K;
  value: R;
}

/**
 * An input of a {@link PipelineStage} which produces items in a stream/async way.
 * Useful for connecting a Node.js stream into a pipeline of iterators.
 */
export interface StreamPipelineInput<T> {
  /**
   * Produces a new value and injects it into the pipeline
   * @param value - New value produced
   */
  next(value: T): void;

  /**
   * Closes the pipeline input
   */
  complete(): void;

  /**
   * Reports an error that occurs during execution
   * @param err - The error to report
   */
  error(err?: any): void;
}

/**
 * The three callbacks through which a {@link PipelineStage} pushes its
 * values, errors and completion signal.
 */
export type PipelineObserver<T> = {
  next: (value: T) => void;
  error: (err: any) => void;
  complete: () => void;
};

/**
 * Handle returned by {@link PipelineStage.subscribe}; exposes `Symbol.dispose`.
 */
export type PipelineSubscription = {
  [Symbol.dispose](): void;
};

/**
 * Creates a {@link PipelineSubscription} whose disposal does nothing.
 * @return A no-op subscription
 */
export function createSubscription(): PipelineSubscription {
  return {
    [Symbol.dispose]() {},
  };
}

/**
 * What {@link PipelineStage.subscribe} accepts as its first argument: a
 * (partial) observer object, a bare `next` callback, or nothing.
 */
export type PipelineObserverOrNext<T> =
  | Partial<PipelineObserver<T>>
  | PipelineObserver<T>["next"]
  | undefined
  | null;

/**
 * Normalizes a {@link PipelineObserverOrNext} into a (partial) observer object.
 * @param observerOrNext - Observer object, `next` callback, or nothing
 * @return The observer object itself, `{ next }` when given a callback, or an
 * empty object when given `undefined`/`null`
 */
export function createObserver<T>(
  observerOrNext?: PipelineObserverOrNext<T>
): Partial<PipelineObserver<T>> {
  if (!observerOrNext) {
    return {};
  }
  return typeof observerOrNext === "object"
    ? observerOrNext
    : { next: observerOrNext };
}

/**
 * A step in a pipeline. Data can be consumed in a pull-based or push-based fashion.
 */
export interface PipelineStage<T> {
  /**
   * Subscribes to the items emitted by the stage, in a push-based fashion
   * @param observerOrNext - Observer object, or function invoked on each item produced by the stage
   * @param onError - Function invoked in case of an error
   * @param onComplete - Function invoked when the stage ends
   * @return A subscription handle
   */
  subscribe(
    observerOrNext?: PipelineObserverOrNext<T>,
    onError?: PipelineObserver<T>["error"],
    onComplete?: PipelineObserver<T>["complete"]
  ): PipelineSubscription;

  /**
   * Invokes a callback on each item produced by the stage
   * @param cb - Function invoked on each item produced by the stage
   */
  forEach(cb: (value: T) => void): void;

  /**
   * Applies a transformation function to this stage
   * @param fn - Function that receives this stage and returns a new one
   * @return The stage returned by `fn`
   */
  pipe<N>(fn: (source: PipelineStage<T>) => PipelineStage<N>): PipelineStage<N>;

  /**
   * Consumes the stage in a pull-based fashion (`for await ... of`)
   */
  [Symbol.asyncIterator](): AsyncIterator<T>;
}

/**
 * Scans every element of `values` and keeps the one preferred by `ranking`.
 * Returns `undefined` (typed as T) when `values` is empty.
 */
function pickByRanking<T>(values: T[], ranking: (x: T, y: T) => boolean): T {
  let best = values[0];
  // Start at 1 (index 0 is the initial candidate) and go up to the LAST element included.
  for (let i = 1; i < values.length; i++) {
    if (ranking(values[i], best)) {
      best = values[i];
    }
  }
  return best;
}

/**
 * Abstract representation used to apply transformations on a pipeline of iterators.
 * Concrete subclasses are used by the framework to build the query execution pipeline.
 * @abstract
 */
export abstract class PipelineEngine {
  /**
   * Creates a PipelineStage that emits no items
   * @return A PipelineStage that emits no items
   */
  abstract empty<T>(): PipelineStage<T>;

  /**
   * Converts the arguments to a PipelineStage
   * @param values - Values to convert
   * @return A PipelineStage that emits the values
   */
  abstract of<T>(...values: T[]): PipelineStage<T>;

  /**
   * Creates a PipelineStage from an Array, an array-like object, a Promise, an iterable object, or an Observable-like object.
   * @param value - Source object
   * @return A PipelineStage that emits the values contained in the object
   */
  abstract from<T>(value: PipelineInput<T>): PipelineStage<T>;

  /**
   * Creates a PipelineStage from something that emits values asynchronously, using a {@link StreamPipelineInput} to feed values/errors into the pipeline.
   * @param cb - Callback invoked with a {@link StreamPipelineInput} used to feed values into the pipeline.
   * @return A PipelineStage that emits the values produced asynchronously
   */
  abstract fromAsync<T>(
    cb: (input: StreamPipelineInput<T>) => void
  ): PipelineStage<T>;

  /**
   * Clones a PipelineStage
   * @param stage - PipelineStage to clone
   * @return Cloned PipelineStage
   */
  abstract clone<T>(stage: PipelineStage<T>): PipelineStage<T>;

  /**
   * Handles errors raised in the pipeline as follows:
   * 1) Default: raise the error
   * 2) Use a handler function to return a new PipelineStage in case of error
   * @param input - Source PipelineStage
   * @param handler - Function called in case of error to generate a new PipelineStage
   * @return Output PipelineStage
   */
  abstract catch<T, O>(
    input: PipelineStage<T>,
    handler?: (err: Error) => PipelineStage<O>
  ): PipelineStage<T | O>;

  /**
   * Creates an output PipelineStage which concurrently emits all values from every given input PipelineStage.
   * @param inputs - Input PipelineStages
   * @return Output PipelineStage
   */
  abstract merge<T>(
    ...inputs: Array<PipelineStage<T> | PipelineInput<T>>
  ): PipelineStage<T>;

  /**
   * Applies a given `mapper` function to each value emitted by the source PipelineStage, and emits the resulting values as a PipelineStage.
   * @param input - Source PipelineStage
   * @param mapper - The function to apply to each value emitted by the source PipelineStage
   * @return A PipelineStage that emits the values from the source PipelineStage transformed by the given `mapper` function.
   */
  abstract map<F, T>(
    input: PipelineStage<F>,
    mapper: (value: F) => T
  ): PipelineStage<T>;

  /**
   * Projects each source value to a PipelineStage which is merged in the output PipelineStage.
   * @param input - Input PipelineStage
   * @param mapper - Transformation function
   * @return Output PipelineStage
   */
  abstract mergeMap<F, T>(
    input: PipelineStage<F>,
    mapper: (value: F) => PipelineStage<T>
  ): PipelineStage<T>;

  /**
   * Same signature shape as {@link PipelineEngine.mergeMap}, except that the
   * function may also return a Promise of a PipelineStage.
   * @param input - Input PipelineStage
   * @param predicate - Function returning a PipelineStage, or a Promise of one, for each source value
   * @return Output PipelineStage
   */
  abstract mergeMapAsync<F, T>(
    input: PipelineStage<F>,
    predicate: (value: F) => PipelineStage<T> | Promise<PipelineStage<T>>
  ): PipelineStage<T>;

  /**
   * Does something after the PipelineStage has produced all its results
   * @param input - Input PipelineStage
   * @param callback - Function invoked after the PipelineStage has produced all its results
   * @return Output PipelineStage
   */
  abstract finalize<T>(
    input: PipelineStage<T>,
    callback: () => void
  ): PipelineStage<T>;

  /**
   * Maps each source value to an array of values which is merged in the output PipelineStage.
   * @param input - Input PipelineStage
   * @param mapper - Transformation function
   * @return Output PipelineStage
   */
  flatMap<F, T>(
    input: PipelineStage<F>,
    mapper: (value: F) => T[]
  ): PipelineStage<T> {
    return this.mergeMap(input, (value: F) => this.of(...mapper(value)));
  }

  /**
   * Flattens the output of a pipeline stage that emits arrays of values into single values.
   * @param input - Input PipelineStage
   * @return Output PipelineStage
   */
  flatten<T>(input: PipelineStage<T[]>): PipelineStage<T> {
    return this.flatMap(input, (v) => v);
  }

  /**
   * Filters items emitted by the source PipelineStage by only emitting those that satisfy a specified predicate.
   * @param input - Input PipelineStage
   * @param predicate - Predicate function
   * @return Output PipelineStage
   */
  abstract filter<T>(
    input: PipelineStage<T>,
    predicate: (value: T) => boolean
  ): PipelineStage<T>;

  /**
   * Same signature shape as {@link PipelineEngine.filter}, except that the
   * predicate may also return a Promise of a boolean.
   * @param input - Input PipelineStage
   * @param predicate - Predicate function, possibly asynchronous
   * @return Output PipelineStage
   */
  abstract filterAsync<T>(
    input: PipelineStage<T>,
    predicate: (value: T) => boolean | Promise<boolean>
  ): PipelineStage<T>;

  /**
   * Applies an accumulator function over the source PipelineStage, and returns the accumulated result when the source completes, given an initial value.
   * @param input - Input PipelineStage
   * @param reducer - Accumulator function
   * @param initial - Initial value of the accumulator
   * @return A PipelineStage that emits a single value that is the result of accumulating the values emitted by the source PipelineStage.
   */
  abstract reduce<F, T>(
    input: PipelineStage<F>,
    reducer: (acc: T, value: F) => T,
    initial: T
  ): PipelineStage<T>;

  /**
   * Emits only the first `count` values emitted by the source PipelineStage.
   * @param input - Input PipelineStage
   * @param count - How many items to take
   * @return A PipelineStage that emits only the first count values emitted by the source PipelineStage, or all of the values from the source if the source emits fewer than count values.
   */
  abstract limit<T>(input: PipelineStage<T>, count: number): PipelineStage<T>;

  /**
   * Returns a PipelineStage that skips the first count items emitted by the source PipelineStage.
   * @param input - Input PipelineStage
   * @param count - How many items to skip
   * @return A PipelineStage that skips values emitted by the source PipelineStage.
   */
  abstract skip<T>(input: PipelineStage<T>, count: number): PipelineStage<T>;

  /**
   * Applies a callback on every item emitted by the source PipelineStage
   * @param input - Input PipelineStage
   * @param cb - Callback
   */
  abstract forEach<T>(input: PipelineStage<T>, cb: (value: T) => void): void;

  /**
   * Emits given values if the source PipelineStage completes without emitting any next value, otherwise mirrors the source PipelineStage.
   * @param input - Input PipelineStage
   * @param values - The default values used if the source PipelineStage is empty.
   * @return A PipelineStage that emits either the specified default values if the source PipelineStage emits no items, or the values emitted by the source PipelineStage.
   */
  abstract defaultValues<T>(
    input: PipelineStage<T>,
    ...values: T[]
  ): PipelineStage<T>;

  /**
   * Buffers the source PipelineStage values until the size hits the maximum bufferSize given.
   * @param input - Input PipelineStage
   * @param count - The maximum size of the buffer emitted.
   * @return A PipelineStage of arrays of buffered values.
   */
  abstract bufferCount<T>(
    input: PipelineStage<T>,
    count: number
  ): PipelineStage<T[]>;

  /**
   * Creates a PipelineStage which collects all items from the source PipelineStage into an array, and then emits this array.
   * @param input - Input PipelineStage
   * @return A PipelineStage which emits all values emitted by the source PipelineStage as an array
   */
  abstract collect<T>(input: PipelineStage<T>): PipelineStage<T[]>;

  /**
   * Returns a PipelineStage that emits all items emitted by the source PipelineStage that are distinct by comparison from previous items.
   * Warning: this function needs to materialize all values of the pipeline.
   * @param input - Input PipelineStage
   * @param selector - Optional function to select which value you want to check as distinct. Defaults to the item itself.
   * @return A PipelineStage that emits items from the source PipelineStage with distinct values.
   */
  distinct<T, K>(
    input: PipelineStage<T>,
    selector?: (value: T) => T | K
  ): PipelineStage<T> {
    if (isUndefined(selector)) {
      selector = identity;
    }
    // Capture the now-defined selector so the closure below needs no non-null assertion.
    const keyOf = selector;
    return this.flatMap(this.collect(input), (values: T[]) =>
      uniqBy(values, keyOf)
    );
  }

  /**
   * Emits only the first value emitted by the source PipelineStage.
   * @param input - Input PipelineStage
   * @return A PipelineStage of the first item emitted by the source.
   */
  first<T>(input: PipelineStage<T>): PipelineStage<T> {
    return this.limit(input, 1);
  }

  /**
   * Returns a PipelineStage that emits the items emitted by the source PipelineStage together with the items you specify as arguments.
   * NOTE(review): this is built on {@link PipelineEngine.merge}, which is documented as
   * emitting concurrently; whether `values` always come after the source's items depends
   * on the concrete `merge` implementation - confirm before relying on ordering.
   * @param input - Input PipelineStage
   * @param values - Values to append
   * @return A PipelineStage that emits the items emitted by the source PipelineStage and the additional values.
   */
  endWith<T>(input: PipelineStage<T>, values: T[]): PipelineStage<T> {
    return this.merge(input, this.from(values));
  }

  /**
   * Performs a side effect for every emission on the source PipelineStage, but returns a PipelineStage that emits the same values as the source.
   * @param input - Input PipelineStage
   * @param cb - Callback invoked on each item
   * @return A PipelineStage that emits the source's values after running the callback on each of them.
   */
  tap<T>(input: PipelineStage<T>, cb: (value: T) => void): PipelineStage<T> {
    return this.map(input, (value: T) => {
      cb(value);
      return value;
    });
  }

  /**
   * Finds the smallest value produced by a pipeline of iterators.
   * It takes a ranking function as input, which is invoked with (x, y)
   * and must return true if x < y and false otherwise.
   * Warning: this function needs to materialize all values of the pipeline.
   * If the source emits no value, the emitted value is `undefined`.
   * @param input - Input PipelineStage
   * @param ranking - (optional) Ranking function, defaults to `x < y`
   * @return A pipeline stage that emits the lowest value found
   */
  min<T>(
    input: PipelineStage<T>,
    ranking?: (x: T, y: T) => boolean
  ): PipelineStage<T> {
    if (isUndefined(ranking)) {
      ranking = (x: T, y: T) => x < y;
    }
    const isLower = ranking;
    return this.map(this.collect(input), (values: T[]) =>
      pickByRanking(values, isLower)
    );
  }

  /**
   * Finds the highest value produced by a pipeline of iterators.
   * It takes a ranking function as input, which is invoked with (x, y)
   * and must return true if x > y and false otherwise.
   * Warning: this function needs to materialize all values of the pipeline.
   * If the source emits no value, the emitted value is `undefined`.
   * @param input - Input PipelineStage
   * @param ranking - (optional) Ranking function, defaults to `x > y`
   * @return A pipeline stage that emits the highest value found
   */
  max<T>(
    input: PipelineStage<T>,
    ranking?: (x: T, y: T) => boolean
  ): PipelineStage<T> {
    if (isUndefined(ranking)) {
      ranking = (x: T, y: T) => x > y;
    }
    const isHigher = ranking;
    return this.map(this.collect(input), (values: T[]) =>
      pickByRanking(values, isHigher)
    );
  }

  /**
   * Groups the items produced by a pipeline according to a specified criterion,
   * and emits the resulting groups as `[key, items]` pairs.
   * Warning: this function needs to materialize all values of the pipeline.
   * @param input - Input PipelineStage
   * @param keySelector - A function that extracts the grouping key for each item
   * @param elementSelector - (optional) A function that transforms items before inserting them in a group
   * @return A PipelineStage that emits one `[key, items]` pair per distinct key
   */
  groupBy<T, K, R>(
    input: PipelineStage<T>,
    keySelector: (value: T) => K,
    elementSelector?: (value: T) => R
  ): PipelineStage<[K, R[]]> {
    if (isUndefined(elementSelector)) {
      elementSelector = identity;
    }
    const keyed: PipelineStage<SubGroup<K, R>> = this.map(input, (value) => {
      return {
        key: keySelector(value),
        // Reassigned above when omitted, so it is never undefined here.
        value: elementSelector!(value),
      };
    });
    return this.mergeMap(this.collect(keyed), (subgroups: SubGroup<K, R>[]) => {
      // Build the groups in a Map local to this invocation, so that nothing
      // leaks from one run of this callback into the next.
      const groups = new Map<K, R[]>();
      for (const { key, value } of subgroups) {
        const members = groups.get(key);
        if (members === undefined) {
          groups.set(key, [value]);
        } else {
          // Append in place rather than copying the whole group on every item.
          members.push(value);
        }
      }
      // Inject the groups into the pipeline, then close the input so that
      // downstream stages are told no more groups are coming.
      return this.fromAsync<[K, R[]]>((output) => {
        groups.forEach((members, key) => output.next([key, members]));
        output.complete();
      });
    });
  }

  /**
   * Peeks values from the input pipeline stage, and uses them to decide
   * between two candidate pipeline stages to continue the pipeline.
   * The peek is done on a clone of the input (see {@link PipelineEngine.clone}).
   * @param input - Input pipeline stage
   * @param count - How many items to peek from the input?
   * @param predicate - Predicate function invoked with the peeked values
   * @param ifCase - Callback invoked with the peeked values if the predicate function evaluates to true
   * @param elseCase - Callback invoked with the peeked values if the predicate function evaluates to false
   * @return A pipeline stage
   */
  peekIf<T, O>(
    input: PipelineStage<T>,
    count: number,
    predicate: (values: T[]) => boolean,
    ifCase: (values: T[]) => Promise<PipelineStage<O>>,
    elseCase: (values: T[]) => Promise<PipelineStage<O>>
  ): PipelineStage<O> {
    const peekable = this.limit(this.clone(input), count);
    return this.mergeMapAsync(this.collect(peekable), (values) => {
      if (predicate(values)) {
        return ifCase(values);
      }
      return elseCase(values);
    });
  }
}