UNPKG

@jahed/sparql-engine

Version:

SPARQL query engine for servers and web browsers.

507 lines (474 loc) 17.2 kB
// SPDX-License-Identifier: MIT import { isNull, isUndefined, partition, some, sortBy } from "lodash-es"; import type { BgpPattern, ConstructQuery, Pattern, Query, SparqlQuery, Triple, Variable, VariableExpression, } from "sparqljs"; import type { PipelineStage } from "../engine/pipeline/pipeline-engine.ts"; import { Pipeline } from "../engine/pipeline/pipeline.ts"; import type { CustomFunctions } from "../operators/expressions/sparql-expression.ts"; import ask from "../operators/modifiers/ask.ts"; import construct from "../operators/modifiers/construct.ts"; import select from "../operators/modifiers/select.ts"; import type { Consumable } from "../operators/update/consumer.ts"; import Optimizer from "../optimizer/optimizer.ts"; import { BindingBase, Bindings } from "../rdf/bindings.ts"; import Dataset from "../rdf/dataset.ts"; import type { EngineTriple } from "../types.ts"; import { deepApplyBindings, extendByBindings } from "../utils.ts"; import { RDF, isVariable } from "../utils/rdf.ts"; import type { BGPCache } from "./cache/types.ts"; import ExecutionContext from "./context/execution-context.ts"; import ContextSymbols from "./context/symbols.ts"; import AggregateStageBuilder from "./stages/aggregate-stage-builder.ts"; import BGPStageBuilder from "./stages/bgp-stage-builder.ts"; import BindStageBuilder from "./stages/bind-stage-builder.ts"; import DistinctStageBuilder from "./stages/distinct-stage-builder.ts"; import FilterStageBuilder from "./stages/filter-stage-builder.ts"; import GlushkovStageBuilder from "./stages/glushkov-executor/glushkov-stage-builder.ts"; import GraphStageBuilder from "./stages/graph-stage-builder.ts"; import MinusStageBuilder from "./stages/minus-stage-builder.ts"; import OptionalStageBuilder from "./stages/optional-stage-builder.ts"; import OrderByStageBuilder from "./stages/orderby-stage-builder.ts"; import { extractPropertyPaths } from "./stages/rewritings.ts"; import ServiceStageBuilder from "./stages/service-stage-builder.ts"; 
import StageBuilder from "./stages/stage-builder.ts";
import UnionStageBuilder from "./stages/union-stage-builder.ts";
import UpdateStageBuilder from "./stages/update-stage-builder.ts";

/** Item types that a pipeline returned by {@link PlanBuilder.build} may emit. */
export type QueryOutput = Bindings | EngineTriple | boolean;

/** Numeric key identifying the kind of SPARQL operation a stage builder handles. */
export type SparqlOperation = number;

/** Keys under which stage builders are registered in a {@link PlanBuilder}. */
export const SPARQL_OPERATION = {
  AGGREGATE: 0,
  BGP: 1,
  BIND: 2,
  DISTINCT: 3,
  FILTER: 4,
  GRAPH: 5,
  MINUS: 6,
  OPTIONAL: 7,
  ORDER_BY: 8,
  PROPERTY_PATH: 9,
  SERVICE: 10,
  UPDATE: 11,
  UNION: 12,
};

/**
 * Builds a physical query execution plan (a {@link PipelineStage}) from a
 * parsed SPARQL query, delegating each kind of SPARQL operation to the
 * {@link StageBuilder} registered for it.
 */
export class PlanBuilder {
  private _optimizer: Optimizer;
  private _stageBuilders: Map<SparqlOperation, StageBuilder>;
  public _currentCache: BGPCache | null; // Public for tests.
  private _dataset: Dataset;
  private _customFunctions?: CustomFunctions;

  /**
   * Registers one default stage builder per entry of {@link SPARQL_OPERATION}.
   * @param dataset - Dataset handed to every default stage builder
   * @param customFunctions - Optional custom functions forwarded to the
   * AGGREGATE, BIND and FILTER stage builders
   */
  constructor(dataset: Dataset, customFunctions?: CustomFunctions) {
    this._dataset = dataset;
    this._customFunctions = customFunctions;
    this._optimizer = Optimizer.getDefault();
    this._currentCache = null;
    this._stageBuilders = new Map();

    this.use(
      SPARQL_OPERATION.AGGREGATE,
      new AggregateStageBuilder(this._dataset)
    );
    this.use(SPARQL_OPERATION.BGP, new BGPStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.BIND, new BindStageBuilder(this._dataset));
    this.use(
      SPARQL_OPERATION.DISTINCT,
      new DistinctStageBuilder(this._dataset)
    );
    this.use(SPARQL_OPERATION.FILTER, new FilterStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.GRAPH, new GraphStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.MINUS, new MinusStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.SERVICE, new ServiceStageBuilder(this._dataset));
    this.use(
      SPARQL_OPERATION.OPTIONAL,
      new OptionalStageBuilder(this._dataset)
    );
    this.use(SPARQL_OPERATION.ORDER_BY, new OrderByStageBuilder(this._dataset));
    this.use(
      SPARQL_OPERATION.PROPERTY_PATH,
      new GlushkovStageBuilder(this._dataset)
    );
    this.use(SPARQL_OPERATION.UNION, new UnionStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.UPDATE, new UpdateStageBuilder(this._dataset));
  }

  /** Replaces the optimizer applied to every query passed to {@link build}. */
  set optimizer(opt: Optimizer) {
    this._optimizer = opt;
  }

  /**
   * Registers a stage builder for one kind of SPARQL operation, replacing any
   * builder previously registered under the same key.
   * @param kind - Operation key, see {@link SPARQL_OPERATION}
   * @param stageBuilder - Stage builder to use for that operation
   */
  use(kind: SparqlOperation, stageBuilder: StageBuilder) {
    // NOTE(review): the null-then-this double assignment is kept as-is; it
    // looks redundant unless the `builder` setter has side effects -- confirm
    // against StageBuilder before simplifying.
    stageBuilder.builder = null;
    stageBuilder.builder = this;
    this._stageBuilders.set(kind, stageBuilder);
  }

  /**
   * Sets the cache assigned to execution contexts created by {@link build}.
   * @param cache - Cache to use
   */
  async useCache(cache: BGPCache): Promise<void> {
    this._currentCache = cache;
  }

  /** Clears the cache so that new execution contexts are created without one. */
  disableCache(): void {
    this._currentCache = null;
  }

  /**
   * Returns the stage builder registered for an operation, or fails with the
   * supplied message when none is registered.
   */
  private _requireStageBuilder(
    kind: SparqlOperation,
    message: string
  ): StageBuilder {
    const stageBuilder = this._stageBuilders.get(kind);
    if (stageBuilder === undefined) {
      throw new Error(message);
    }
    return stageBuilder;
  }

  /**
   * Builds the physical plan for a parsed SPARQL query or update.
   * @param query - Parsed SPARQL query; it is first passed through the optimizer
   * @param context - Execution context. When omitted, a new one is created and
   * given the builder's current cache
   * @return The {@link PipelineStage} that evaluates the query
   * @throws SyntaxError when the query type is neither "query" nor "update"
   * @throws Error when an update is requested without an UPDATE stage builder
   */
  async build(
    query: SparqlQuery,
    context?: ExecutionContext
  ): Promise<PipelineStage<QueryOutput>> {
    if (isNull(context) || isUndefined(context)) {
      context = new ExecutionContext();
      context.cache = this._currentCache;
    }
    query = this._optimizer.optimize(query);

    switch (query.type) {
      case "query":
        return this._buildQueryPlan(query, context);
      case "update":
        return this._requireStageBuilder(
          SPARQL_OPERATION.UPDATE,
          "A PlanBuilder cannot evaluate SPARQL UPDATE queries without a StageBuilder for it"
        ).execute(query.updates, context);
      default:
        throw new SyntaxError("Unsupported SPARQL query type");
    }
  }

  /**
   * Builds the physical plan for a SELECT, CONSTRUCT, ASK or DESCRIBE query.
   * The query object passed in is not modified.
   * @param query - Parsed query
   * @param context - Execution context; prefixes, the LIMIT/OFFSET flag and the
   * FROM graphs are recorded on it
   * @param source - Input {@link PipelineStage}; defaults to a stage holding a
   * single empty set of bindings
   * @return The {@link PipelineStage} that evaluates the query
   * @throws Error when the query type is unsupported or a required stage
   * builder is not registered
   */
  async _buildQueryPlan(
    query: Query,
    context: ExecutionContext,
    source?: PipelineStage<Bindings>
  ): Promise<PipelineStage<Bindings>> {
    const engine = Pipeline.getInstance();
    if (isNull(source) || isUndefined(source)) {
      // build pipeline starting iterator
      source = engine.of(new BindingBase());
    }
    context.setProperty(ContextSymbols.PREFIXES, query.prefixes);

    // rewrite a DESCRIBE query into a CONSTRUCT query
    if (query.queryType === "DESCRIBE") {
      const template: Triple[] = [];
      const describeWhere: any = [
        {
          type: "bgp",
          triples: [],
        },
      ];
      query.variables.forEach((v: any) => {
        // Derive the helper variable names from the term's value. Interpolating
        // a term object that has no custom toString would yield
        // "[object Object]" for every described resource, making them all share
        // the same predicate/object variables.
        const suffix = typeof v?.value === "string" ? v.value : String(v);
        const triple = RDF.quad(
          v,
          RDF.variable(`pred__describe__${suffix}`),
          RDF.variable(`obj__describe__${suffix}`)
        );
        template.push(triple);
        describeWhere[0].triples.push(triple);
      });
      const constructQuery: ConstructQuery = {
        prefixes: query.prefixes,
        from: query.from,
        queryType: "CONSTRUCT",
        template,
        type: "query",
        // A DESCRIBE without a WHERE clause must still match the generated BGP.
        where: (query.where ?? []).concat(describeWhere),
      };
      return this._buildQueryPlan(constructQuery, context, source);
    }

    // From the beginning, detect any LIMIT/OFFSET modifiers, as they impact
    // the caching strategy
    context.setProperty(
      ContextSymbols.HAS_LIMIT_OFFSET,
      "limit" in query || "offset" in query
    );

    // Handles FROM clauses
    if (query.from) {
      context.defaultGraphs = query.from.default;
      context.namedGraphs = query.from.named;
    }

    // Handles WHERE clause
    let graphIterator: PipelineStage<Bindings>;
    if (query.where?.length) {
      graphIterator = await this._buildWhere(source, query.where, context);
    } else {
      graphIterator = engine.of(new BindingBase());
    }

    // Parse query variables to separate projection & aggregate variables
    const aggregates: VariableExpression[] = [];
    if ("variables" in query) {
      const projection: Variable[] = [];
      for (const v of query.variables as Variable[]) {
        if ("variable" in v) {
          aggregates.push(v);
          projection.push(v.variable);
        } else {
          projection.push(v);
        }
      }
      // Work on a shallow copy so the caller's query keeps its aggregate
      // expressions and can be planned again.
      query = { ...query, variables: projection };
    }

    // Handles SPARQL aggregations
    if ("group" in query || aggregates.length > 0) {
      // Handles GROUP BY
      graphIterator = await this._requireStageBuilder(
        SPARQL_OPERATION.AGGREGATE,
        "A PlanBuilder cannot evaluate SPARQL aggregates without a StageBuilder for it"
      ).execute(graphIterator, query, context, this._customFunctions);
    }

    if (aggregates.length > 0) {
      // Handles SPARQL aggregation functions
      const bindBuilder = this._requireStageBuilder(
        SPARQL_OPERATION.BIND,
        "A PlanBuilder cannot evaluate SPARQL aggregation functions without a StageBuilder for it"
      );
      for (const agg of aggregates) {
        graphIterator = await bindBuilder.execute(
          graphIterator,
          agg,
          this._customFunctions,
          context
        );
      }
    }

    // Handles ORDER BY
    if ("order" in query) {
      graphIterator = await this._requireStageBuilder(
        SPARQL_OPERATION.ORDER_BY,
        "A PlanBuilder cannot evaluate SPARQL ORDER BY clauses without a StageBuilder for it"
      ).execute(graphIterator, query.order!);
    }

    switch (query.queryType) {
      case "SELECT": {
        graphIterator = select(graphIterator, query);
        break;
      }
      case "CONSTRUCT": {
        graphIterator = construct(
          graphIterator,
          query
        ) as unknown as PipelineStage<Bindings>;
        break;
      }
      case "ASK": {
        graphIterator = ask(
          graphIterator
        ) as unknown as PipelineStage<Bindings>;
        break;
      }
      default: {
        throw new Error("Unsupported SPARQL query type.");
      }
    }

    // Create iterators for modifiers
    if ("distinct" in query && query.distinct) {
      graphIterator = await this._requireStageBuilder(
        SPARQL_OPERATION.DISTINCT,
        "A PlanBuilder cannot evaluate a DISTINCT clause without a StageBuilder for it"
      ).execute(graphIterator, context);
    }

    // Add offsets and limits if requested
    if ("offset" in query) {
      graphIterator = engine.skip(graphIterator, query.offset!);
    }
    if ("limit" in query) {
      graphIterator = engine.limit(graphIterator, query.limit!);
    }
    return graphIterator;
  }

  /**
   * Builds the physical plan for a WHERE clause, i.e. a list of patterns.
   * Patterns are reordered, adjacent Basic Graph Patterns are merged, and each
   * resulting pattern is chained onto the previous stage. The patterns passed
   * in are not modified.
   * @param source - Input {@link PipelineStage}
   * @param groups - Patterns of the WHERE clause
   * @param context - Execution context
   * @return A {@link PipelineStage} used to evaluate the WHERE clause
   */
  async _buildWhere(
    source: PipelineStage<Bindings>,
    groups: Pattern[],
    context: ExecutionContext
  ): Promise<PipelineStage<Bindings>> {
    // Evaluation order: BGPs and GRAPH clauses with a fixed name first, then
    // other patterns, then VALUES, then FILTER, and finally GRAPH clauses whose
    // name is a variable.
    groups = sortBy(groups, (g) => {
      switch (g.type) {
        case "graph":
          return isVariable(g.name) ? 5 : 0;
        case "bgp":
          return 0;
        case "values":
          return 3;
        case "filter":
          return 4;
        default:
          return 1;
      }
    });

    // Handle VALUES clauses using query rewriting
    if (some(groups, (g) => g.type === "values")) {
      return this._buildValues(source, groups, context);
    }

    // Merge BGPs on the same level
    const merged: Pattern[] = [];
    for (const group of groups) {
      const last = merged[merged.length - 1];
      if (group.type === "bgp" && last !== undefined && last.type === "bgp") {
        // Merge into a fresh object so the caller's BGP is left untouched.
        const mergedBgp: BgpPattern = {
          ...last,
          triples: last.triples.concat(group.triples),
        };
        merged[merged.length - 1] = mergedBgp;
      } else {
        merged.push(group);
      }
    }

    for (const group of merged) {
      source = await this._buildGroup(source, group, context);
    }
    return source;
  }

  /**
   * Build a physical plan for a SPARQL group clause
   * @param source - Input {@link PipelineStage}
   * @param group - SPARQL Group
   * @param context - Execution context; child stages receive a clone of it
   * @return A {@link PipelineStage} used to evaluate the SPARQL Group
   * @throws Error when the pattern type is unsupported or no stage builder is
   * registered for it
   */
  async _buildGroup(
    source: PipelineStage<Bindings>,
    group: Pattern,
    context: ExecutionContext
  ): Promise<PipelineStage<Bindings>> {
    // Child stages work on a clone of the context
    const childContext = context.clone();

    switch (group.type) {
      case "bgp": {
        const bgpBuilder = this._requireStageBuilder(
          SPARQL_OPERATION.BGP,
          "A PlanBuilder cannot evaluate a Basic Graph Pattern without a Stage Builder for it"
        );
        // find possible Property paths
        const [classicTriples, pathTriples] = extractPropertyPaths(group);
        if (pathTriples.length > 0) {
          // NOTE(review): property paths receive `context`, not `childContext`,
          // unlike every other case -- kept as in the original; confirm intent.
          source = await this._requireStageBuilder(
            SPARQL_OPERATION.PROPERTY_PATH,
            "A PlanBuilder cannot evaluate property paths without a Stage Builder for it"
          ).execute(source, pathTriples, context);
        }
        // delegate remaining BGP evaluation to the dedicated executor
        return await bgpBuilder.execute(source, classicTriples, childContext);
      }
      case "query":
        return this._buildQueryPlan(group, childContext, source);
      case "graph":
        // delegate GRAPH evaluation to an executor
        return this._requireStageBuilder(
          SPARQL_OPERATION.GRAPH,
          "A PlanBuilder cannot evaluate a GRAPH clause without a Stage Builder for it"
        ).execute(source, group, childContext);
      case "service":
        return this._requireStageBuilder(
          SPARQL_OPERATION.SERVICE,
          "A PlanBuilder cannot evaluate a SERVICE clause without a Stage Builder for it"
        ).execute(source, group, childContext);
      case "group":
        return this._buildWhere(source, group.patterns, childContext);
      case "optional":
        return this._requireStageBuilder(
          SPARQL_OPERATION.OPTIONAL,
          "A PlanBuilder cannot evaluate an OPTIONAL clause without a Stage Builder for it"
        ).execute(source, group, childContext);
      case "union":
        return this._requireStageBuilder(
          SPARQL_OPERATION.UNION,
          "A PlanBuilder cannot evaluate an UNION clause without a Stage Builder for it"
        ).execute(source, group, childContext);
      case "minus":
        return this._requireStageBuilder(
          SPARQL_OPERATION.MINUS,
          "A PlanBuilder cannot evaluate a MINUS clause without a Stage Builder for it"
        ).execute(source, group, childContext);
      case "filter":
        return this._requireStageBuilder(
          SPARQL_OPERATION.FILTER,
          "A PlanBuilder cannot evaluate a FILTER clause without a Stage Builder for it"
        ).execute(source, group, this._customFunctions, childContext);
      case "bind":
        return this._requireStageBuilder(
          SPARQL_OPERATION.BIND,
          "A PlanBuilder cannot evaluate a BIND clause without a Stage Builder for it"
        ).execute(source, group, this._customFunctions, childContext);
      default:
        throw new Error(
          `Unsupported SPARQL group pattern found in query: ${group.type}`
        );
    }
  }

  /**
   * Build a {@link PipelineStage} which evaluates a SPARQL query with VALUES clause(s).
   * It relies on a query rewriting approach:
   * ?s ?p ?o . VALUES ?s { :1 :2 } becomes {:1 ?p ?o BIND(:1 AS ?s)} UNION {:2 ?p ?o BIND(:2 AS ?s)}
   * @param source - Input {@link PipelineStage}
   * @param groups - Query body, i.e., WHERE clause
   * @param context - Execution context
   * @return A {@link PipelineStage} which evaluates a SPARQL query with VALUES clause(s)
   */
  async _buildValues(
    source: PipelineStage<Bindings>,
    groups: Pattern[],
    context: ExecutionContext
  ): Promise<PipelineStage<Bindings>> {
    const [values, others] = partition(groups, (g) => g.type === "values");
    const bindingsLists = values.map((g) => g.values);

    // for each VALUES clause
    const iterators: PipelineStage<Bindings>[] = [];
    for (const bList of bindingsLists) {
      // for each value to bind in the VALUES clause
      const unionBranches = [];
      for (const b of bList) {
        const bindings = BindingBase.fromValuePatternRow(b);
        // BIND each group with the set of bindings and then evaluate it
        const bound = others.map((g) => deepApplyBindings(g, bindings));
        unionBranches.push(
          extendByBindings(
            await this._buildWhere(source, bound, context),
            bindings
          )
        );
      }
      iterators.push(Pipeline.getInstance().merge(...unionBranches));
    }

    // Users may use more than one VALUES clause
    // NOTE(review): the per-clause results are merged (unioned) rather than
    // joined with each other -- confirm this is the intended semantics.
    if (iterators.length > 1) {
      return Pipeline.getInstance().merge(...iterators);
    }
    return iterators[0];
  }
}