@jahed/sparql-engine

SPARQL query engine for servers and web browsers.

// SPDX-License-Identifier: MIT
import { isNull, isUndefined, partition, some, sortBy } from "lodash-es";
import { Pipeline } from "../engine/pipeline/pipeline.js";
import ask from "../operators/modifiers/ask.js";
import construct from "../operators/modifiers/construct.js";
import select from "../operators/modifiers/select.js";
import Optimizer from "../optimizer/optimizer.js";
import { BindingBase, Bindings } from "../rdf/bindings.js";
import Dataset from "../rdf/dataset.js";
import { deepApplyBindings, extendByBindings } from "../utils.js";
import { RDF, isVariable } from "../utils/rdf.js";
import ExecutionContext from "./context/execution-context.js";
import ContextSymbols from "./context/symbols.js";
import AggregateStageBuilder from "./stages/aggregate-stage-builder.js";
import BGPStageBuilder from "./stages/bgp-stage-builder.js";
import BindStageBuilder from "./stages/bind-stage-builder.js";
import DistinctStageBuilder from "./stages/distinct-stage-builder.js";
import FilterStageBuilder from "./stages/filter-stage-builder.js";
import GlushkovStageBuilder from "./stages/glushkov-executor/glushkov-stage-builder.js";
import GraphStageBuilder from "./stages/graph-stage-builder.js";
import MinusStageBuilder from "./stages/minus-stage-builder.js";
import OptionalStageBuilder from "./stages/optional-stage-builder.js";
import OrderByStageBuilder from "./stages/orderby-stage-builder.js";
import { extractPropertyPaths } from "./stages/rewritings.js";
import ServiceStageBuilder from "./stages/service-stage-builder.js";
import StageBuilder from "./stages/stage-builder.js";
import UnionStageBuilder from "./stages/union-stage-builder.js";
import UpdateStageBuilder from "./stages/update-stage-builder.js";

export const SPARQL_OPERATION = {
  AGGREGATE: 0,
  BGP: 1,
  BIND: 2,
  DISTINCT: 3,
  FILTER: 4,
  GRAPH: 5,
  MINUS: 6,
  OPTIONAL: 7,
  ORDER_BY: 8,
  PROPERTY_PATH: 9,
  SERVICE: 10,
  UPDATE: 11,
  UNION: 12,
};

export class PlanBuilder {
  _optimizer;
  _stageBuilders;
  _currentCache; // Public for tests.
  _dataset;
  _customFunctions;

  constructor(dataset, customFunctions) {
    this._dataset = dataset;
    this._customFunctions = customFunctions;
    this._optimizer = Optimizer.getDefault();
    this._currentCache = null;
    this._stageBuilders = new Map();
    this.use(SPARQL_OPERATION.AGGREGATE, new AggregateStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.BGP, new BGPStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.BIND, new BindStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.DISTINCT, new DistinctStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.FILTER, new FilterStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.GRAPH, new GraphStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.MINUS, new MinusStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.SERVICE, new ServiceStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.OPTIONAL, new OptionalStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.ORDER_BY, new OrderByStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.PROPERTY_PATH, new GlushkovStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.UNION, new UnionStageBuilder(this._dataset));
    this.use(SPARQL_OPERATION.UPDATE, new UpdateStageBuilder(this._dataset));
  }

  set optimizer(opt) {
    this._optimizer = opt;
  }

  use(kind, stageBuilder) {
    stageBuilder.builder = null;
    stageBuilder.builder = this;
    this._stageBuilders.set(kind, stageBuilder);
  }

  async useCache(cache) {
    this._currentCache = cache;
  }

  disableCache() {
    this._currentCache = null;
  }

  async build(query, context) {
    if (isNull(context) || isUndefined(context)) {
      context = new ExecutionContext();
      context.cache = this._currentCache;
    }
    query = this._optimizer.optimize(query);
    switch (query.type) {
      case "query":
        return this._buildQueryPlan(query, context);
      case "update":
        if (!this._stageBuilders.has(SPARQL_OPERATION.UPDATE)) {
          throw new Error("A PlanBuilder cannot evaluate SPARQL UPDATE queries without a StageBuilder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.UPDATE)
          .execute(query.updates, context);
      default:
        throw new SyntaxError("Unsupported SPARQL query type");
    }
  }

  async _buildQueryPlan(query, context, source) {
    const engine = Pipeline.getInstance();
    if (isNull(source) || isUndefined(source)) {
      // build pipeline starting iterator
      source = engine.of(new BindingBase());
    }
    context.setProperty(ContextSymbols.PREFIXES, query.prefixes);
    // rewrite a DESCRIBE query into a CONSTRUCT query
    if (query.queryType === "DESCRIBE") {
      const template = [];
      const where = [
        {
          type: "bgp",
          triples: [],
        },
      ];
      query.variables.forEach((v) => {
        const triple = RDF.quad(v, RDF.variable(`pred__describe__${v}`), RDF.variable(`obj__describe__${v}`));
        template.push(triple);
        where[0].triples.push(triple);
      });
      const construct = {
        prefixes: query.prefixes,
        from: query.from,
        queryType: "CONSTRUCT",
        template,
        type: "query",
        where: query.where?.concat(where),
      };
      return this._buildQueryPlan(construct, context, source);
    }
    // From the beginning, detect any LIMIT/OFFSET modifiers, as they impact the caching strategy
    context.setProperty(ContextSymbols.HAS_LIMIT_OFFSET, "limit" in query || "offset" in query);
    // Handles FROM clauses
    if (query.from) {
      context.defaultGraphs = query.from.default;
      context.namedGraphs = query.from.named;
    }
    // Handles WHERE clause
    let graphIterator;
    if (query.where?.length) {
      graphIterator = await this._buildWhere(source, query.where, context);
    } else {
      graphIterator = engine.of(new BindingBase());
    }
    let aggregates = [];
    // Parse query variables to separate projection & aggregate variables
    if ("variables" in query) {
      const next = [];
      for (const v of query.variables) {
        if ("variable" in v) {
          aggregates.push(v);
          next.push(v.variable);
        } else {
          next.push(v);
        }
      }
      query.variables = next;
    }
    // Handles SPARQL aggregations
    if ("group" in query || aggregates.length > 0) {
      // Handles GROUP BY
      graphIterator = await this._stageBuilders
        .get(SPARQL_OPERATION.AGGREGATE)
        .execute(graphIterator, query, context, this._customFunctions);
    }
    if (aggregates.length > 0) {
      // Handles SPARQL aggregation functions
      for (const agg of aggregates) {
        graphIterator = await this._stageBuilders
          .get(SPARQL_OPERATION.BIND)
          .execute(graphIterator, agg, this._customFunctions, context);
      }
    }
    // Handles ORDER BY
    if ("order" in query) {
      if (!this._stageBuilders.has(SPARQL_OPERATION.ORDER_BY)) {
        throw new Error("A PlanBuilder cannot evaluate SPARQL ORDER BY clauses without a StageBuilder for it");
      }
      graphIterator = await this._stageBuilders
        .get(SPARQL_OPERATION.ORDER_BY)
        .execute(graphIterator, query.order);
    }
    switch (query.queryType) {
      case "SELECT": {
        graphIterator = select(graphIterator, query);
        break;
      }
      case "CONSTRUCT": {
        graphIterator = construct(graphIterator, query);
        break;
      }
      case "ASK": {
        graphIterator = ask(graphIterator);
        break;
      }
      default: {
        throw new Error("Unsupported SPARQL query type.");
      }
    }
    // Create iterators for modifiers
    if ("distinct" in query && query.distinct) {
      if (!this._stageBuilders.has(SPARQL_OPERATION.DISTINCT)) {
        throw new Error("A PlanBuilder cannot evaluate a DISTINCT clause without a StageBuilder for it");
      }
      graphIterator = await this._stageBuilders
        .get(SPARQL_OPERATION.DISTINCT)
        .execute(graphIterator, context);
    }
    // Add offsets and limits if requested
    if ("offset" in query) {
      graphIterator = engine.skip(graphIterator, query.offset);
    }
    if ("limit" in query) {
      graphIterator = engine.limit(graphIterator, query.limit);
    }
    // graphIterator.queryType = query.queryType
    return graphIterator;
  }

  async _buildWhere(source, groups, context) {
    groups = sortBy(groups, (g) => {
      switch (g.type) {
        case "graph":
          if (isVariable(g.name)) {
            return 5;
          }
          return 0;
        case "bgp":
          return 0;
        case "values":
          return 3;
        case "filter":
          return 4;
        default:
          return 1;
      }
    });
    // Handle VALUES clauses using query rewriting
    if (some(groups, (g) => g.type === "values")) {
      return this._buildValues(source, groups, context);
    }
    // merge BGPs on the same level
    let newGroups = [];
    let prec = null;
    for (let i = 0; i < groups.length; i++) {
      let group = groups[i];
      if (group.type === "bgp" && prec !== null && prec.type === "bgp") {
        let lastGroup = newGroups[newGroups.length - 1];
        lastGroup.triples = lastGroup.triples.concat(group.triples);
      } else {
        newGroups.push(group);
      }
      prec = groups[i];
    }
    groups = newGroups;
    for (const group of groups) {
      source = await this._buildGroup(source, group, context);
    }
    return source;
  }

  /**
   * Build a physical plan for a SPARQL group clause
   * @param source - Input {@link PipelineStage}
   * @param group - SPARQL Group
   * @param context - Execution context
   * @return A {@link PipelineStage} used to evaluate the SPARQL Group
   */
  async _buildGroup(source, group, context) {
    const engine = Pipeline.getInstance();
    // Reset flags on the options for child iterators
    let childContext = context.clone();
    switch (group.type) {
      case "bgp":
        if (!this._stageBuilders.has(SPARQL_OPERATION.BGP)) {
          throw new Error("A PlanBuilder cannot evaluate a Basic Graph Pattern without a Stage Builder for it");
        }
        // find possible property paths
        let [classicTriples, pathTriples] = extractPropertyPaths(group);
        if (pathTriples.length > 0) {
          if (!this._stageBuilders.has(SPARQL_OPERATION.PROPERTY_PATH)) {
            throw new Error("A PlanBuilder cannot evaluate property paths without a Stage Builder for it");
          }
          source = await this._stageBuilders
            .get(SPARQL_OPERATION.PROPERTY_PATH)
            .execute(source, pathTriples, context);
        }
        // delegate remaining BGP evaluation to the dedicated executor
        return await this._stageBuilders
          .get(SPARQL_OPERATION.BGP)
          .execute(source, classicTriples, childContext);
      case "query":
        return this._buildQueryPlan(group, childContext, source);
      case "graph":
        if (!this._stageBuilders.has(SPARQL_OPERATION.GRAPH)) {
          throw new Error("A PlanBuilder cannot evaluate a GRAPH clause without a Stage Builder for it");
        }
        // delegate GRAPH evaluation to an executor
        return this._stageBuilders
          .get(SPARQL_OPERATION.GRAPH)
          .execute(source, group, childContext);
      case "service":
        if (!this._stageBuilders.has(SPARQL_OPERATION.SERVICE)) {
          throw new Error("A PlanBuilder cannot evaluate a SERVICE clause without a Stage Builder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.SERVICE)
          .execute(source, group, childContext);
      case "group":
        return this._buildWhere(source, group.patterns, childContext);
      case "optional":
        if (!this._stageBuilders.has(SPARQL_OPERATION.OPTIONAL)) {
          throw new Error("A PlanBuilder cannot evaluate an OPTIONAL clause without a Stage Builder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.OPTIONAL)
          .execute(source, group, childContext);
      case "union":
        if (!this._stageBuilders.has(SPARQL_OPERATION.UNION)) {
          throw new Error("A PlanBuilder cannot evaluate an UNION clause without a Stage Builder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.UNION)
          .execute(source, group, childContext);
      case "minus":
        if (!this._stageBuilders.has(SPARQL_OPERATION.MINUS)) {
          throw new Error("A PlanBuilder cannot evaluate a MINUS clause without a Stage Builder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.MINUS)
          .execute(source, group, childContext);
      case "filter":
        if (!this._stageBuilders.has(SPARQL_OPERATION.FILTER)) {
          throw new Error("A PlanBuilder cannot evaluate a FILTER clause without a Stage Builder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.FILTER)
          .execute(source, group, this._customFunctions, childContext);
      case "bind":
        if (!this._stageBuilders.has(SPARQL_OPERATION.BIND)) {
          throw new Error("A PlanBuilder cannot evaluate a BIND clause without a Stage Builder for it");
        }
        return this._stageBuilders
          .get(SPARQL_OPERATION.BIND)
          .execute(source, group, this._customFunctions, childContext);
      default:
        throw new Error(`Unsupported SPARQL group pattern found in query: ${group.type}`);
    }
  }

  /**
   * Build a {@link PipelineStage} which evaluates a SPARQL query with VALUES clause(s).
   * It relies on a query rewriting approach:
   * ?s ?p ?o . VALUES ?s { :1 :2 } becomes
   * {:1 ?p ?o BIND(:1 AS ?s)} UNION {:2 ?p ?o BIND(:2 AS ?s)}
   * @param source - Input {@link PipelineStage}
   * @param groups - Query body, i.e., WHERE clause
   * @param context - Execution context
   * @return A {@link PipelineStage} which evaluates a SPARQL query with VALUES clause(s)
   */
  async _buildValues(source, groups, context) {
    let [values, others] = partition(groups, (g) => g.type === "values");
    const bindingsLists = values.map((g) => g.values);
    // for each VALUES clause
    const iterators = [];
    for (const bList of bindingsLists) {
      // for each value to bind in the VALUES clause
      const unionBranches = [];
      for (const b of bList) {
        const bindings = BindingBase.fromValuePatternRow(b);
        // BIND each group with the set of bindings and then evaluate it
        const temp = others.map((g) => deepApplyBindings(g, bindings));
        unionBranches.push(extendByBindings(await this._buildWhere(source, temp, context), bindings));
      }
      iterators.push(Pipeline.getInstance().merge(...unionBranches));
    }
    // Users may use more than one VALUES clause
    if (iterators.length > 1) {
      return Pipeline.getInstance().merge(...iterators);
    }
    return iterators[0];
  }
}
//# sourceMappingURL=plan-builder.js.map
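
For orientation, the sketch below shows one plausible way to drive this class. It is not part of the file above: the import paths, the createDataset() helper, and the use of sparqljs to produce the parsed query are assumptions, chosen to match the AST shape (type, queryType, where, variables) that build() and _buildQueryPlan() inspect.

// Hypothetical usage sketch, not part of plan-builder.js.
// Assumptions: PlanBuilder is importable from the package entry point,
// createDataset() is a stand-in for building the library's Dataset over
// your RDF graphs, and queries are parsed with sparqljs into the AST
// shape that build() expects (type: "query" | "update", queryType, ...).
import { Parser } from "sparqljs";
import { PlanBuilder } from "@jahed/sparql-engine";

const dataset = createDataset(); // hypothetical helper returning a Dataset
const builder = new PlanBuilder(dataset); // customFunctions may be passed as a second argument

// Parse a SPARQL string into a sparqljs AST.
const query = new Parser().parse("SELECT ?s WHERE { ?s ?p ?o } LIMIT 10");

// build() optimises the query, wires up the registered stage builders and
// returns a PipelineStage of bindings (for a SELECT query). How that stage
// is consumed depends on the Pipeline implementation in
// ../engine/pipeline/pipeline.js.
const results = await builder.build(query);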