@jahed/sparql-engine
SPARQL query engine for servers and web browsers.
// SPDX-License-Identifier: MIT
import { isNull, isUndefined, partition, some, sortBy } from "lodash-es";
import { Pipeline } from "../engine/pipeline/pipeline.js";
import ask from "../operators/modifiers/ask.js";
import construct from "../operators/modifiers/construct.js";
import select from "../operators/modifiers/select.js";
import Optimizer from "../optimizer/optimizer.js";
import { BindingBase, Bindings } from "../rdf/bindings.js";
import Dataset from "../rdf/dataset.js";
import { deepApplyBindings, extendByBindings } from "../utils.js";
import { RDF, isVariable } from "../utils/rdf.js";
import ExecutionContext from "./context/execution-context.js";
import ContextSymbols from "./context/symbols.js";
import AggregateStageBuilder from "./stages/aggregate-stage-builder.js";
import BGPStageBuilder from "./stages/bgp-stage-builder.js";
import BindStageBuilder from "./stages/bind-stage-builder.js";
import DistinctStageBuilder from "./stages/distinct-stage-builder.js";
import FilterStageBuilder from "./stages/filter-stage-builder.js";
import GlushkovStageBuilder from "./stages/glushkov-executor/glushkov-stage-builder.js";
import GraphStageBuilder from "./stages/graph-stage-builder.js";
import MinusStageBuilder from "./stages/minus-stage-builder.js";
import OptionalStageBuilder from "./stages/optional-stage-builder.js";
import OrderByStageBuilder from "./stages/orderby-stage-builder.js";
import { extractPropertyPaths } from "./stages/rewritings.js";
import ServiceStageBuilder from "./stages/service-stage-builder.js";
import StageBuilder from "./stages/stage-builder.js";
import UnionStageBuilder from "./stages/union-stage-builder.js";
import UpdateStageBuilder from "./stages/update-stage-builder.js";
export const SPARQL_OPERATION = {
AGGREGATE: 0,
BGP: 1,
BIND: 2,
DISTINCT: 3,
FILTER: 4,
GRAPH: 5,
MINUS: 6,
OPTIONAL: 7,
ORDER_BY: 8,
PROPERTY_PATH: 9,
SERVICE: 10,
UPDATE: 11,
UNION: 12,
};
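/**
* A PlanBuilder builds a physical query execution plan from a parsed SPARQL
* query and delegates each SPARQL operation to its registered {@link StageBuilder}.
*
* @example
* // A minimal sketch, assuming `myDataset` is an application-provided Dataset
* // and `parsedQuery` is a query already parsed into the expected algebra
* // (an object whose `type` is "query" or "update"):
* // const builder = new PlanBuilder(myDataset)
* // const plan = await builder.build(parsedQuery)
*/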
export class PlanBuilder {
_optimizer;
_stageBuilders;
_currentCache; // Public for tests.
_dataset;
_customFunctions;
constructor(dataset, customFunctions) {
this._dataset = dataset;
this._customFunctions = customFunctions;
this._optimizer = Optimizer.getDefault();
this._currentCache = null;
this._stageBuilders = new Map();
this.use(SPARQL_OPERATION.AGGREGATE, new AggregateStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.BGP, new BGPStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.BIND, new BindStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.DISTINCT, new DistinctStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.FILTER, new FilterStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.GRAPH, new GraphStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.MINUS, new MinusStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.SERVICE, new ServiceStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.OPTIONAL, new OptionalStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.ORDER_BY, new OrderByStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.PROPERTY_PATH, new GlushkovStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.UNION, new UnionStageBuilder(this._dataset));
this.use(SPARQL_OPERATION.UPDATE, new UpdateStageBuilder(this._dataset));
}
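/**
* Replace the logical query {@link Optimizer} applied by build() before the
* physical plan is constructed.
*/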
set optimizer(opt) {
this._optimizer = opt;
}
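/**
* Register a {@link StageBuilder} to evaluate a given type of SPARQL operation,
* replacing any builder previously registered for that operation.
* @param kind - One of the {@link SPARQL_OPERATION} constants
* @param stageBuilder - The {@link StageBuilder} used to evaluate the operation
*
* @example
* // Sketch only: CustomBGPStageBuilder is a hypothetical application-defined builder.
* // builder.use(SPARQL_OPERATION.BGP, new CustomBGPStageBuilder(dataset))
*/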
use(kind, stageBuilder) {
stageBuilder.builder = null;
stageBuilder.builder = this;
this._stageBuilders.set(kind, stageBuilder);
}
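/**
* Enable caching using the given cache instance. The cache is attached to the
* {@link ExecutionContext} that build() creates when no context is supplied by
* the caller.
*/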
async useCache(cache) {
this._currentCache = cache;
}
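/**
* Disable caching: execution contexts created by build() will no longer carry a cache.
*/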
disableCache() {
this._currentCache = null;
}
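/**
* Build the physical execution plan for a parsed SPARQL query or update.
* @param query - Parsed SPARQL query (an object whose `type` is "query" or "update")
* @param context - Optional execution context; a new one is created when omitted
* @return A {@link PipelineStage} used to evaluate the query
*/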
async build(query, context) {
if (isNull(context) || isUndefined(context)) {
context = new ExecutionContext();
context.cache = this._currentCache;
}
query = this._optimizer.optimize(query);
switch (query.type) {
case "query":
return this._buildQueryPlan(query, context);
case "update":
if (!this._stageBuilders.has(SPARQL_OPERATION.UPDATE)) {
throw new Error("A PlanBuilder cannot evaluate SPARQL UPDATE queries without a StageBuilder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.UPDATE)
.execute(query.updates, context);
default:
throw new SyntaxError("Unsupported SPARQL query type");
}
}
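/**
* Build the physical plan of a SELECT, CONSTRUCT, ASK or DESCRIBE query.
* DESCRIBE queries are first rewritten into equivalent CONSTRUCT queries.
* @param query - Parsed SPARQL query
* @param context - Execution context
* @param source - Optional input {@link PipelineStage}; defaults to a single empty set of bindings
* @return A {@link PipelineStage} used to evaluate the query
*/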
async _buildQueryPlan(query, context, source) {
const engine = Pipeline.getInstance();
if (isNull(source) || isUndefined(source)) {
// build pipeline starting iterator
source = engine.of(new BindingBase());
}
context.setProperty(ContextSymbols.PREFIXES, query.prefixes);
// rewrite a DESCRIBE query into a CONSTRUCT query
if (query.queryType === "DESCRIBE") {
const template = [];
const where = [
{
type: "bgp",
triples: [],
},
];
query.variables.forEach((v) => {
const triple = RDF.quad(v, RDF.variable(`pred__describe__${v}`), RDF.variable(`obj__describe__${v}`));
template.push(triple);
where[0].triples.push(triple);
});
const construct = {
prefixes: query.prefixes,
from: query.from,
queryType: "CONSTRUCT",
template,
type: "query",
where: query.where?.concat(where),
};
return this._buildQueryPlan(construct, context, source);
}
// from the beginning, detect any LIMIT/OFFSET modifiers, as they impact the caching strategy
context.setProperty(ContextSymbols.HAS_LIMIT_OFFSET, "limit" in query || "offset" in query);
// Handles FROM clauses
if (query.from) {
context.defaultGraphs = query.from.default;
context.namedGraphs = query.from.named;
}
// Handles WHERE clause
let graphIterator;
if (query.where?.length) {
graphIterator = await this._buildWhere(source, query.where, context);
}
else {
graphIterator = engine.of(new BindingBase());
}
let aggregates = [];
// Parse query variables to separate projection & aggregate variables
if ("variables" in query) {
const next = [];
for (const v of query.variables) {
if ("variable" in v) {
aggregates.push(v);
next.push(v.variable);
}
else {
next.push(v);
}
}
query.variables = next;
}
// Handles SPARQL aggregations
if ("group" in query || aggregates.length > 0) {
// Handles GROUP BY
graphIterator = await this._stageBuilders
.get(SPARQL_OPERATION.AGGREGATE)
.execute(graphIterator, query, context, this._customFunctions);
}
if (aggregates.length > 0) {
// Handles SPARQL aggregation functions
for (const agg of aggregates) {
graphIterator = await this._stageBuilders
.get(SPARQL_OPERATION.BIND)
.execute(graphIterator, agg, this._customFunctions, context);
}
}
// Handles ORDER BY
if ("order" in query) {
if (!this._stageBuilders.has(SPARQL_OPERATION.ORDER_BY)) {
throw new Error("A PlanBuilder cannot evaluate SPARQL ORDER BY clauses without a StageBuilder for it");
}
graphIterator = await this._stageBuilders
.get(SPARQL_OPERATION.ORDER_BY)
.execute(graphIterator, query.order);
}
switch (query.queryType) {
case "SELECT": {
graphIterator = select(graphIterator, query);
break;
}
case "CONSTRUCT": {
graphIterator = construct(graphIterator, query);
break;
}
case "ASK": {
graphIterator = ask(graphIterator);
break;
}
default: {
throw new Error("Unsupported SPARQL query type.");
}
}
// Create iterators for modifiers
if ("distinct" in query && query.distinct) {
if (!this._stageBuilders.has(SPARQL_OPERATION.DISTINCT)) {
throw new Error("A PlanBuilder cannot evaluate a DISTINCT clause without a StageBuilder for it");
}
graphIterator = await this._stageBuilders
.get(SPARQL_OPERATION.DISTINCT)
.execute(graphIterator, context);
}
// Add offsets and limits if requested
if ("offset" in query) {
graphIterator = engine.skip(graphIterator, query.offset);
}
if ("limit" in query) {
graphIterator = engine.limit(graphIterator, query.limit);
}
// graphIterator.queryType = query.queryType
return graphIterator;
}
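/**
* Build the physical plan of a WHERE clause. Group patterns are reordered so that
* BGPs and GRAPH clauses with a fixed graph name come first, followed by other
* patterns, then VALUES, FILTER and GRAPH clauses with a variable graph name;
* adjacent BGPs are then merged before each group is evaluated in sequence.
* @param source - Input {@link PipelineStage}
* @param groups - Group patterns of the WHERE clause
* @param context - Execution context
* @return A {@link PipelineStage} used to evaluate the WHERE clause
*/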
async _buildWhere(source, groups, context) {
groups = sortBy(groups, (g) => {
switch (g.type) {
case "graph":
if (isVariable(g.name)) {
return 5;
}
return 0;
case "bgp":
return 0;
case "values":
return 3;
case "filter":
return 4;
default:
return 1;
}
});
// Handle VALUES clauses using query rewriting
if (some(groups, (g) => g.type === "values")) {
return this._buildValues(source, groups, context);
}
// merge BGPs on the same level
let newGroups = [];
let prec = null;
for (let i = 0; i < groups.length; i++) {
let group = groups[i];
if (group.type === "bgp" && prec !== null && prec.type === "bgp") {
let lastGroup = newGroups[newGroups.length - 1];
lastGroup.triples = lastGroup.triples.concat(group.triples);
}
else {
newGroups.push(group);
}
prec = groups[i];
}
groups = newGroups;
for (const group of groups) {
source = await this._buildGroup(source, group, context);
}
return source;
}
/**
* Build a physical plan for a SPARQL group clause
* @param source - Input {@link PipelineStage}
* @param group - SPARQL Group
* @param context - Execution context
* @return A {@link PipelineStage} used to evaluate the SPARQL Group
*/
async _buildGroup(source, group, context) {
const engine = Pipeline.getInstance();
// Clone the context so flags set for child iterators do not leak into the parent context
let childContext = context.clone();
switch (group.type) {
case "bgp":
if (!this._stageBuilders.has(SPARQL_OPERATION.BGP)) {
throw new Error("A PlanBuilder cannot evaluate a Basic Graph Pattern without a Stage Builder for it");
}
// find possible Property paths
let [classicTriples, pathTriples] = extractPropertyPaths(group);
if (pathTriples.length > 0) {
if (!this._stageBuilders.has(SPARQL_OPERATION.PROPERTY_PATH)) {
throw new Error("A PlanBuilder cannot evaluate property paths without a Stage Builder for it");
}
source = await this._stageBuilders
.get(SPARQL_OPERATION.PROPERTY_PATH)
.execute(source, pathTriples, context);
}
// delegate remaining BGP evaluation to the dedicated executor
return await this._stageBuilders
.get(SPARQL_OPERATION.BGP)
.execute(source, classicTriples, childContext);
case "query":
return this._buildQueryPlan(group, childContext, source);
case "graph":
if (!this._stageBuilders.has(SPARQL_OPERATION.GRAPH)) {
throw new Error("A PlanBuilder cannot evaluate a GRAPH clause without a Stage Builder for it");
}
// delegate GRAPH evaluation to an executor
return this._stageBuilders
.get(SPARQL_OPERATION.GRAPH)
.execute(source, group, childContext);
case "service":
if (!this._stageBuilders.has(SPARQL_OPERATION.SERVICE)) {
throw new Error("A PlanBuilder cannot evaluate a SERVICE clause without a Stage Builder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.SERVICE)
.execute(source, group, childContext);
case "group":
return this._buildWhere(source, group.patterns, childContext);
case "optional":
if (!this._stageBuilders.has(SPARQL_OPERATION.OPTIONAL)) {
throw new Error("A PlanBuilder cannot evaluate an OPTIONAL clause without a Stage Builder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.OPTIONAL)
.execute(source, group, childContext);
case "union":
if (!this._stageBuilders.has(SPARQL_OPERATION.UNION)) {
throw new Error("A PlanBuilder cannot evaluate an UNION clause without a Stage Builder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.UNION)
.execute(source, group, childContext);
case "minus":
if (!this._stageBuilders.has(SPARQL_OPERATION.MINUS)) {
throw new Error("A PlanBuilder cannot evaluate a MINUS clause without a Stage Builder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.MINUS)
.execute(source, group, childContext);
case "filter":
if (!this._stageBuilders.has(SPARQL_OPERATION.FILTER)) {
throw new Error("A PlanBuilder cannot evaluate a FILTER clause without a Stage Builder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.FILTER)
.execute(source, group, this._customFunctions, childContext);
case "bind":
if (!this._stageBuilders.has(SPARQL_OPERATION.BIND)) {
throw new Error("A PlanBuilder cannot evaluate a BIND clause without a Stage Builder for it");
}
return this._stageBuilders
.get(SPARQL_OPERATION.BIND)
.execute(source, group, this._customFunctions, childContext);
default:
throw new Error(`Unsupported SPARQL group pattern found in query: ${group.type}`);
}
}
/**
* Build a {@link PipelineStage} which evaluates a SPARQL query with VALUES clause(s).
* It relies on a query rewriting approach:
* ?s ?p ?o . VALUES ?s { :1 :2 } becomes {:1 ?p ?o BIND(:1 AS ?s)} UNION {:2 ?p ?o BIND(:2 AS ?s)}
* @param source - Input {@link PipelineStage}
* @param groups - Query body, i.e., WHERE clause
* @param context - Execution context
* @return A {@link PipelineStage} which evaluates a SPARQL query with VALUES clause(s)
*/
async _buildValues(source, groups, context) {
let [values, others] = partition(groups, (g) => g.type === "values");
const bindingsLists = values.map((g) => g.values);
// for each VALUES clause
const iterators = [];
for (const bList of bindingsLists) {
// for each value to bind in the VALUES clause
const unionBranches = [];
for (const b of bList) {
const bindings = BindingBase.fromValuePatternRow(b);
// apply the bindings to each group, evaluate the rewritten groups, then extend the results with the bindings
const temp = others.map((g) => deepApplyBindings(g, bindings));
unionBranches.push(extendByBindings(await this._buildWhere(source, temp, context), bindings));
}
iterators.push(Pipeline.getInstance().merge(...unionBranches));
}
// Users may use more than one VALUES clause
if (iterators.length > 1) {
return Pipeline.getInstance().merge(...iterators);
}
return iterators[0];
}
}
//# sourceMappingURL=plan-builder.js.map