@jahed/sparql-engine
Version:
SPARQL query engine for servers and web browsers.
201 lines • 9.51 kB
JavaScript
import { isNull, mean, orderBy, round, sortBy } from "lodash-es";
import ExecutionContext from "../engine/context/execution-context.js";
import { Pipeline } from "../engine/pipeline/pipeline.js";
import indexJoin from "../operators/join/index-join.js";
import { isVariable, UNBOUND } from "../utils/rdf.js";
import { leftLinearJoinOrdering } from "../utils/sparql.js";
import { BindingBase, Bindings } from "./bindings.js";
import { GRAPH_CAPABILITY } from "./graph_capability.js";
/**
 * Populate a capability registry by probing which optional methods the
 * concrete Graph subclass actually implements on its prototype.
 * A capability is granted when its backing method is non-null/non-undefined.
 * @param registry - Map of GRAPH_CAPABILITY token to boolean
 * @param proto - Prototype of the Graph instance being inspected
 */
function parseCapabilities(registry, proto) {
    const probes = [
        [GRAPH_CAPABILITY.ESTIMATE_TRIPLE_CARD, proto.estimateCardinality],
        [GRAPH_CAPABILITY.UNION, proto.evalUnion],
    ];
    for (const [token, method] of probes) {
        registry.set(token, method != null);
    }
}
/**
 * Count how many components of a triple pattern (subject, predicate, object)
 * are SPARQL variables.
 * @param triple - Triple pattern to inspect
 * @return Number of variables in the pattern (0 to 3)
 */
function countVariables(triple) {
    const terms = [triple.subject, triple.predicate, triple.object];
    return terms.filter((term) => isVariable(term)).length;
}
/**
 * An abstract RDF Graph, accessed through an RDF Dataset.
 *
 * Subclasses provide the actual triple storage/lookup (via `find`, not shown
 * here) and may opt into extra capabilities by implementing the optional
 * methods `estimateCardinality` and `evalUnion` (which are nulled out on this
 * base prototype below, so `parseCapabilities` detects them as unimplemented).
 * @abstract
 */
export default class Graph {
    // IRI identifying this graph inside the dataset (UNBOUND for the default graph)
    iri;
    // Map of GRAPH_CAPABILITY token -> boolean, filled once at construction
    _capabilities;
    constructor(iri = UNBOUND) {
        this.iri = iri;
        this._capabilities = new Map();
        // Probe the concrete subclass's prototype to discover which optional
        // methods it implements; results drive _isCapable() below.
        parseCapabilities(this._capabilities, Object.getPrototypeOf(this));
    }
    /**
     * Test if a graph has a capability
     * @param token - Capability tested
     * @return True if the graph has the requested capability, false otherwise
     */
    _isCapable(token) {
        // Map.get returns undefined for missing keys; the has() guard keeps
        // the return value a strict boolean for registered capabilities.
        return this._capabilities.has(token) && this._capabilities.get(token);
    }
    /**
     * Estimate the cardinality of a Triple pattern, i.e., the number of matching RDF Triples in the RDF Graph.
     * Default implementation: not supported (this method is also set to null
     * on the prototype below, so the capability probe reports it as absent).
     * @param triple - Triple pattern to estimate cardinality
     * @return A Promise fulfilled with the pattern's estimated cardinality
     * @throws {SyntaxError} Always, unless overridden by a subclass
     */
    estimateCardinality(triple) {
        throw new SyntaxError("Error: this graph is not capable of estimating the cardinality of a triple pattern");
    }
    /**
     * Get a {@link PipelineStage} which finds RDF triples matching a triple pattern and a set of keywords in the RDF Graph.
     * The search can be constrained by min and max relevance (a 0 to 1 score signifying how closely the literal matches the search terms).
     *
     * The {@link Graph} class provides a default implementation that computes the relevance
     * score as the percentage of words matching the list of input keywords.
     * If the minRank and/or maxRank parameters are used, then
     * the graph materializes all matching RDF triples, sorts them by descending rank and then
     * selects the appropriate ranks.
     * Otherwise, the rank is not computed and all triples are associated with a rank of -1.
     *
     * Consequently, the default implementation should work fine for basic usage, but more advanced users
     * should provide their own implementation, integrated with their own backend.
     * For example, a SQL-based RDF Graph should rely on GIN or GIST indexes for the full text search.
     * @param pattern - Triple pattern to find
     * @param variable - SPARQL variable on which the keyword search is performed
     * @param keywords - List of keywords to search for occurrences
     * @param matchAll - True if only values that contain all of the specified search terms should be considered.
     * @param minRelevance - Minimum relevance score (set it to null to disable it)
     * @param maxRelevance - Maximum relevance score (set it to null to disable it)
     * @param minRank - Minimum rank of the matches (set it to null to disable it)
     * @param maxRank - Maximum rank of the matches (set it to null to disable it)
     * @param context - Execution options
     * @return A {@link PipelineInput} which output tuples of shape [matching RDF triple, score, rank].
     */
    fullTextSearch(pattern, variable, keywords, matchAll, minRelevance, maxRelevance, minRank, maxRank, context) {
        // Normalize disabled (null) relevance bounds to the widest range
        if (isNull(minRelevance)) {
            minRelevance = 0;
        }
        if (isNull(maxRelevance)) {
            maxRelevance = Number.MAX_SAFE_INTEGER;
        }
        // find all RDF triples matching the input triple pattern
        const source = Pipeline.getInstance().from(this.find(pattern, context));
        // compute the score of each matching RDF triple as the average fraction
        // of words in the RDF term that match the keywords
        let iterator = Pipeline.getInstance().map(source, (triple) => {
            // Pick the term bound to the searched variable; whitespace-split
            // its lexical value into words (assumes single-space separation)
            let words = [];
            if (pattern.subject.equals(variable)) {
                words = triple.subject.value.split(" ");
            }
            else if (pattern.predicate.equals(variable)) {
                words = triple.predicate.value.split(" ");
            }
            else if (pattern.object.equals(variable)) {
                words = triple.object.value.split(" ");
            }
            // For each keyword, compute % of words matching the keyword
            // (substring match, case-sensitive)
            const keywordScores = keywords.map((keyword) => {
                return (words.reduce((acc, word) => {
                    if (word.includes(keyword)) {
                        acc += 1;
                    }
                    return acc;
                }, 0) / words.length);
            });
            // if we should match all keywords, not matching a single keyword gives you a score of 0
            if (matchAll && keywordScores.some((v) => v === 0)) {
                return { triple, rank: -1, score: 0 };
            }
            // The relevance score is computed as the average keyword score,
            // rounded to 3 decimal places; rank stays -1 unless ranked below
            return { triple, rank: -1, score: round(mean(keywordScores), 3) };
        });
        // filter by min & max relevance scores (zero-score matches are always dropped)
        iterator = Pipeline.getInstance().filter(iterator, (v) => {
            return (v.score > 0 && minRelevance <= v.score && v.score <= maxRelevance);
        });
        // if needed, rank the matches by descending score
        if (!isNull(minRank) || !isNull(maxRank)) {
            if (isNull(minRank)) {
                minRank = 0;
            }
            if (isNull(maxRank)) {
                maxRank = Number.MAX_SAFE_INTEGER;
            }
            // negative values for minRank and/or maxRank will yield no results
            if (minRank < 0 || maxRank < 0) {
                return Pipeline.getInstance().empty();
            }
            // ranks the matches, and then only keeps the desired ranks.
            // NOTE: collect() materializes every match in memory before sorting.
            iterator = Pipeline.getInstance().flatMap(Pipeline.getInstance().collect(iterator), (values) => {
                return (orderBy(values, ["score"], ["desc"])
                    // add rank (0-based, best score first)
                    .map((item, rank) => {
                    item.rank = rank;
                    return item;
                })
                    // slice using the minRank and maxRank parameters (both inclusive)
                    .slice(minRank, maxRank + 1));
            });
        }
        // finally, format results as tuples [RDF triple, triple's score, triple's rank]
        return Pipeline.getInstance().map(iterator, (v) => [
            v.triple,
            v.score,
            v.rank,
        ]);
    }
    /**
     * Evaluates an union of Basic Graph patterns on the Graph using a {@link PipelineStage}.
     * Default implementation: not supported (nulled out on the prototype below).
     * @param patterns - The set of BGPs to evaluate
     * @param context - Execution options
     * @return A {@link PipelineStage} which evaluates the Basic Graph pattern on the Graph
     * @throws {SyntaxError} Always, unless overridden by a subclass
     */
    evalUnion(patterns, context) {
        throw new SyntaxError("Error: this graph is not capable of evaluating UNION queries");
    }
    /**
     * Evaluates a Basic Graph pattern, i.e., a set of triple patterns, on the Graph using a {@link PipelineStage}.
     * When the graph can estimate triple-pattern cardinalities, patterns are
     * joined cheapest-first; otherwise the static left-linear ordering is used.
     * @param bgp - The set of triple patterns to evaluate
     * @param context - Execution options
     * @return A {@link PipelineStage} which evaluates the Basic Graph pattern on the Graph
     */
    evalBGP(bgp, context) {
        const engine = Pipeline.getInstance();
        if (this._isCapable(GRAPH_CAPABILITY.ESTIMATE_TRIPLE_CARD)) {
            // Estimate the cardinality of every pattern in parallel
            const op = engine.from(Promise.all(bgp.map((triple) => {
                return this.estimateCardinality(triple).then((c) => {
                    return {
                        triple,
                        cardinality: c,
                        nbVars: countVariables(triple),
                    };
                });
            })));
            return engine.mergeMap(op, (results) => {
                // Join order: ascending cardinality, then left-linear reordering
                const sortedPatterns = leftLinearJoinOrdering(sortBy(results, "cardinality").map((t) => t.triple));
                // Seed the join chain with a single empty set of bindings
                const start = engine.of(new BindingBase());
                return sortedPatterns.reduce((iter, t) => {
                    return indexJoin(iter, t, this, context);
                }, start);
            });
        }
        else {
            // FIX ME: this trick is required, otherwise ADD, COPY and MOVE queries are not evaluated correctly. We need to find why...
            // (wrapping in mergeMap defers building the join chain until subscription)
            return engine.mergeMap(engine.from(Promise.resolve(null)), () => {
                const start = engine.of(new BindingBase());
                return leftLinearJoinOrdering(bgp).reduce((iter, t) => {
                    return indexJoin(iter, t, this, context);
                }, start);
            });
        }
    }
}
// Disable the optional methods on the base prototype: parseCapabilities treats
// a null method as "capability not implemented", so subclasses must override
// these to opt in. defineProperty defaults make them non-writable/non-enumerable.
for (const optional of ["estimateCardinality", "evalUnion"]) {
    Object.defineProperty(Graph.prototype, optional, { value: null });
}
//# sourceMappingURL=graph.js.map