/*
 * @comunica/actor-query-source-identify-hypermedia-sparql
 * Version:
 * A SPARQL query-source-identify-hypermedia actor
 * 348 lines • 18 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.QuerySourceSparql = void 0;
const context_entries_1 = require("@comunica/context-entries");
const core_1 = require("@comunica/core");
const utils_metadata_1 = require("@comunica/utils-metadata");
const utils_query_operation_1 = require("@comunica/utils-query-operation");
const asynciterator_1 = require("asynciterator");
const fetch_sparql_endpoint_1 = require("fetch-sparql-endpoint");
const lru_cache_1 = require("lru-cache");
const rdf_terms_1 = require("rdf-terms");
const sparqlalgebrajs_1 = require("sparqlalgebrajs");
/**
 * A query source that evaluates SPARQL algebra operations against a remote SPARQL endpoint.
 *
 * Operations are serialized to SPARQL query strings and sent through a `SparqlEndpointFetcher`
 * whose HTTP calls are routed via the HTTP mediator.
 * Result streams get a `metadata` property attached that holds a cardinality
 * (cached, locally estimated, or obtained through a COUNT query) and the in-scope variables.
 */
class QuerySourceSparql {
  /**
   * @param url The SPARQL endpoint URL; also exposed as `referenceValue`.
   * @param context The source context, merged with the query context for every HTTP request.
   * @param mediatorHttp The mediator through which all HTTP requests are sent.
   * @param bindMethod The strategy for pushing join bindings into an operation (only 'values' is implemented).
   * @param dataFactory The RDF data factory.
   * @param algebraFactory The SPARQL algebra factory.
   * @param bindingsFactory The factory used to create bindings from raw endpoint results.
   * @param forceHttpGet If truthy, requests use HTTP GET, otherwise POST.
   * @param cacheSize Maximum number of cardinalities in the LRU cache; the cache is disabled when not above 0.
   * @param countTimeout Milliseconds to wait for a COUNT query before falling back to an infinite estimate.
   * @param cardinalityCountQueries If COUNT queries may be sent to the endpoint.
   * @param cardinalityEstimateConstruction If cardinalities should first be estimated locally.
   * @param defaultGraph Suffix that a dataset URI must end with to be used in local estimation.
   * @param unionDefaultGraph If all datasets should be used in local estimation; defaults to false.
   * @param datasets Optional datasets used for local cardinality estimation.
   */
  constructor(url, context, mediatorHttp, bindMethod, dataFactory, algebraFactory, bindingsFactory, forceHttpGet, cacheSize, countTimeout, cardinalityCountQueries, cardinalityEstimateConstruction, defaultGraph, unionDefaultGraph, datasets) {
    this.referenceValue = url;
    this.url = url;
    this.context = context;
    this.mediatorHttp = mediatorHttp;
    this.bindMethod = bindMethod;
    this.dataFactory = dataFactory;
    this.algebraFactory = algebraFactory;
    this.bindingsFactory = bindingsFactory;
    this.endpointFetcher = new fetch_sparql_endpoint_1.SparqlEndpointFetcher({
      method: forceHttpGet ? 'GET' : 'POST',
      // The fetch callback reads the context that the calling method stored right before invoking the fetcher
      fetch: (input, init) => this.mediatorHttp.mediate({ input, init, context: this.lastSourceContext }),
      prefixVariableQuestionMark: true,
      dataFactory,
    });
    this.cache = cacheSize > 0 ?
      new lru_cache_1.LRUCache({ max: cacheSize }) :
      undefined;
    this.countTimeout = countTimeout;
    this.cardinalityCountQueries = cardinalityCountQueries;
    this.cardinalityEstimateConstruction = cardinalityEstimateConstruction;
    this.defaultGraph = defaultGraph;
    this.unionDefaultGraph = unionDefaultGraph ?? false;
    this.datasets = datasets;
  }

  /**
   * @returns The selector shape of this source: any operation, with support for join bindings.
   */
  async getSelectorShape() {
    return QuerySourceSparql.SELECTOR_SHAPE;
  }

  /**
   * Evaluate an operation at the endpoint as a SELECT query.
   * @param operationIn The operation to evaluate.
   * @param context The query context.
   * @param options Optional query options; `joinBindings` are pushed into the operation if present.
   * @returns A lazily started bindings stream with a `metadata` property attached.
   */
  queryBindings(operationIn, context, options) {
    // If bindings are passed, modify the operations
    let operationPromise;
    if (options?.joinBindings) {
      operationPromise = QuerySourceSparql.addBindingsToOperation(this.algebraFactory, this.bindMethod, operationIn, options.joinBindings);
    }
    else {
      operationPromise = Promise.resolve(operationIn);
    }
    const bindings = new asynciterator_1.TransformIterator(async () => {
      // Prepare queries
      const operation = await operationPromise;
      const variables = sparqlalgebrajs_1.Util.inScopeVariables(operation);
      const queryString = context.get(context_entries_1.KeysInitQuery.queryString);
      const queryFormat = context.getSafe(context_entries_1.KeysInitQuery.queryFormat);
      // Send the original query string as-is when it is SPARQL and no bindings had to be injected,
      // otherwise serialize the (possibly modified) operation.
      const selectQuery = !options?.joinBindings && queryString && queryFormat.language === 'sparql' ?
        queryString :
        QuerySourceSparql.operationToSelectQuery(this.algebraFactory, operation, variables);
      const undefVariables = QuerySourceSparql.getOperationUndefs(operation);
      return this.queryBindingsRemote(this.url, selectQuery, variables, context, undefVariables);
    }, { autoStart: false });
    this.attachMetadata(bindings, context, operationPromise);
    return bindings;
  }

  /**
   * Evaluate an operation at the endpoint as a query that produces triples.
   * The query string from the context is used if present, otherwise the operation is serialized.
   * @param operation The operation to evaluate; metadata is computed over `operation.input`.
   * @param context The query context.
   * @returns A lazily started quad stream with a `metadata` property attached.
   */
  queryQuads(operation, context) {
    // Make the merged context visible to the fetch callback for the duration of the call
    this.lastSourceContext = this.context.merge(context);
    const rawStream = this.endpointFetcher.fetchTriples(this.url, context.get(context_entries_1.KeysInitQuery.queryString) ?? QuerySourceSparql.operationToQuery(operation));
    this.lastSourceContext = undefined;
    const quads = (0, asynciterator_1.wrap)(rawStream, { autoStart: false, maxBufferSize: Number.POSITIVE_INFINITY });
    this.attachMetadata(quads, context, Promise.resolve(operation.input));
    return quads;
  }

  /**
   * Evaluate an operation at the endpoint as an ASK query.
   * The query string from the context is used if present, otherwise the operation is serialized.
   * @param operation The operation to evaluate.
   * @param context The query context.
   * @returns The promise returned by the endpoint fetcher's `fetchAsk`.
   */
  queryBoolean(operation, context) {
    // Make the merged context visible to the fetch callback for the duration of the call
    this.lastSourceContext = this.context.merge(context);
    const promise = this.endpointFetcher.fetchAsk(this.url, context.get(context_entries_1.KeysInitQuery.queryString) ?? QuerySourceSparql.operationToQuery(operation));
    this.lastSourceContext = undefined;
    return promise;
  }

  /**
   * Evaluate an operation at the endpoint as an update query.
   * The query string from the context is used if present, otherwise the operation is serialized.
   * @param operation The operation to evaluate.
   * @param context The query context.
   * @returns The promise returned by the endpoint fetcher's `fetchUpdate`.
   */
  queryVoid(operation, context) {
    // Make the merged context visible to the fetch callback for the duration of the call
    this.lastSourceContext = this.context.merge(context);
    const promise = this.endpointFetcher.fetchUpdate(this.url, context.get(context_entries_1.KeysInitQuery.queryString) ?? QuerySourceSparql.operationToQuery(operation));
    this.lastSourceContext = undefined;
    return promise;
  }

  /**
   * Asynchronously determine the cardinality of an operation and set it, together with the in-scope
   * variables, as the `metadata` property of the given stream.
   *
   * The cardinality is taken from, in order: the cache, a local estimate (if enabled and finite),
   * and a COUNT query (if enabled). In all other cases, including failures and timeouts,
   * an infinite estimate is used. This method never throws and returns nothing.
   * @param target The stream on which `setProperty('metadata', ...)` is called.
   * @param context The query context used for the COUNT query.
   * @param operationPromise A promise resolving to the operation to count.
   */
  attachMetadata(target, context, operationPromise) {
    // Emit metadata containing the estimated count
    let variablesCount = [];
    // eslint-disable-next-line no-async-promise-executor,ts/no-misused-promises
    new Promise(async (resolve, reject) => {
      // Declared outside of the try-block so that the catch-block can cancel a pending timeout
      let timeoutHandler;
      try {
        const operation = await operationPromise;
        const variablesScoped = sparqlalgebrajs_1.Util.inScopeVariables(operation);
        const countQuery = this.operationToNormalizedCountQuery(operation);
        const undefVariables = QuerySourceSparql.getOperationUndefs(operation);
        variablesCount = variablesScoped.map(variable => ({
          variable,
          canBeUndef: undefVariables.some(undefVariable => undefVariable.equals(variable)),
        }));
        const cachedCardinality = this.cache?.get(countQuery);
        if (cachedCardinality) {
          return resolve(cachedCardinality);
        }
        // Attempt to estimate locally prior to sending a COUNT request, as this should be much faster.
        // The estimates may be off by varying amounts, so this is set behind a configuration flag.
        if (this.cardinalityEstimateConstruction) {
          const localEstimate = await this.estimateOperationCardinality(operation);
          if (Number.isFinite(localEstimate.value)) {
            this.cache?.set(countQuery, localEstimate);
            return resolve(localEstimate);
          }
        }
        // Don't send count queries if disabled.
        if (!this.cardinalityCountQueries) {
          return resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url });
        }
        // Fall back to an infinite estimate if the endpoint does not answer in time
        timeoutHandler = setTimeout(() => resolve({
          type: 'estimate',
          value: Number.POSITIVE_INFINITY,
          dataset: this.url,
        }), this.countTimeout);
        const varCount = this.dataFactory.variable('count');
        const bindingsStream = await this
          .queryBindingsRemote(this.url, countQuery, [varCount], context, []);
        bindingsStream.on('data', (bindings) => {
          clearTimeout(timeoutHandler);
          const count = bindings.get(varCount);
          const cardinality = {
            type: 'estimate',
            value: Number.POSITIVE_INFINITY,
            dataset: this.url,
          };
          if (count) {
            const cardinalityValue = Number.parseInt(count.value, 10);
            // Only parseable counts are considered exact and are cached
            if (!Number.isNaN(cardinalityValue)) {
              cardinality.type = 'exact';
              cardinality.value = cardinalityValue;
              this.cache?.set(countQuery, cardinality);
            }
          }
          return resolve(cardinality);
        });
        bindingsStream.on('error', () => {
          clearTimeout(timeoutHandler);
          resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url });
        });
        bindingsStream.on('end', () => {
          clearTimeout(timeoutHandler);
          resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url });
        });
      }
      catch (error) {
        // The count request may fail after the timeout was armed; do not leave the timer pending.
        // Clearing an undefined handle is a no-op.
        clearTimeout(timeoutHandler);
        return reject(error);
      }
    })
      .then((cardinality) => {
        target.setProperty('metadata', {
          state: new utils_metadata_1.MetadataValidationState(),
          cardinality,
          variables: variablesCount,
        });
      })
      // Any failure results in metadata with an infinite estimate
      .catch(() => target.setProperty('metadata', {
        state: new utils_metadata_1.MetadataValidationState(),
        cardinality: { type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url },
        variables: variablesCount,
      }));
  }

  /**
   * Convert an algebra operation into a query string, and if the operation is a simple triple pattern,
   * then also replace any variables with s, p, and o to increase the chance of cache hits.
   * @param {Algebra.Operation} operation The operation to convert into a query string.
   * @returns {string} Query string for a COUNT query over the operation.
   */
  operationToNormalizedCountQuery(operation) {
    const normalizedOperation = operation.type === sparqlalgebrajs_1.Algebra.types.PATTERN ?
      this.algebraFactory.createPattern(
        operation.subject.termType === 'Variable' ? this.dataFactory.variable('s') : operation.subject,
        operation.predicate.termType === 'Variable' ? this.dataFactory.variable('p') : operation.predicate,
        operation.object.termType === 'Variable' ? this.dataFactory.variable('o') : operation.object,
      ) :
      operation;
    const operationString = QuerySourceSparql.operationToCountQuery(this.dataFactory, this.algebraFactory, normalizedOperation);
    return operationString;
  }

  /**
   * Performs local cardinality estimation for the specified SPARQL algebra operation, which should
   * result in better estimation performance at the expense of accuracy.
   *
   * Estimation is delegated to `estimateCardinality` with a dataset wrapper for this source.
   * The wrapper's `getCardinality` answers from the cache if possible, and otherwise sums the estimates
   * of the configured datasets that are selected by `unionDefaultGraph` or `defaultGraph`.
   * It returns undefined when there is no cache hit and no datasets are configured.
   * @param {Algebra.Operation} operation A query operation.
   * @returns The result of `estimateCardinality` for the operation over this source.
   */
  async estimateOperationCardinality(operation) {
    const dataset = {
      getCardinality: (operation) => {
        const queryString = this.operationToNormalizedCountQuery(operation);
        const cachedCardinality = this.cache?.get(queryString);
        if (cachedCardinality) {
          return cachedCardinality;
        }
        if (this.datasets) {
          const cardinalities = this.datasets
            .filter(ds => this.unionDefaultGraph || (this.defaultGraph && ds.uri.endsWith(this.defaultGraph)))
            .map(ds => (0, utils_query_operation_1.estimateCardinality)(operation, ds));
          // The sum is only exact if every individual cardinality is exact
          const cardinality = {
            type: cardinalities.some(card => card.type === 'estimate') ? 'estimate' : 'exact',
            value: cardinalities.length > 0 ? cardinalities.reduce((acc, card) => acc + card.value, 0) : 0,
            dataset: this.url,
          };
          return cardinality;
        }
      },
      source: this.url,
      uri: this.url,
    };
    return (0, utils_query_operation_1.estimateCardinality)(operation, dataset);
  }

  /**
   * Create an operation that includes the bindings from the given bindings stream.
   * The bindings stream is fully consumed before the bind method is inspected.
   * @param algebraFactory The algebra factory.
   * @param bindMethod A method for adding bindings to an operation.
   * @param operation The operation to bind to.
   * @param addBindings The bindings to add.
   * @param addBindings.bindings The bindings stream.
   * @param addBindings.metadata The bindings metadata.
   * @returns A promise resolving to a join of a VALUES clause with the operation (for 'values').
   * @throws {Error} If the bind method is 'union', 'filter', or unknown.
   */
  static async addBindingsToOperation(algebraFactory, bindMethod, operation, addBindings) {
    const bindings = await addBindings.bindings.toArray();
    switch (bindMethod) {
      case 'values':
        return algebraFactory.createJoin([
          algebraFactory.createValues(addBindings.metadata.variables.map(v => v.variable), bindings.map(binding => Object.fromEntries([...binding]
            .map(([key, value]) => [`?${key.value}`, value])))),
          operation,
        ], false);
      case 'union': {
        throw new Error('Not implemented yet: "union" case');
      }
      case 'filter': {
        throw new Error('Not implemented yet: "filter" case');
      }
      default: {
        // Fail explicitly instead of resolving to undefined, which would only surface later as an unrelated error
        throw new Error(`Unsupported bind method: "${bindMethod}"`);
      }
    }
  }

  /**
   * Convert an operation to a select query for this pattern.
   * @param algebraFactory The algebra factory.
   * @param {Algebra.Operation} operation A query operation.
   * @param {RDF.Variable[]} variables The variables in scope for the operation.
   * @return {string} A select query string.
   */
  static operationToSelectQuery(algebraFactory, operation, variables) {
    return QuerySourceSparql.operationToQuery(algebraFactory.createProject(operation, variables));
  }

  /**
   * Convert an operation to a count query for the number of matching triples for this pattern.
   * The count is aggregated into `var0`, which is then bound to and projected as `count`.
   * @param dataFactory The data factory.
   * @param algebraFactory The algebra factory.
   * @param {Algebra.Operation} operation A query operation.
   * @return {string} A count query string.
   */
  static operationToCountQuery(dataFactory, algebraFactory, operation) {
    return QuerySourceSparql.operationToQuery(algebraFactory.createProject(
      algebraFactory.createExtend(
        algebraFactory.createGroup(
          operation,
          [],
          [algebraFactory.createBoundAggregate(dataFactory.variable('var0'), 'count', algebraFactory.createWildcardExpression(), false)],
        ),
        dataFactory.variable('count'),
        algebraFactory.createTermExpression(dataFactory.variable('var0')),
      ),
      [dataFactory.variable('count')],
    ));
  }

  /**
   * Convert an operation to a query for this pattern.
   * @param {Algebra.Operation} operation A query operation.
   * @return {string} A query string.
   */
  static operationToQuery(operation) {
    return (0, sparqlalgebrajs_1.toSparql)(operation, { sparqlStar: true });
  }

  /**
   * Collect the variables of the given operation that may be left unbound in results.
   * These are variables that only occur in the right-hand side of a left join,
   * that are missing in at least one VALUES binding, or that do not occur in every branch of a union.
   * @param operation A query operation.
   * @returns The unique variables that may produce undefined values.
   */
  static getOperationUndefs(operation) {
    const variables = [];
    sparqlalgebrajs_1.Util.recurseOperation(operation, {
      leftjoin(subOperation) {
        const left = sparqlalgebrajs_1.Util.inScopeVariables(subOperation.input[0]);
        const right = sparqlalgebrajs_1.Util.inScopeVariables(subOperation.input[1]);
        for (const varRight of right) {
          if (!left.some(varLeft => varLeft.equals(varRight))) {
            variables.push(varRight);
          }
        }
        return false;
      },
      values(values) {
        for (const variable of values.variables) {
          if (values.bindings.some(bindings => !(`?${variable.value}` in bindings))) {
            variables.push(variable);
          }
        }
        return false;
      },
      union(union) {
        // Determine variables in scope of the union branches that are not occurring in every branch
        const scopedVariables = union.input.map(sparqlalgebrajs_1.Util.inScopeVariables);
        for (const variable of (0, rdf_terms_1.uniqTerms)(scopedVariables.flat())) {
          if (!scopedVariables.every(input => input.some(inputVar => inputVar.equals(variable)))) {
            variables.push(variable);
          }
        }
        return true;
      },
    });
    return (0, rdf_terms_1.uniqTerms)(variables);
  }

  /**
   * Send a SPARQL query to a SPARQL endpoint and retrieve its bindings as a stream.
   * A warning is logged for every expected variable without a value that is not allowed to be undefined;
   * variables without a value are omitted from the resulting bindings.
   * @param {string} endpoint A SPARQL endpoint URL.
   * @param {string} query A SPARQL query string.
   * @param {RDF.Variable[]} variables The expected variables.
   * @param {IActionContext} context The source context.
   * @param undefVariables Variables that may have undefs.
   * @return {BindingsStream} A stream of bindings.
   */
  async queryBindingsRemote(endpoint, query, variables, context, undefVariables) {
    // Index undef variables
    const undefVariablesIndex = new Set();
    for (const undefVariable of undefVariables) {
      undefVariablesIndex.add(undefVariable.value);
    }
    // Make the merged context visible to the fetch callback for the duration of the call
    this.lastSourceContext = this.context.merge(context);
    const rawStream = await this.endpointFetcher.fetchBindings(endpoint, query);
    this.lastSourceContext = undefined;
    return (0, asynciterator_1.wrap)(rawStream, { autoStart: false, maxBufferSize: Number.POSITIVE_INFINITY })
      .map((rawData) => this.bindingsFactory.bindings(variables
        .map((variable) => {
          const value = rawData[`?${variable.value}`];
          if (!undefVariablesIndex.has(variable.value) && !value) {
            core_1.Actor.getContextLogger(this.context)?.warn(`The endpoint ${endpoint} failed to provide a binding for ${variable.value}.`);
          }
          return [variable, value];
        })
        .filter(([_, v]) => Boolean(v))));
  }

  /**
   * @returns A string representation containing the endpoint URL.
   */
  toString() {
    return `QuerySourceSparql(${this.url})`;
  }
}
exports.QuerySourceSparql = QuerySourceSparql;
/**
 * Selector shape returned by `getSelectorShape`:
 * a disjunction with a single wildcard-operation child that has join bindings enabled.
 */
QuerySourceSparql.SELECTOR_SHAPE = {
  type: 'disjunction',
  children: [
    { type: 'operation', operation: { operationType: 'wildcard' }, joinBindings: true },
  ],
};
//# sourceMappingURL=QuerySourceSparql.js.map