@comunica/actor-query-source-identify-hypermedia-sparql
Version:
A SPARQL query-source-identify-hypermedia actor
476 lines • 23.9 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.QuerySourceSparql = void 0;
const context_entries_1 = require("@comunica/context-entries");
const core_1 = require("@comunica/core");
const utils_algebra_1 = require("@comunica/utils-algebra");
const utils_metadata_1 = require("@comunica/utils-metadata");
const utils_query_operation_1 = require("@comunica/utils-query-operation");
const asynciterator_1 = require("asynciterator");
const fetch_sparql_endpoint_1 = require("fetch-sparql-endpoint");
const lru_cache_1 = require("lru-cache");
const rdf_terms_1 = require("rdf-terms");
/**
 * A query source that evaluates operations against a remote SPARQL endpoint.
 *
 * Algebra operations are serialized into SPARQL query strings via `mediatorQuerySerialize`,
 * sent through a `SparqlEndpointFetcher` whose HTTP calls go through `mediatorHttp`,
 * and the resulting streams asynchronously receive a `metadata` property
 * holding a cardinality and the in-scope variables.
 */
class QuerySourceSparql {
    /** Reference value of this source; the constructor sets it to `urlBackup`. */
    referenceValue;
    /** Endpoint URL that queries are sent to; replaced by `urlBackup` after a 404 response. */
    url;
    /** URL to fall back to when a request to `url` yields a 404. */
    urlBackup;
    /** Context passed to the constructor; merged with per-query contexts and used for logging and serialization. */
    context;
    /** Mediator through which the endpoint fetcher performs its HTTP requests. */
    mediatorHttp;
    /** Mediator that serializes algebra operations into query strings. */
    mediatorQuerySerialize;
    /** How join bindings are pushed into an operation; only 'values' is implemented. */
    bindMethod;
    /** Delay passed to `setTimeout` after which a pending COUNT query is answered with an infinite estimate. */
    countTimeout;
    /** Whether COUNT queries may be sent to the endpoint to determine cardinalities. */
    cardinalityCountQueries;
    /** Whether a local cardinality estimate is attempted before falling back to a COUNT query. */
    cardinalityEstimateConstruction;
    /** `metadata.defaultGraph`; used to select the datasets considered during local estimation. */
    defaultGraph;
    /** `metadata.unionDefaultGraph` (false when absent); when truthy, all datasets are considered during local estimation. */
    unionDefaultGraph;
    /** Set built from `metadata.propertyFeatures`, or undefined when the metadata has none. */
    propertyFeatures;
    /** `metadata.datasets`; the datasets used for local cardinality estimation. */
    datasets;
    /** `metadata.extensionFunctions`; advertised in the selector shape when defined. */
    extensionFunctions;
    /** Factory for RDF terms (used here to create variables). */
    dataFactory;
    /** Factory for algebra operations. */
    algebraFactory;
    /** Factory for bindings objects. */
    bindingsFactory;
    /** The `SparqlEndpointFetcher` instance that issues all requests. */
    endpointFetcher;
    /** LRU cache from count-query string to cardinality; undefined when `cacheSize` is not positive. */
    cache;
    /** Context of the most recently started request; read by the fetcher's `fetch` callback. */
    lastSourceContext;
    /**
     * @param url The endpoint URL to send queries to.
     * @param urlBackup The URL to switch to when `url` responds with a 404.
     * @param context The base context of this source.
     * @param mediatorHttp Mediator used for HTTP requests.
     * @param mediatorQuerySerialize Mediator used to serialize operations into query strings.
     * @param bindMethod The method for adding join bindings to operations.
     * @param dataFactory The RDF term factory.
     * @param algebraFactory The algebra factory.
     * @param bindingsFactory The bindings factory.
     * @param forceHttpGet If truthy, the fetcher uses 'GET', otherwise 'POST'.
     * @param cacheSize Maximum number of cached cardinalities; no cache is created unless this is greater than 0.
     * @param countTimeout Timeout for COUNT queries, passed to `setTimeout`.
     * @param cardinalityCountQueries Whether COUNT queries may be sent.
     * @param cardinalityEstimateConstruction Whether local cardinality estimation is attempted first.
     * @param forceGetIfUrlLengthBelow Passed through to the endpoint fetcher.
     * @param parseUnsupportedVersions Passed through to the endpoint fetcher.
     * @param metadata Endpoint metadata; `postAccepted`, `defaultGraph`, `unionDefaultGraph`, `datasets`,
     *                 `extensionFunctions` and `propertyFeatures` are read from it.
     */
    constructor(url, urlBackup, context, mediatorHttp, mediatorQuerySerialize, bindMethod, dataFactory, algebraFactory, bindingsFactory, forceHttpGet, cacheSize, countTimeout, cardinalityCountQueries, cardinalityEstimateConstruction, forceGetIfUrlLengthBelow, parseUnsupportedVersions, metadata) {
        this.referenceValue = urlBackup;
        this.url = url;
        this.urlBackup = urlBackup;
        this.context = context;
        this.mediatorHttp = mediatorHttp;
        this.mediatorQuerySerialize = mediatorQuerySerialize;
        this.bindMethod = bindMethod;
        this.dataFactory = dataFactory;
        this.algebraFactory = algebraFactory;
        this.bindingsFactory = bindingsFactory;
        this.endpointFetcher = new fetch_sparql_endpoint_1.SparqlEndpointFetcher({
            method: forceHttpGet ? 'GET' : 'POST',
            fetch: async (input, init) => {
                const response = await this.mediatorHttp.mediate({ input, init, context: this.lastSourceContext });
                if (response.status !== 404 || this.url === this.urlBackup) {
                    return response;
                }
                // If we encounter a 404, try our backup URL.
                // After retrying the request with the new URL, we replace the URL for future requests.
                core_1.Actor.getContextLogger(this.context)?.warn(`Encountered a 404 when requesting ${this.url} according to the service description of ${this.urlBackup}. This is a server configuration issue. Retrying the current and modifying future requests to ${this.urlBackup} instead.`);
                const backupInput = input.replace(this.url, this.urlBackup);
                this.url = this.urlBackup;
                return await this.mediatorHttp.mediate({ input: backupInput, init, context: this.lastSourceContext });
            },
            prefixVariableQuestionMark: true,
            dataFactory,
            forceGetIfUrlLengthBelow,
            directPost: metadata.postAccepted && !metadata.postAccepted.includes('application/x-www-form-urlencoded'),
            parseUnsupportedVersions,
        });
        this.cache = cacheSize > 0 ?
            new lru_cache_1.LRUCache({ max: cacheSize }) :
            undefined;
        this.countTimeout = countTimeout;
        this.cardinalityCountQueries = cardinalityCountQueries;
        this.cardinalityEstimateConstruction = cardinalityEstimateConstruction;
        this.defaultGraph = metadata.defaultGraph;
        this.unionDefaultGraph = metadata.unionDefaultGraph ?? false;
        this.datasets = metadata.datasets;
        this.extensionFunctions = metadata.extensionFunctions;
        this.propertyFeatures = metadata.propertyFeatures ? new Set(metadata.propertyFeatures) : undefined;
    }
    /**
     * @returns {Promise<number>} The filter factor of this source, which is always 1.
     */
    async getFilterFactor() {
        return 1;
    }
    /**
     * Describe which operations this source accepts.
     * The shape is a conjunction of (a) a disjunction accepting any operation with join bindings,
     * plus expressions over the endpoint's extension functions when those are known,
     * and (b) a negation that excludes DISTINCT over CONSTRUCT.
     * @returns {Promise<object>} The selector shape.
     */
    async getSelectorShape() {
        const acceptedOperations = [
            {
                type: 'operation',
                operation: { operationType: 'wildcard' },
                joinBindings: true,
            },
        ];
        if (this.extensionFunctions) {
            acceptedOperations.push({
                type: 'operation',
                operation: {
                    operationType: 'type',
                    type: utils_algebra_1.Algebra.Types.EXPRESSION,
                    extensionFunctions: this.extensionFunctions,
                },
                joinBindings: true,
            });
        }
        // DISTINCT CONSTRUCT is not allowed in SPARQL 1.1, so we explicitly disallow it.
        const distinctConstruct = {
            type: 'operation',
            operation: { operationType: 'type', type: utils_algebra_1.Algebra.Types.DISTINCT },
            children: [
                {
                    type: 'operation',
                    operation: { operationType: 'type', type: utils_algebra_1.Algebra.Types.CONSTRUCT },
                    children: [
                        {
                            type: 'operation',
                            operation: { operationType: 'wildcard' },
                            joinBindings: true,
                        },
                    ],
                },
            ],
        };
        return {
            type: 'conjunction',
            children: [
                { type: 'disjunction', children: acceptedOperations },
                { type: 'negation', child: distinctConstruct },
            ],
        };
    }
    /**
     * Evaluate an operation as a SELECT query and return its bindings.
     * @param operationIn The operation to evaluate.
     * @param context The query context.
     * @param options Optional settings; when `options.joinBindings` is set,
     *                those bindings are first added to the operation.
     * @returns A lazily started bindings iterator on which the `metadata` property is set asynchronously.
     */
    queryBindings(operationIn, context, options) {
        // If bindings are passed, modify the operation.
        const operationPromise = options?.joinBindings ?
            QuerySourceSparql.addBindingsToOperation(this.algebraFactory, this.bindMethod, operationIn, options.joinBindings) :
            Promise.resolve(operationIn);
        const bindings = new asynciterator_1.TransformIterator(async () => {
            // Prepare queries
            const operation = await operationPromise;
            const variables = utils_algebra_1.algebraUtils.inScopeVariables(operation);
            const queryString = context.get(context_entries_1.KeysInitQuery.queryString);
            const queryFormat = context.getSafe(context_entries_1.KeysInitQuery.queryFormat);
            // The query string from the context is reused as-is when no bindings were joined in
            // and its format is SPARQL; otherwise the operation is serialized.
            const selectQuery = !options?.joinBindings && queryString && queryFormat.language === 'sparql' ?
                queryString :
                await this.operationToSelectQuery(this.algebraFactory, operation, variables);
            const undefVariables = QuerySourceSparql.getOperationUndefs(operation);
            return this.queryBindingsRemote(this.url, selectQuery, variables, context, undefVariables);
        }, { autoStart: false });
        this.attachMetadata(bindings, context, operationPromise);
        return bindings;
    }
    /**
     * Evaluate an operation that produces quads.
     * @param operation The operation to evaluate; metadata is computed over `operation.input`.
     * @param context The query context.
     * @returns A lazily started quad iterator on which the `metadata` property is set asynchronously.
     */
    queryQuads(operation, context) {
        const fetchQuads = async () => {
            this.lastSourceContext = this.context.merge(context);
            const query = context.get(context_entries_1.KeysInitQuery.queryString) ?? await this.operationToQuery(operation);
            return await this.endpointFetcher.fetchTriples(this.url, query);
        };
        const quads = (0, asynciterator_1.wrap)(fetchQuads(), { autoStart: false, maxBufferSize: Number.POSITIVE_INFINITY });
        this.attachMetadata(quads, context, Promise.resolve(operation.input));
        return quads;
    }
    /**
     * Evaluate an ASK operation.
     * @param operation The operation to evaluate.
     * @param context The query context.
     * @returns {Promise<boolean>} The boolean result.
     */
    async queryBoolean(operation, context) {
        // Shortcut the ASK query to return true when supported propertyFeature predicates are used in it.
        if (this.operationUsesPropertyFeatures(operation)) {
            return true;
        }
        // Without propertyFeature overlap, perform the actual ASK query.
        this.lastSourceContext = this.context.merge(context);
        const query = context.get(context_entries_1.KeysInitQuery.queryString) ?? await this.operationToQuery(operation);
        return this.endpointFetcher.fetchAsk(this.url, query);
    }
    /**
     * Evaluate an update operation.
     * @param operation The operation to evaluate.
     * @param context The query context.
     * @returns {Promise<void>} Resolves with the result of the fetcher's `fetchUpdate` call.
     */
    async queryVoid(operation, context) {
        this.lastSourceContext = this.context.merge(context);
        const query = context.get(context_entries_1.KeysInitQuery.queryString) ?? await this.operationToQuery(operation);
        return this.endpointFetcher.fetchUpdate(this.url, query);
    }
    /**
     * Asynchronously set the `metadata` property (state, cardinality, variables) on the given stream.
     *
     * The cardinality is taken, in order, from: the cache, a local estimate (if enabled and finite),
     * or a COUNT query against the endpoint (if enabled). In every other case, including errors and
     * timeouts, an infinite estimate is used.
     * @param target The stream to set the `metadata` property on.
     * @param context The query context.
     * @param operationPromise A promise resolving to the operation to determine the metadata for.
     */
    attachMetadata(target, context, operationPromise) {
        let variablesCount = [];
        // Declared outside of the try-block so that the failure path can cancel a pending timeout as well.
        let timeoutHandler;
        const unknownCardinality = () => ({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url });
        // eslint-disable-next-line no-async-promise-executor,ts/no-misused-promises
        new Promise(async (resolve, reject) => {
            try {
                const operation = await operationPromise;
                const variablesScoped = utils_algebra_1.algebraUtils.inScopeVariables(operation);
                const countQuery = await this.operationToNormalizedCountQuery(operation);
                const undefVariables = QuerySourceSparql.getOperationUndefs(operation);
                variablesCount = variablesScoped.map(variable => ({
                    variable,
                    canBeUndef: undefVariables.some(undefVariable => undefVariable.equals(variable)),
                }));
                const cachedCardinality = this.cache?.get(countQuery);
                if (cachedCardinality) {
                    return resolve(cachedCardinality);
                }
                // Attempt to estimate locally prior to sending a COUNT request, as this should be much faster.
                // The estimates may be off by varying amounts, so this is set behind a configuration flag.
                if (this.cardinalityEstimateConstruction) {
                    const localEstimate = await this.estimateOperationCardinality(operation);
                    if (Number.isFinite(localEstimate.value)) {
                        this.cache?.set(countQuery, localEstimate);
                        return resolve(localEstimate);
                    }
                }
                // Don't send count queries if disabled.
                if (!this.cardinalityCountQueries) {
                    return resolve(unknownCardinality());
                }
                timeoutHandler = setTimeout(() => resolve(unknownCardinality()), this.countTimeout);
                const varCount = this.dataFactory.variable('count');
                const bindingsStream = await this
                    .queryBindingsRemote(this.url, countQuery, [varCount], context, []);
                bindingsStream
                    .on('data', (bindings) => {
                        clearTimeout(timeoutHandler);
                        const count = bindings.get(varCount);
                        const cardinality = unknownCardinality();
                        if (count) {
                            const cardinalityValue = Number.parseInt(count.value, 10);
                            if (!Number.isNaN(cardinalityValue)) {
                                cardinality.type = 'exact';
                                cardinality.value = cardinalityValue;
                                this.cache?.set(countQuery, cardinality);
                            }
                        }
                        return resolve(cardinality);
                    })
                    .on('error', () => {
                        clearTimeout(timeoutHandler);
                        resolve(unknownCardinality());
                    })
                    .on('end', () => {
                        clearTimeout(timeoutHandler);
                        resolve(unknownCardinality());
                    });
            }
            catch (error) {
                // The COUNT request may fail after the timeout was armed; cancel it so no timer is left pending.
                clearTimeout(timeoutHandler);
                reject(error);
            }
        })
            .then(cardinality => target.setProperty('metadata', {
                state: new utils_metadata_1.MetadataValidationState(),
                cardinality,
                variables: variablesCount,
            }))
            .catch(() => target.setProperty('metadata', {
                state: new utils_metadata_1.MetadataValidationState(),
                cardinality: unknownCardinality(),
                variables: variablesCount,
            }));
    }
    /**
     * Convert an algebra operation into a COUNT query string. If the operation is a single pattern,
     * its subject, predicate and object variables are first renamed to s, p and o
     * to increase the chance of cache hits.
     * @param {Algebra.Operation} operation The operation to convert into a query string.
     * @returns {Promise<string>} Query string for a COUNT query over the operation.
     */
    async operationToNormalizedCountQuery(operation) {
        let normalizedOperation = operation;
        if ((0, utils_algebra_1.isKnownOperation)(operation, utils_algebra_1.Algebra.Types.PATTERN)) {
            const normalizeTerm = (term, name) => (term.termType === 'Variable' ? this.dataFactory.variable(name) : term);
            normalizedOperation = this.algebraFactory.createPattern(normalizeTerm(operation.subject, 's'), normalizeTerm(operation.predicate, 'p'), normalizeTerm(operation.object, 'o'));
        }
        return await this.operationToCountQuery(this.dataFactory, this.algebraFactory, normalizedOperation);
    }
    /**
     * Performs local cardinality estimation for the specified SPARQL algebra operation, which should
     * result in better estimation performance at the expense of accuracy.
     * Operations that use one of the endpoint's property features are estimated at 1.
     * @param {Algebra.Operation} operation A query operation.
     * @returns The estimated cardinality.
     */
    async estimateOperationCardinality(operation) {
        if (this.operationUsesPropertyFeatures(operation)) {
            return { type: 'estimate', value: 1, dataset: this.url };
        }
        const dataset = {
            // Resolves to undefined when nothing is cached and the endpoint exposes no datasets.
            getCardinality: async (subOperation) => {
                const queryString = await this.operationToNormalizedCountQuery(subOperation);
                const cachedCardinality = this.cache?.get(queryString);
                if (cachedCardinality) {
                    return cachedCardinality;
                }
                if (!this.datasets) {
                    return undefined;
                }
                const cardinalities = await Promise.all(this.datasets
                    .filter(ds => this.unionDefaultGraph || (this.defaultGraph && ds.uri.endsWith(this.defaultGraph)))
                    .map(ds => (0, utils_query_operation_1.estimateCardinality)(subOperation, ds)));
                return {
                    type: cardinalities.some(card => card.type === 'estimate') ? 'estimate' : 'exact',
                    value: cardinalities.reduce((acc, card) => acc + card.value, 0),
                    dataset: this.url,
                };
            },
            source: this.url,
            uri: this.url,
        };
        return (0, utils_query_operation_1.estimateCardinality)(operation, dataset);
    }
    /**
     * Checks whether the provided operation makes use of this endpoint's property features,
     * if the endpoint has property features detected.
     * @param {Algebra.Operation} operation The operation to check.
     * @returns {boolean} Whether the operation makes use of property features.
     */
    operationUsesPropertyFeatures(operation) {
        if (!this.propertyFeatures) {
            return false;
        }
        let propertyFeaturesUsed = false;
        utils_algebra_1.algebraUtils.visitOperation(operation, {
            [utils_algebra_1.Algebra.Types.PATTERN]: {
                visitor: (subOp) => {
                    if (subOp.predicate.termType === 'NamedNode' && this.propertyFeatures.has(subOp.predicate.value)) {
                        propertyFeaturesUsed = true;
                    }
                    return false;
                },
            },
            [utils_algebra_1.Algebra.Types.LINK]: {
                visitor: (subOp) => {
                    if (this.propertyFeatures.has(subOp.iri.value)) {
                        propertyFeaturesUsed = true;
                    }
                    return false;
                },
            },
            [utils_algebra_1.Algebra.Types.NPS]: {
                visitor: (subOp) => {
                    if (subOp.iris.some(iri => this.propertyFeatures.has(iri.value))) {
                        propertyFeaturesUsed = true;
                    }
                    return false;
                },
            },
        });
        return propertyFeaturesUsed;
    }
    /**
     * Create an operation that includes the bindings from the given bindings stream.
     * The bindings stream is fully consumed before the bind method is inspected.
     * @param algebraFactory The algebra factory.
     * @param bindMethod A method for adding bindings to an operation.
     * @param operation The operation to bind to.
     * @param addBindings The bindings to add.
     * @param addBindings.bindings The bindings stream.
     * @param addBindings.metadata The bindings metadata.
     * @returns A promise resolving to the operation joined with the given bindings.
     * @throws {Error} (as a rejection) if the bind method is not implemented or not recognized.
     */
    static async addBindingsToOperation(algebraFactory, bindMethod, operation, addBindings) {
        const bindings = await addBindings.bindings.toArray();
        switch (bindMethod) {
            case 'values':
                return algebraFactory.createJoin([
                    algebraFactory.createValues(addBindings.metadata.variables.map(v => v.variable), bindings.map(binding => Object.fromEntries([...binding]
                        .map(([key, value]) => [key.value, value])))),
                    operation,
                ], false);
            case 'union': {
                throw new Error('Not implemented yet: "union" case');
            }
            case 'filter': {
                throw new Error('Not implemented yet: "filter" case');
            }
            default: {
                // Fail loudly instead of resolving to undefined, which would only surface later as an unrelated error.
                throw new Error(`Unsupported bind method: "${bindMethod}"`);
            }
        }
    }
    /**
     * Convert an operation to a select query for this pattern.
     * @param algebraFactory The algebra factory.
     * @param {Algebra.Operation} operation A query operation.
     * @param {RDF.Variable[]} variables The variables in scope for the operation.
     * @return {Promise<string>} A select query string.
     */
    operationToSelectQuery(algebraFactory, operation, variables) {
        return this.operationToQuery(algebraFactory.createProject(operation, variables));
    }
    /**
     * Convert an operation to a count query for the number of matching triples for this pattern.
     * The produced algebra groups the operation, aggregates COUNT(*) into `var0`,
     * extends it into `count`, and projects `count`.
     * @param dataFactory The data factory.
     * @param algebraFactory The algebra factory.
     * @param {Algebra.Operation} operation A query operation.
     * @return {Promise<string>} A count query string.
     */
    operationToCountQuery(dataFactory, algebraFactory, operation) {
        const countAggregate = algebraFactory.createBoundAggregate(dataFactory.variable('var0'), 'count', algebraFactory.createWildcardExpression(), false);
        const grouped = algebraFactory.createGroup(operation, [], [countAggregate]);
        const extended = algebraFactory.createExtend(grouped, dataFactory.variable('count'), algebraFactory.createTermExpression(dataFactory.variable('var0')));
        const projected = algebraFactory.createProject(extended, [dataFactory.variable('count')]);
        return this.operationToQuery(projected);
    }
    /**
     * Convert an operation to a query for this pattern.
     * @param {Algebra.Operation} operation A query operation.
     * @return {Promise<string>} A query string.
     */
    async operationToQuery(operation) {
        const { query } = await this.mediatorQuerySerialize.mediate({
            queryFormat: { language: 'sparql', version: '1.2' },
            operation,
            newlines: false,
            indentWidth: 0,
            context: this.context,
        });
        return query;
    }
    /**
     * Collect the variables of the given operation that may be left undefined in results:
     * variables only in the right side of a left join, variables missing from at least one VALUES row,
     * and variables that do not occur in every branch of a union.
     * @param operation The operation to inspect.
     * @returns The deduplicated list of variables that may be undefined.
     */
    static getOperationUndefs(operation) {
        const variables = [];
        utils_algebra_1.algebraUtils.visitOperation(operation, {
            [utils_algebra_1.Algebra.Types.LEFT_JOIN]: { preVisitor: (subOperation) => {
                    const left = utils_algebra_1.algebraUtils.inScopeVariables(subOperation.input[0]);
                    const right = utils_algebra_1.algebraUtils.inScopeVariables(subOperation.input[1]);
                    for (const varRight of right) {
                        if (!left.some(varLeft => varLeft.equals(varRight))) {
                            variables.push(varRight);
                        }
                    }
                    return { continue: false };
                } },
            [utils_algebra_1.Algebra.Types.VALUES]: { preVisitor: (values) => {
                    for (const variable of values.variables) {
                        if (values.bindings.some(bindings => !(variable.value in bindings))) {
                            variables.push(variable);
                        }
                    }
                    return { continue: false };
                } },
            [utils_algebra_1.Algebra.Types.UNION]: { preVisitor: (union) => {
                    // Determine variables in scope of the union branches that are not occurring in every branch
                    const scopedVariables = union.input.map(op => utils_algebra_1.algebraUtils.inScopeVariables(op));
                    for (const variable of (0, rdf_terms_1.uniqTerms)(scopedVariables.flat())) {
                        if (!scopedVariables.every(input => input.some(inputVar => inputVar.equals(variable)))) {
                            variables.push(variable);
                        }
                    }
                    return {};
                } },
        });
        return (0, rdf_terms_1.uniqTerms)(variables);
    }
    /**
     * Send a SPARQL query to a SPARQL endpoint and retrieve its bindings as a stream.
     * A warning is logged for every result row that lacks a value for a variable
     * that is not listed in `undefVariables`.
     * @param {string} endpoint A SPARQL endpoint URL.
     * @param {string} query A SPARQL query string.
     * @param {RDF.Variable[]} variables The expected variables.
     * @param {IActionContext} context The source context.
     * @param undefVariables Variables that may have undefs.
     * @return {Promise<BindingsStream>} A promise resolving to a stream of bindings.
     */
    async queryBindingsRemote(endpoint, query, variables, context, undefVariables) {
        // Index undef variables
        const undefVariablesSet = new Set(undefVariables.map(v => v.value));
        this.lastSourceContext = this.context.merge(context);
        const rawStream = await this.endpointFetcher.fetchBindings(endpoint, query);
        const wrapped = (0, asynciterator_1.wrap)(rawStream, { autoStart: false, maxBufferSize: Number.POSITIVE_INFINITY });
        return wrapped.map((rawData) => {
            const entries = [];
            for (const variable of variables) {
                const value = rawData[`?${variable.value}`];
                if (value) {
                    entries.push([variable, value]);
                }
                else if (!undefVariablesSet.has(variable.value)) {
                    core_1.Actor.getContextLogger(this.context)?.warn(`The endpoint ${endpoint} failed to provide a binding for ${variable.value}.`);
                }
            }
            return this.bindingsFactory.bindings(entries);
        });
    }
    /**
     * @returns {string} A label of this source that includes its current URL.
     */
    toString() {
        return `QuerySourceSparql(${this.url})`;
    }
}
exports.QuerySourceSparql = QuerySourceSparql;
//# sourceMappingURL=QuerySourceSparql.js.map