UNPKG

@comunica/actor-query-source-identify-hypermedia-sparql

Version:

A sparql query-source-identify-hypermedia actor

476 lines 23.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.QuerySourceSparql = void 0; const context_entries_1 = require("@comunica/context-entries"); const core_1 = require("@comunica/core"); const utils_algebra_1 = require("@comunica/utils-algebra"); const utils_metadata_1 = require("@comunica/utils-metadata"); const utils_query_operation_1 = require("@comunica/utils-query-operation"); const asynciterator_1 = require("asynciterator"); const fetch_sparql_endpoint_1 = require("fetch-sparql-endpoint"); const lru_cache_1 = require("lru-cache"); const rdf_terms_1 = require("rdf-terms"); class QuerySourceSparql { referenceValue; url; urlBackup; context; mediatorHttp; mediatorQuerySerialize; bindMethod; countTimeout; cardinalityCountQueries; cardinalityEstimateConstruction; defaultGraph; unionDefaultGraph; propertyFeatures; datasets; extensionFunctions; dataFactory; algebraFactory; bindingsFactory; endpointFetcher; cache; lastSourceContext; constructor(url, urlBackup, context, mediatorHttp, mediatorQuerySerialize, bindMethod, dataFactory, algebraFactory, bindingsFactory, forceHttpGet, cacheSize, countTimeout, cardinalityCountQueries, cardinalityEstimateConstruction, forceGetIfUrlLengthBelow, parseUnsupportedVersions, metadata) { this.referenceValue = urlBackup; this.url = url; this.urlBackup = urlBackup; this.context = context; this.mediatorHttp = mediatorHttp; this.mediatorQuerySerialize = mediatorQuerySerialize; this.bindMethod = bindMethod; this.dataFactory = dataFactory; this.algebraFactory = algebraFactory; this.bindingsFactory = bindingsFactory; this.endpointFetcher = new fetch_sparql_endpoint_1.SparqlEndpointFetcher({ method: forceHttpGet ? 'GET' : 'POST', fetch: async (input, init) => { const response = await this.mediatorHttp.mediate({ input, init, context: this.lastSourceContext }); // If we encounter a 404, try our backup URL. // After retrying the request with the new URL, we replace the URL for future requests. if (response.status === 404 && this.url !== this.urlBackup) { core_1.Actor.getContextLogger(this.context)?.warn(`Encountered a 404 when requesting ${this.url} according to the service description of ${this.urlBackup}. This is a server configuration issue. Retrying the current and modifying future requests to ${this.urlBackup} instead.`); input = input.replace(this.url, this.urlBackup); this.url = this.urlBackup; return await this.mediatorHttp.mediate({ input, init, context: this.lastSourceContext }); } return response; }, prefixVariableQuestionMark: true, dataFactory, forceGetIfUrlLengthBelow, directPost: metadata.postAccepted && !metadata.postAccepted.includes('application/x-www-form-urlencoded'), parseUnsupportedVersions, }); this.cache = cacheSize > 0 ? new lru_cache_1.LRUCache({ max: cacheSize }) : undefined; this.countTimeout = countTimeout; this.cardinalityCountQueries = cardinalityCountQueries; this.cardinalityEstimateConstruction = cardinalityEstimateConstruction; this.defaultGraph = metadata.defaultGraph; this.unionDefaultGraph = metadata.unionDefaultGraph ?? false; this.datasets = metadata.datasets; this.extensionFunctions = metadata.extensionFunctions; this.propertyFeatures = metadata.propertyFeatures ? new Set(metadata.propertyFeatures) : undefined; } async getFilterFactor() { return 1; } async getSelectorShape() { const innerDisjunction = { type: 'disjunction', children: [ { type: 'operation', operation: { operationType: 'wildcard' }, joinBindings: true, }, ], }; if (this.extensionFunctions) { innerDisjunction.children.push({ type: 'operation', operation: { operationType: 'type', type: utils_algebra_1.Algebra.Types.EXPRESSION, extensionFunctions: this.extensionFunctions, }, joinBindings: true, }); } return { type: 'conjunction', children: [ innerDisjunction, { // DISTINCT CONSTRUCT is not allowed in SPARQL 1.1, so we explicitly disallowed it. type: 'negation', child: { type: 'operation', operation: { operationType: 'type', type: utils_algebra_1.Algebra.Types.DISTINCT }, children: [ { type: 'operation', operation: { operationType: 'type', type: utils_algebra_1.Algebra.Types.CONSTRUCT }, children: [ { type: 'operation', operation: { operationType: 'wildcard' }, joinBindings: true, }, ], }, ], }, }, ], }; } queryBindings(operationIn, context, options) { // If bindings are passed, modify the operations let operationPromise; if (options?.joinBindings) { operationPromise = QuerySourceSparql.addBindingsToOperation(this.algebraFactory, this.bindMethod, operationIn, options.joinBindings); } else { operationPromise = Promise.resolve(operationIn); } const bindings = new asynciterator_1.TransformIterator(async () => { // Prepare queries const operation = await operationPromise; const variables = utils_algebra_1.algebraUtils.inScopeVariables(operation); const queryString = context.get(context_entries_1.KeysInitQuery.queryString); const queryFormat = context.getSafe(context_entries_1.KeysInitQuery.queryFormat); const selectQuery = !options?.joinBindings && queryString && queryFormat.language === 'sparql' ? queryString : await this.operationToSelectQuery(this.algebraFactory, operation, variables); const undefVariables = QuerySourceSparql.getOperationUndefs(operation); return this.queryBindingsRemote(this.url, selectQuery, variables, context, undefVariables); }, { autoStart: false }); this.attachMetadata(bindings, context, operationPromise); return bindings; } queryQuads(operation, context) { const quads = (0, asynciterator_1.wrap)((async () => { this.lastSourceContext = this.context.merge(context); const query = context.get(context_entries_1.KeysInitQuery.queryString) ?? await this.operationToQuery(operation); const rawStream = await this.endpointFetcher.fetchTriples(this.url, query); return rawStream; })(), { autoStart: false, maxBufferSize: Number.POSITIVE_INFINITY }); this.attachMetadata(quads, context, Promise.resolve(operation.input)); return quads; } async queryBoolean(operation, context) { // Shortcut the ASK query to return true when supported propertyFeature predicates are used in it. if (this.operationUsesPropertyFeatures(operation)) { return true; } // Without propertyFeature overlap, perform the actual ASK query. this.lastSourceContext = this.context.merge(context); const query = context.get(context_entries_1.KeysInitQuery.queryString) ?? await this.operationToQuery(operation); const promise = this.endpointFetcher.fetchAsk(this.url, query); return promise; } async queryVoid(operation, context) { this.lastSourceContext = this.context.merge(context); const query = context.get(context_entries_1.KeysInitQuery.queryString) ?? await this.operationToQuery(operation); const promise = this.endpointFetcher.fetchUpdate(this.url, query); return promise; } attachMetadata(target, context, operationPromise) { // Emit metadata containing the estimated count let variablesCount = []; // eslint-disable-next-line no-async-promise-executor,ts/no-misused-promises new Promise(async (resolve, reject) => { try { const operation = await operationPromise; const variablesScoped = utils_algebra_1.algebraUtils.inScopeVariables(operation); const countQuery = await this.operationToNormalizedCountQuery(operation); const undefVariables = QuerySourceSparql.getOperationUndefs(operation); variablesCount = variablesScoped.map(variable => ({ variable, canBeUndef: undefVariables.some(undefVariable => undefVariable.equals(variable)), })); const cachedCardinality = this.cache?.get(countQuery); if (cachedCardinality) { return resolve(cachedCardinality); } // Attempt to estimate locally prior to sending a COUNT request, as this should be much faster. // The estimates may be off by varying amounts, so this is set behind a configuration flag. if (this.cardinalityEstimateConstruction) { const localEstimate = await this.estimateOperationCardinality(operation); if (Number.isFinite(localEstimate.value)) { this.cache?.set(countQuery, localEstimate); return resolve(localEstimate); } } // Don't send count queries if disabled. if (!this.cardinalityCountQueries) { return resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url }); } const timeoutHandler = setTimeout(() => resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url, }), this.countTimeout); const varCount = this.dataFactory.variable('count'); const bindingsStream = await this .queryBindingsRemote(this.url, countQuery, [varCount], context, []); bindingsStream .on('data', (bindings) => { clearTimeout(timeoutHandler); const count = bindings.get(varCount); const cardinality = { type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url, }; if (count) { const cardinalityValue = Number.parseInt(count.value, 10); if (!Number.isNaN(cardinalityValue)) { cardinality.type = 'exact'; cardinality.value = cardinalityValue; this.cache?.set(countQuery, cardinality); } } return resolve(cardinality); }) .on('error', () => { clearTimeout(timeoutHandler); resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url }); }) .on('end', () => { clearTimeout(timeoutHandler); resolve({ type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url }); }); } catch (error) { reject(error); } }) .then(cardinality => target.setProperty('metadata', { state: new utils_metadata_1.MetadataValidationState(), cardinality, variables: variablesCount, })) .catch(() => target.setProperty('metadata', { state: new utils_metadata_1.MetadataValidationState(), cardinality: { type: 'estimate', value: Number.POSITIVE_INFINITY, dataset: this.url }, variables: variablesCount, })); } /** * Convert an algebra operation into a query string, and if the operation is a simple triple pattern, * then also replace any variables with s, p, and o to increase the chance of cache hits. * @param {Algebra.Operation} operation The operation to convert into a query string. * @returns {string} Query string for a COUNT query over the operation. */ async operationToNormalizedCountQuery(operation) { const normalizedOperation = (0, utils_algebra_1.isKnownOperation)(operation, utils_algebra_1.Algebra.Types.PATTERN) ? this.algebraFactory.createPattern(operation.subject.termType === 'Variable' ? this.dataFactory.variable('s') : operation.subject, operation.predicate.termType === 'Variable' ? this.dataFactory.variable('p') : operation.predicate, operation.object.termType === 'Variable' ? this.dataFactory.variable('o') : operation.object) : operation; return await this.operationToCountQuery(this.dataFactory, this.algebraFactory, normalizedOperation); } /** * Performs local cardinality estimation for the specified SPARQL algebra operation, which should * result in better estimation performance at the expense of accuracy. * @param {Algebra.Operation} operation A query operation. */ async estimateOperationCardinality(operation) { if (this.operationUsesPropertyFeatures(operation)) { return { type: 'estimate', value: 1, dataset: this.url }; } const dataset = { getCardinality: async (operation) => { const queryString = await this.operationToNormalizedCountQuery(operation); const cachedCardinality = this.cache?.get(queryString); if (cachedCardinality) { return cachedCardinality; } if (this.datasets) { const cardinalities = await Promise.all(this.datasets .filter(ds => this.unionDefaultGraph || (this.defaultGraph && ds.uri.endsWith(this.defaultGraph))) .map(ds => (0, utils_query_operation_1.estimateCardinality)(operation, ds))); const cardinality = { type: cardinalities.some(card => card.type === 'estimate') ? 'estimate' : 'exact', value: cardinalities.length > 0 ? cardinalities.reduce((acc, card) => acc + card.value, 0) : 0, dataset: this.url, }; return cardinality; } }, source: this.url, uri: this.url, }; return (0, utils_query_operation_1.estimateCardinality)(operation, dataset); } /** * Checks whether the provided operation makes use of this endpoint's property features, * if the endpoint has property features detected. * @param {Algebra.Operation} operation The operation to check. * @returns {boolean} Whether the operation makes use of property features. */ operationUsesPropertyFeatures(operation) { let propertyFeaturesUsed = false; if (this.propertyFeatures) { utils_algebra_1.algebraUtils.visitOperation(operation, { [utils_algebra_1.Algebra.Types.PATTERN]: { visitor: (subOp) => { if (subOp.predicate.termType === 'NamedNode' && this.propertyFeatures.has(subOp.predicate.value)) { propertyFeaturesUsed = true; } return false; }, }, [utils_algebra_1.Algebra.Types.LINK]: { visitor: (subOp) => { if (this.propertyFeatures.has(subOp.iri.value)) { propertyFeaturesUsed = true; } return false; }, }, [utils_algebra_1.Algebra.Types.NPS]: { visitor: (subOp) => { if (subOp.iris.some(iri => this.propertyFeatures.has(iri.value))) { propertyFeaturesUsed = true; } return false; }, }, }); } return propertyFeaturesUsed; } /** * Create an operation that includes the bindings from the given bindings stream. * @param algebraFactory The algebra factory. * @param bindMethod A method for adding bindings to an operation. * @param operation The operation to bind to. * @param addBindings The bindings to add. * @param addBindings.bindings The bindings stream. * @param addBindings.metadata The bindings metadata. */ static async addBindingsToOperation(algebraFactory, bindMethod, operation, addBindings) { const bindings = await addBindings.bindings.toArray(); switch (bindMethod) { case 'values': return algebraFactory.createJoin([ algebraFactory.createValues(addBindings.metadata.variables.map(v => v.variable), bindings.map(binding => Object.fromEntries([...binding] .map(([key, value]) => [key.value, value])))), operation, ], false); case 'union': { throw new Error('Not implemented yet: "union" case'); } case 'filter': { throw new Error('Not implemented yet: "filter" case'); } } } /** * Convert an operation to a select query for this pattern. * @param algebraFactory The algebra factory. * @param {Algebra.Operation} operation A query operation. * @param {RDF.Variable[]} variables The variables in scope for the operation. * @return {string} A select query string. */ operationToSelectQuery(algebraFactory, operation, variables) { return this.operationToQuery(algebraFactory.createProject(operation, variables)); } /** * Convert an operation to a count query for the number of matching triples for this pattern. * @param dataFactory The data factory. * @param algebraFactory The algebra factory. * @param {Algebra.Operation} operation A query operation. * @return {string} A count query string. */ operationToCountQuery(dataFactory, algebraFactory, operation) { return this.operationToQuery(algebraFactory.createProject(algebraFactory.createExtend(algebraFactory.createGroup(operation, [], [algebraFactory.createBoundAggregate(dataFactory.variable('var0'), 'count', algebraFactory.createWildcardExpression(), false)]), dataFactory.variable('count'), algebraFactory.createTermExpression(dataFactory.variable('var0'))), [dataFactory.variable('count')])); } /** * Convert an operation to a query for this pattern. * @param {Algebra.Operation} operation A query operation. * @return {string} A query string. */ async operationToQuery(operation) { return (await this.mediatorQuerySerialize.mediate({ queryFormat: { language: 'sparql', version: '1.2' }, operation, newlines: false, indentWidth: 0, context: this.context, })).query; } /** * Check if the given operation may produce undefined values. * @param operation */ static getOperationUndefs(operation) { const variables = []; utils_algebra_1.algebraUtils.visitOperation(operation, { [utils_algebra_1.Algebra.Types.LEFT_JOIN]: { preVisitor: (subOperation) => { const left = utils_algebra_1.algebraUtils.inScopeVariables(subOperation.input[0]); const right = utils_algebra_1.algebraUtils.inScopeVariables(subOperation.input[1]); for (const varRight of right) { if (!left.some(varLeft => varLeft.equals(varRight))) { variables.push(varRight); } } return { continue: false }; } }, [utils_algebra_1.Algebra.Types.VALUES]: { preVisitor: (values) => { for (const variable of values.variables) { if (values.bindings.some(bindings => !(variable.value in bindings))) { variables.push(variable); } } return { continue: false }; } }, [utils_algebra_1.Algebra.Types.UNION]: { preVisitor: (union) => { // Determine variables in scope of the union branches that are not occurring in every branch const scopedVariables = union.input.map(op => utils_algebra_1.algebraUtils.inScopeVariables(op)); for (const variable of (0, rdf_terms_1.uniqTerms)(scopedVariables.flat())) { if (!scopedVariables.every(input => input.some(inputVar => inputVar.equals(variable)))) { variables.push(variable); } } return {}; } }, }); return (0, rdf_terms_1.uniqTerms)(variables); } /** * Send a SPARQL query to a SPARQL endpoint and retrieve its bindings as a stream. * @param {string} endpoint A SPARQL endpoint URL. * @param {string} query A SPARQL query string. * @param {RDF.Variable[]} variables The expected variables. * @param {IActionContext} context The source context. * @param undefVariables Variables that may have undefs. * @return {BindingsStream} A stream of bindings. */ async queryBindingsRemote(endpoint, query, variables, context, undefVariables) { // Index undef variables const undefVariablesSet = new Set(undefVariables.map(v => v.value)); this.lastSourceContext = this.context.merge(context); const rawStream = await this.endpointFetcher.fetchBindings(endpoint, query); const wrapped = (0, asynciterator_1.wrap)(rawStream, { autoStart: false, maxBufferSize: Number.POSITIVE_INFINITY }); return wrapped.map((rawData) => { const bindings = variables.map((variable) => { const value = rawData[`?${variable.value}`]; if (!undefVariablesSet.has(variable.value) && !value) { core_1.Actor.getContextLogger(this.context)?.warn(`The endpoint ${endpoint} failed to provide a binding for ${variable.value}.`); } return [variable, value]; }).filter(([_, v]) => Boolean(v)); return this.bindingsFactory.bindings(bindings); }); } toString() { return `QuerySourceSparql(${this.url})`; } } exports.QuerySourceSparql = QuerySourceSparql; //# sourceMappingURL=QuerySourceSparql.js.map