UNPKG

rdf-stores

Version:

A TypeScript/JavaScript implementation of the RDF/JS store interface with support for quoted triples.

603 lines 28.5 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RdfStore = void 0;
const asynciterator_1 = require("asynciterator");
const rdf_data_factory_1 = require("rdf-data-factory");
const rdf_string_1 = require("rdf-string");
const rdf_terms_1 = require("rdf-terms");
const DatasetCoreWrapper_1 = require("./dataset/DatasetCoreWrapper");
const TermDictionaryNumberRecordFullTerms_1 = require("./dictionary/TermDictionaryNumberRecordFullTerms");
const TermDictionaryQuotedIndexed_1 = require("./dictionary/TermDictionaryQuotedIndexed");
const RdfStoreIndexNestedMapQuoted_1 = require("./index/RdfStoreIndexNestedMapQuoted");
const OrderUtils_1 = require("./OrderUtils");
/**
 * An RDF store allows quads to be stored and fetched, based on one or more customizable indexes.
 */
class RdfStore {
    /**
     * @param options The RDF store options object. This constructor reads `dataFactory`, `dictionary`,
     *                `indexCombinations`, `indexConstructor`, and `indexNodes` from it.
     */
    constructor(options) {
        this.features = {
            quotedTripleFiltering: true,
            indexNodes: false,
            indexDistinctTerms: true,
        };
        this._size = 0;
        this.options = options;
        this.dataFactory = options.dataFactory;
        this.dictionary = options.dictionary;
        this.indexesWrapped = RdfStore.constructIndexesWrapped(options);
        // Cache the component orders separately, as they are passed to the best-index helpers on every lookup.
        this.indexesWrappedComponentOrders = this.indexesWrapped.map(indexThis => indexThis.componentOrder);
        // The node index maps an encoded graph to the set of encoded subjects and objects in that graph.
        this.indexNodes = options.indexNodes ? new Map() : undefined;
        this.features.indexNodes = Boolean(options.indexNodes);
    }
    /**
     * Create an RDF store with default settings.
     * Concretely, this store stores triples in GSPO, GPOS, and GOSP order,
     * and makes use of in-memory number dictionary encoding.
     * @param nodes If an index of nodes (subjects or objects) must be maintained.
     */
    static createDefault(nodes) {
        return new RdfStore({
            indexCombinations: RdfStore.DEFAULT_INDEX_COMBINATIONS,
            indexConstructor: subOptions => new RdfStoreIndexNestedMapQuoted_1.RdfStoreIndexNestedMapQuoted(subOptions),
            indexNodes: nodes,
            dictionary: new TermDictionaryQuotedIndexed_1.TermDictionaryQuotedIndexed(new TermDictionaryNumberRecordFullTerms_1.TermDictionaryNumberRecordFullTerms()),
            dataFactory: new rdf_data_factory_1.DataFactory(),
        });
    }
    /**
     * Internal helper to create index objects.
     * @param options The RDF store options object.
     * @return An array with one entry per index combination, each holding the index,
     *         its component order, and the inverse mapping from quad term name to position.
     * @throws Error If no index combinations are given, or if a combination is invalid.
     */
    static constructIndexesWrapped(options) {
        const indexes = [];
        if (options.indexCombinations.length === 0) {
            throw new Error('At least one index combination is required');
        }
        for (const componentOrder of options.indexCombinations) {
            if (!RdfStore.isCombinationValid(componentOrder)) {
                throw new Error(`Invalid index combination: ${componentOrder}`);
            }
            indexes.push({
                index: options.indexConstructor(options),
                componentOrder,
                // Maps each quad term name to its position within this index's component order.
                componentOrderInverse: Object.fromEntries(componentOrder.map((value, key) => [value, key])),
            });
        }
        return indexes;
    }
    /**
     * Check if a given quad term order is valid.
     * A valid order contains every quad term name and has exactly four entries.
     * @param combination A quad term order.
     */
    static isCombinationValid(combination) {
        for (const quadTermName of rdf_terms_1.QUAD_TERM_NAMES) {
            if (!combination.includes(quadTermName)) {
                return false;
            }
        }
        return combination.length === 4;
    }
    /**
     * The number of quads in this store.
     */
    get size() {
        return this._size;
    }
    /**
     * Add a quad to the store.
     * @param quad An RDF quad.
     * @return boolean If the quad was not yet present in the index.
     */
    addQuad(quad) {
        const quadEncoded = [
            this.dictionary.encode(quad.subject),
            this.dictionary.encode(quad.predicate),
            this.dictionary.encode(quad.object),
            this.dictionary.encode(quad.graph),
        ];
        let newQuad = false;
        for (const indexWrapped of this.indexesWrapped) {
            // Before sending the quad to the index, make sure its components are ordered corresponding to the index's order.
            newQuad = indexWrapped.index
                .set((0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadEncoded), true);
        }
        if (newQuad) {
            this._size++;
            // If we're indexing nodes, add subject and object to this index.
            if (this.indexNodes) {
                let graphIndex = this.indexNodes.get(quadEncoded[3]);
                if (!graphIndex) {
                    graphIndex = new Set();
                    this.indexNodes.set(quadEncoded[3], graphIndex);
                }
                graphIndex.add(quadEncoded[0]);
                graphIndex.add(quadEncoded[2]);
            }
            return true;
        }
        return false;
    }
    /**
     * Remove a quad from the store.
     * @param quad An RDF quad.
     * @return boolean If the quad was present in the index.
     */
    removeQuad(quad) {
        const quadEncoded = [
            this.dictionary.encodeOptional(quad.subject),
            this.dictionary.encodeOptional(quad.predicate),
            this.dictionary.encodeOptional(quad.object),
            this.dictionary.encodeOptional(quad.graph),
        ];
        // We can quickly return false if the quad is not present in the dictionary
        // eslint-disable-next-line unicorn/no-useless-undefined
        if (quadEncoded.includes(undefined)) {
            return false;
        }
        let wasPresent = false;
        for (const indexWrapped of this.indexesWrapped) {
            // Before sending the quad to the index, make sure its components are ordered corresponding to the index's order.
            wasPresent = indexWrapped.index
                .remove((0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadEncoded));
            // Stop at the first index that does not report the quad as present.
            if (!wasPresent) {
                break;
            }
        }
        if (wasPresent) {
            this._size--;
            // If we're indexing nodes, remove subject and object from this index if they are not present anymore.
            if (this.indexNodes) {
                const graphIndex = this.indexNodes.get(quadEncoded[3]);
                if (!this.readQuads(quad.subject, undefined, undefined, quad.graph).next().value) {
                    graphIndex.delete(quadEncoded[0]);
                }
                if (!this.readQuads(undefined, undefined, quad.object, quad.graph).next().value) {
                    graphIndex.delete(quadEncoded[2]);
                }
                // Drop the graph entry entirely once it holds no more nodes.
                if (graphIndex.size === 0) {
                    this.indexNodes.delete(quadEncoded[3]);
                }
            }
            return true;
        }
        return false;
    }
    /**
     * Removes all streamed quads.
     * @param stream A stream of quads
     * @return The given stream.
     */
    remove(stream) {
        stream.on('data', quad => this.removeQuad(quad));
        return stream;
    }
    /**
     * All quads matching the pattern will be removed.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    removeMatches(subject, predicate, object, graph) {
        return this.remove(this.match(subject, predicate, object, graph));
    }
    /**
     * Deletes the given named graph.
     * @param graph The graph term or string to match.
     */
    deleteGraph(graph) {
        if (typeof graph === 'string') {
            graph = this.dataFactory.namedNode(graph);
        }
        return this.removeMatches(undefined, undefined, undefined, graph);
    }
    /**
     * Import the given stream of quads into the store.
     * @param stream A stream of RDF quads.
     * @return The given stream.
     */
    import(stream) {
        stream.on('data', (quad) => this.addQuad(quad));
        return stream;
    }
    /**
     * Returns a generator producing all quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    *readQuads(subject, predicate, object, graph) {
        // Check if our dictionary and our indexes have quoted pattern support
        const indexesSupportQuotedPatterns = Boolean(this.dictionary.features.quotedTriples) &&
            Object.values(this.indexesWrapped).every(wrapped => wrapped.index.features.quotedTripleFiltering);
        // Construct a quad pattern array
        const [quadComponents, requireQuotedTripleFiltering] = (0, OrderUtils_1.quadToPattern)(subject, predicate, object, graph, indexesSupportQuotedPatterns);
        // Determine the best index for this pattern
        const indexWrapped = this.indexesWrapped[(0, OrderUtils_1.getBestIndex)(this.indexesWrappedComponentOrders, quadComponents)];
        // Re-order the quad pattern based on this best index's component order
        const quadComponentsOrdered = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadComponents);
        // Call the best index's find method.
        // eslint-disable-next-line unicorn/no-array-callback-reference
        for (const decomposedQuad of indexWrapped.index.find(quadComponentsOrdered)) {
            // De-order the resulting quad components into the normal SPOG order for quad creation.
            const quad = this.dataFactory.quad(decomposedQuad[indexWrapped.componentOrderInverse.subject], decomposedQuad[indexWrapped.componentOrderInverse.predicate], decomposedQuad[indexWrapped.componentOrderInverse.object], decomposedQuad[indexWrapped.componentOrderInverse.graph]);
            if (requireQuotedTripleFiltering) {
                // Post-filter against the original pattern before yielding.
                if ((0, rdf_terms_1.matchPattern)(quad, subject, predicate, object, graph)) {
                    yield quad;
                }
            }
            else {
                yield quad;
            }
        }
    }
    /**
     * Returns an array containing all quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    getQuads(subject, predicate, object, graph) {
        return [...this.readQuads(subject, predicate, object, graph)];
    }
    /**
     * Returns a stream that produces all quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    match(subject, predicate, object, graph) {
        return (0, asynciterator_1.wrap)(this.readQuads(subject, predicate, object, graph));
    }
    /**
     * Returns a generator producing all bindings matching the pattern.
     * @param bindingsFactory The factory that will be used to create bindings.
     * @param subject The subject, which can be a variable.
     * @param predicate The predicate, which can be a variable.
     * @param object The object, which can be a variable.
     * @param graph The graph, which can be a variable.
     */
    *readBindings(bindingsFactory, subject, predicate, object, graph) {
        // Check if our dictionary and our indexes have quoted pattern support
        const indexesSupportQuotedPatterns = Boolean(this.dictionary.features.quotedTriples) &&
            Object.values(this.indexesWrapped).every(wrapped => wrapped.index.features.quotedTripleFiltering);
        // Construct a quad pattern array
        const [quadComponents, requireQuotedTripleFiltering] = (0, OrderUtils_1.quadToPattern)(subject, predicate, object, graph, indexesSupportQuotedPatterns);
        // Determine the best index for this pattern
        const indexWrapped = this.indexesWrapped[(0, OrderUtils_1.getBestIndex)(this.indexesWrappedComponentOrders, quadComponents)];
        // Re-order the quad pattern based on this best index's component order
        const quadComponentsOrdered = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadComponents);
        const ids = (0, OrderUtils_1.encodeOptionalTerms)(quadComponentsOrdered, this.dictionary);
        // Abort if any of the terms does not exist in the dictionary
        if (!ids) {
            return;
        }
        // Collect variables to bind
        const terms = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, [subject, predicate, object, graph]);
        const variableIndexes = [];
        for (let i = 0; i < terms.length; i++) {
            if (terms[i].termType === 'Variable' || terms[i].termType === 'Quad') {
                variableIndexes.push(i);
            }
        }
        // Check if we need to do post-filtering for overlapping variables
        let shouldFilterIndexes = false;
        const filterIndexes = terms.map((variable, i) => {
            const equalVariables = [];
            for (let j = i + 1; j < terms.length; j++) {
                if (variable.equals(terms[j])) {
                    equalVariables.push(j);
                    shouldFilterIndexes = true;
                }
            }
            return equalVariables;
        });
        // Call the best index's find method.
        for (const decomposedQuadEncoded of indexWrapped.index
            .findEncoded(ids, quadComponentsOrdered)) {
            let skipBinding = false;
            let checkForBindingConflicts = false;
            const bindingsEntries = [];
            for (const i of variableIndexes) {
                // If we had overlapping variables, potentially exclude this binding if values for variable are unequal
                if (shouldFilterIndexes) {
                    const filterI = filterIndexes[i];
                    for (const j of filterI) {
                        if (decomposedQuadEncoded[i] !== decomposedQuadEncoded[j]) {
                            skipBinding = true;
                            break;
                        }
                    }
                    if (skipBinding) {
                        break;
                    }
                }
                const decodedTerm = this.dictionary.decode(decomposedQuadEncoded[i]);
                // Handle quoted triples
                // TODO: it may be possible to implement a more efficient of findEncoded if requireQuotedTripleFiltering is
                // false that would return bindings instead of quads. The following could then be skipped.
                // variableIndexes would also need to be changed to check requireQuotedTripleFiltering (see readQuads).
                if (terms[i].termType === 'Quad') {
                    // If the term is a quad, it may also contain nested variables,
                    // so we need to extract those additional bindings.
                    const additionalBindings = (0, rdf_terms_1.matchPatternMappings)(decodedTerm, terms[i], { returnMappings: true });
                    if (additionalBindings) {
                        checkForBindingConflicts = true;
                        for (const [key, value] of Object.entries(additionalBindings)) {
                            const variable = this.dataFactory.variable(key);
                            if (bindingsEntries.some(entry => entry[0].equals(variable) && !entry[1].equals(value))) {
                                // Skip this binding if we find conflicting variable bindings
                                skipBinding = true;
                                break;
                            }
                            bindingsEntries.push([variable, value]);
                        }
                        continue;
                    }
                    // The decoded term does not match the quoted pattern, so this result is discarded.
                    skipBinding = true;
                    break;
                }
                // If for the current bindings object, we previously found a quoted quad term that bound variables within it,
                // make sure that later bindings to this variable from other terms don't conflict.
                if (checkForBindingConflicts && bindingsEntries
                    .some(entry => entry[0].equals(terms[i]) && !entry[1].equals(decodedTerm))) {
                    // Skip this binding if we find conflicting variable bindings
                    skipBinding = true;
                    break;
                }
                bindingsEntries.push([terms[i], decodedTerm]);
            }
            if (!skipBinding) {
                // Create and yield the bindings object
                yield bindingsFactory.bindings(bindingsEntries);
            }
        }
    }
    /**
     * Returns an array containing all bindings matching the pattern.
     * @param bindingsFactory The factory that will be used to create bindings.
     * @param subject The subject, which can be a variable.
     * @param predicate The predicate, which can be a variable.
     * @param object The object, which can be a variable.
     * @param graph The graph, which can be a variable.
     */
    getBindings(bindingsFactory, subject, predicate, object, graph) {
        return [...this.readBindings(bindingsFactory, subject, predicate, object, graph)];
    }
    /**
     * Returns a stream that produces all bindings matching the pattern.
     * @param bindingsFactory The factory that will be used to create bindings.
     * @param subject The subject, which can be a variable.
     * @param predicate The predicate, which can be a variable.
     * @param object The object, which can be a variable.
     * @param graph The graph, which can be a variable.
     */
    matchBindings(bindingsFactory, subject, predicate, object, graph) {
        return (0, asynciterator_1.wrap)(this.readBindings(bindingsFactory, subject, predicate, object, graph));
    }
    /**
     * Returns the number of distinct terms that exist in the store.
     *
     * @param terms An array of quad term names
     */
    countDistinctTerms(terms) {
        // Determine the best index for this pattern
        const bestIndex = (0, OrderUtils_1.getBestIndexTerms)(this.indexesWrappedComponentOrders, terms);
        const indexWrapped = this.indexesWrapped[bestIndex];
        // Order terms, and keep index for fast inverse ordering during decoding
        const termOrderInToIndex = [];
        for (let i = 0; i < terms.length; i++) {
            termOrderInToIndex[i] = indexWrapped.componentOrder.indexOf(terms[i]);
        }
        const termsOrderedUnfiltered = [undefined, undefined, undefined, undefined];
        for (let i = 0; i < terms.length; i++) {
            termsOrderedUnfiltered[termOrderInToIndex[i]] = terms[i];
        }
        const termsOrdered = termsOrderedUnfiltered.filter(t => t !== undefined);
        // Determine path of terms to follow in the index
        const matchTerms = (0, OrderUtils_1.getIndexMatchTermsPath)(indexWrapped.componentOrder, termsOrdered);
        // Ensure distinctness (this can only occur when insufficient indexes are available)
        if (matchTerms.includes(false)) {
            return this.getDistinctTerms(terms).length;
        }
        // Call the best index's count method
        return indexWrapped.index.countTerms(matchTerms);
    }
    /**
     * Returns a generator producing distinct arrays of terms that exist in the store.
     * Each returned array corresponds to the terms specified by given quad term names.
     *
     * For example, when requesting the terms `[ 'subject', 'predicate' ]`,
     * a produced array could be `[ 'ex:s', 'ex:p' ]`,
     *
     * @param terms An array of quad term names
     */
    *readDistinctTerms(terms) {
        // Determine the best index for this pattern
        const bestIndex = (0, OrderUtils_1.getBestIndexTerms)(this.indexesWrappedComponentOrders, terms);
        const indexWrapped = this.indexesWrapped[bestIndex];
        // Order terms, and keep index for fast inverse ordering during decoding
        const termOrderInToIndex = [];
        for (let i = 0; i < terms.length; i++) {
            termOrderInToIndex[i] = indexWrapped.componentOrder.indexOf(terms[i]);
        }
        const termsOrderedUnfiltered = [undefined, undefined, undefined, undefined];
        for (let i = 0; i < terms.length; i++) {
            termsOrderedUnfiltered[termOrderInToIndex[i]] = terms[i];
        }
        const termsOrdered = termsOrderedUnfiltered.filter(t => t !== undefined);
        // For each position in index order, remember the position the caller requested it at.
        const termOrderOrderedToIn = [];
        for (let i = 0; i < termsOrdered.length; i++) {
            termOrderOrderedToIn[i] = terms.indexOf(termsOrdered[i]);
        }
        // Determine path of terms to follow in the index
        const matchTerms = (0, OrderUtils_1.getIndexMatchTermsPath)(indexWrapped.componentOrder, termsOrdered);
        // Ensure distinctness (this can only occur when insufficient indexes are available)
        let distinctTerms;
        if (matchTerms.includes(false)) {
            distinctTerms = new Set();
        }
        // Call the best index's find method
        for (const readTerms of indexWrapped.index.findTerms(matchTerms)) {
            // Inverse term ordering
            const readTermsInversed = [];
            for (let i = 0; i < readTerms.length; i++) {
                readTermsInversed[termOrderOrderedToIn[i]] = readTerms[i];
            }
            // Decode terms
            const decodedTerms = readTermsInversed.map(t => this.dictionary.decode(t));
            // Filter to ensure distinct terms are returned
            if (distinctTerms) {
                const decodedTermsId = decodedTerms.map(element => (0, rdf_string_1.termToString)(element)).join(',');
                if (distinctTerms.has(decodedTermsId)) {
                    continue;
                }
                distinctTerms.add(decodedTermsId);
            }
            yield decodedTerms;
        }
    }
    /**
     * Returns an array with distinct arrays of terms that exist in the store.
     * Each returned array corresponds to the terms specified by given quad term names.
     *
     * For example, when requesting the terms `[ 'subject', 'predicate' ]`,
     * a produced array could be `[ 'ex:s', 'ex:p' ]`,
     *
     * @param terms An array of quad term names
     */
    getDistinctTerms(terms) {
        return [...this.readDistinctTerms(terms)];
    }
    /**
     * Returns a stream with distinct arrays of terms that exist in the store.
     * Each returned array corresponds to the terms specified by given quad term names.
     *
     * For example, when requesting the terms `[ 'subject', 'predicate' ]`,
     * a produced array could be `[ 'ex:s', 'ex:p' ]`,
     *
     * @param terms An array of quad term names
     */
    matchDistinctTerms(terms) {
        return (0, asynciterator_1.wrap)(this.readDistinctTerms(terms));
    }
    /**
     * Returns the number of nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to count the nodes of, or a variable if all graphs need to be considered.
     * @return The number of nodes, or 0 if the given graph has no entry in the node index.
     * @throws Error If the store was not constructed with `indexNodes: true`.
     */
    countNodes(graph) {
        if (!this.indexNodes) {
            throw new Error(`Nodes can only be read when the store was constructed with 'indexNodes: true'`);
        }
        if (graph.termType === 'Variable') {
            let size = 0;
            for (const graphIndex of this.indexNodes.values()) {
                size += graphIndex.size;
            }
            return size;
        }
        const graphEncoded = this.dictionary.encodeOptional(graph);
        if (graphEncoded !== undefined) {
            // A term can exist in the dictionary without having a node index entry,
            // e.g. when it was only used in another quad position,
            // or when its graph entry was deleted by removeQuad after becoming empty.
            const graphIndex = this.indexNodes.get(graphEncoded);
            return graphIndex ? graphIndex.size : 0;
        }
        return 0;
    }
    /**
     * Returns a generator producing all nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to read the nodes from, or a variable if all graphs need to be considered.
     *
     * @returns a generator of tuples containing the named graph as first element and the node term as second element.
     */
    *readNodes(graph) {
        if (!this.indexNodes) {
            throw new Error(`Nodes can only be read when the store was constructed with 'indexNodes: true'`);
        }
        // Decode nodes of all graphs if variable, or only the given graphs.
        if (graph.termType === 'Variable') {
            for (const entry of this.indexNodes.entries()) {
                const graphDecoded = this.dictionary.decode(entry[0]);
                for (const term of entry[1]) {
                    yield [graphDecoded, this.dictionary.decode(term)];
                }
            }
        }
        else {
            const graphEncoded = this.dictionary.encodeOptional(graph);
            if (graphEncoded !== undefined) {
                // A term can exist in the dictionary without having a node index entry,
                // in which case there is nothing to yield.
                const graphIndex = this.indexNodes.get(graphEncoded);
                if (graphIndex) {
                    for (const term of graphIndex) {
                        yield [graph, this.dictionary.decode(term)];
                    }
                }
            }
        }
    }
    /**
     * Returns an array containing all nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to read the nodes from, or a variable if all graphs need to be considered.
     *
     * @returns an array of tuples containing the named graph as first element and the node term as second element.
     */
    getNodes(graph) {
        return [...this.readNodes(graph)];
    }
    /**
     * Returns a stream containing all nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to read the nodes from, or a variable if all graphs need to be considered.
     *
     * @returns a stream of tuples containing the named graph as first element and the node term as second element.
     */
    matchNodes(graph) {
        // Check eagerly: the generator body of readNodes only runs once the stream is consumed.
        if (!this.indexNodes) {
            throw new Error(`Nodes can only be read when the store was constructed with 'indexNodes: true'`);
        }
        return (0, asynciterator_1.wrap)(this.readNodes(graph));
    }
    /**
     * Returns the exact cardinality of the quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    countQuads(subject, predicate, object, graph) {
        // Check if our dictionary and our indexes have quoted pattern support
        const indexesSupportQuotedPatterns = Boolean(this.dictionary.features.quotedTriples) &&
            Object.values(this.indexesWrapped).every(wrapped => wrapped.index.features.quotedTripleFiltering);
        // Construct a quad pattern array
        const [quadComponents] = (0, OrderUtils_1.quadToPattern)(subject, predicate, object, graph, indexesSupportQuotedPatterns);
        // Optimize all-variables pattern
        if (quadComponents.every(quadComponent => quadComponent === undefined)) {
            return this.size;
        }
        // Determine the best index for this pattern
        const indexWrapped = this.indexesWrapped[(0, OrderUtils_1.getBestIndex)(this.indexesWrappedComponentOrders, quadComponents)];
        // Re-order the quad pattern based on this best index's component order
        const quadComponentsOrdered = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadComponents);
        // Call the best index's count method.
        return indexWrapped.index.count(quadComponentsOrdered);
    }
    /**
     * Wrap this store inside a DatasetCore interface.
     * Any mutations in either this store or the wrapper will propagate to each other.
     */
    asDataset() {
        return new DatasetCoreWrapper_1.DatasetCoreWrapper(this);
    }
}
exports.RdfStore = RdfStore;
RdfStore.DEFAULT_INDEX_COMBINATIONS = [
    ['graph', 'subject', 'predicate', 'object'],
    ['graph', 'predicate', 'object', 'subject'],
    ['graph', 'object', 'subject', 'predicate'],
];
//# sourceMappingURL=RdfStore.js.map