/*
 * rdf-stores
 * Version:
 * A TypeScript/JavaScript implementation of the RDF/JS store interface with support for quoted triples.
 * 603 lines • 28.5 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RdfStore = void 0;
const asynciterator_1 = require("asynciterator");
const rdf_data_factory_1 = require("rdf-data-factory");
const rdf_string_1 = require("rdf-string");
const rdf_terms_1 = require("rdf-terms");
const DatasetCoreWrapper_1 = require("./dataset/DatasetCoreWrapper");
const TermDictionaryNumberRecordFullTerms_1 = require("./dictionary/TermDictionaryNumberRecordFullTerms");
const TermDictionaryQuotedIndexed_1 = require("./dictionary/TermDictionaryQuotedIndexed");
const RdfStoreIndexNestedMapQuoted_1 = require("./index/RdfStoreIndexNestedMapQuoted");
const OrderUtils_1 = require("./OrderUtils");
/**
 * An RDF store allows quads to be stored and fetched, based on one or more customizable indexes.
 *
 * Terms are encoded through the configured dictionary before they are handed to the indexes.
 * Every index holds all quads, each in its own component order, and the most suitable index
 * is selected per lookup.
 */
class RdfStore {
    /**
     * @param options The store options. This constructor reads `dataFactory`, `dictionary`,
     *                `indexCombinations`, `indexConstructor` and `indexNodes` from it.
     * @throws Error If no index combination is given, or if one of them is invalid.
     */
    constructor(options) {
        this.features = { quotedTripleFiltering: true, indexNodes: false, indexDistinctTerms: true };
        this._size = 0;
        this.options = options;
        this.dataFactory = options.dataFactory;
        this.dictionary = options.dictionary;
        this.indexesWrapped = RdfStore.constructIndexesWrapped(options);
        this.indexesWrappedComponentOrders = this.indexesWrapped.map(indexThis => indexThis.componentOrder);
        // The node index maps an encoded graph to the set of encoded subjects/objects in that graph.
        this.indexNodes = options.indexNodes ? new Map() : undefined;
        this.features.indexNodes = Boolean(options.indexNodes);
    }
    /**
     * Create an RDF store with default settings.
     * Concretely, this store stores triples in GSPO, GPOS, and GOSP order,
     * and makes use of in-memory number dictionary encoding.
     * @param nodes If an index of nodes (subjects or objects) must be maintained.
     */
    static createDefault(nodes) {
        return new RdfStore({
            indexCombinations: RdfStore.DEFAULT_INDEX_COMBINATIONS,
            indexConstructor: subOptions => new RdfStoreIndexNestedMapQuoted_1.RdfStoreIndexNestedMapQuoted(subOptions),
            indexNodes: nodes,
            dictionary: new TermDictionaryQuotedIndexed_1.TermDictionaryQuotedIndexed(new TermDictionaryNumberRecordFullTerms_1.TermDictionaryNumberRecordFullTerms()),
            dataFactory: new rdf_data_factory_1.DataFactory(),
        });
    }
    /**
     * Internal helper to create index objects.
     * @param options The RDF store options object.
     * @return An array with one entry per index combination, holding the index itself,
     *         its component order, and the inverse mapping (term name to position).
     * @throws Error If no index combination is given, or if one of them is invalid.
     */
    static constructIndexesWrapped(options) {
        const indexes = [];
        if (options.indexCombinations.length === 0) {
            throw new Error('At least one index combination is required');
        }
        for (const componentOrder of options.indexCombinations) {
            if (!RdfStore.isCombinationValid(componentOrder)) {
                throw new Error(`Invalid index combination: ${componentOrder}`);
            }
            indexes.push({
                index: options.indexConstructor(options),
                componentOrder,
                // Maps a quad term name to its position within this index's component order.
                componentOrderInverse: Object.fromEntries(componentOrder.map((value, key) => [value, key])),
            });
        }
        return indexes;
    }
    /**
     * Check if a given quad term order is valid.
     * An order is valid if it contains every quad term name, and has exactly four entries.
     * @param combination A quad term order.
     */
    static isCombinationValid(combination) {
        for (const quadTermName of rdf_terms_1.QUAD_TERM_NAMES) {
            if (!combination.includes(quadTermName)) {
                return false;
            }
        }
        return combination.length === 4;
    }
    /**
     * The number of quads in this store.
     */
    get size() {
        return this._size;
    }
    /**
     * Add a quad to the store.
     * @param quad An RDF quad.
     * @return boolean If the quad was not yet present in the index.
     */
    addQuad(quad) {
        const quadEncoded = [
            this.dictionary.encode(quad.subject),
            this.dictionary.encode(quad.predicate),
            this.dictionary.encode(quad.object),
            this.dictionary.encode(quad.graph),
        ];
        let newQuad = false;
        for (const indexWrapped of this.indexesWrapped) {
            // Before sending the quad to the index, make sure its components are ordered corresponding to the index's order.
            newQuad = indexWrapped.index
                .set((0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadEncoded), true);
        }
        if (newQuad) {
            this._size++;
            // If we're indexing nodes, add subject and object to this index.
            if (this.indexNodes) {
                let graphIndex = this.indexNodes.get(quadEncoded[3]);
                if (!graphIndex) {
                    graphIndex = new Set();
                    this.indexNodes.set(quadEncoded[3], graphIndex);
                }
                graphIndex.add(quadEncoded[0]);
                graphIndex.add(quadEncoded[2]);
            }
            return true;
        }
        return false;
    }
    /**
     * Remove a quad from the store.
     * @param quad An RDF quad.
     * @return boolean If the quad was present in the index.
     */
    removeQuad(quad) {
        const quadEncoded = [
            this.dictionary.encodeOptional(quad.subject),
            this.dictionary.encodeOptional(quad.predicate),
            this.dictionary.encodeOptional(quad.object),
            this.dictionary.encodeOptional(quad.graph),
        ];
        // We can quickly return false if the quad is not present in the dictionary
        // eslint-disable-next-line unicorn/no-useless-undefined
        if (quadEncoded.includes(undefined)) {
            return false;
        }
        let wasPresent = false;
        for (const indexWrapped of this.indexesWrapped) {
            // Before sending the quad to the index, make sure its components are ordered corresponding to the index's order.
            wasPresent = indexWrapped.index
                .remove((0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadEncoded));
            if (!wasPresent) {
                break;
            }
        }
        if (!wasPresent) {
            return false;
        }
        this._size--;
        // If we're indexing nodes, remove subject and object from this index if they are not present anymore.
        if (this.indexNodes) {
            const graphIndex = this.indexNodes.get(quadEncoded[3]);
            if (graphIndex) {
                // A node is any term in subject OR object position, so a term may only be dropped
                // once it occurs in neither position of any remaining quad in this graph.
                if (!this.isNodeInGraph(quad.subject, quad.graph)) {
                    graphIndex.delete(quadEncoded[0]);
                }
                if (!this.isNodeInGraph(quad.object, quad.graph)) {
                    graphIndex.delete(quadEncoded[2]);
                }
                if (graphIndex.size === 0) {
                    this.indexNodes.delete(quadEncoded[3]);
                }
            }
        }
        return true;
    }
    /**
     * Internal helper that checks if a term still occurs as subject or as object
     * of at least one quad in the given graph.
     * @param node The term to look for.
     * @param graph The graph to look in.
     * @return boolean If at least one matching quad exists.
     */
    isNodeInGraph(node, graph) {
        // Only the first result of each lazy generator is pulled.
        return !this.readQuads(node, undefined, undefined, graph).next().done ||
            !this.readQuads(undefined, undefined, node, graph).next().done;
    }
    /**
     * Removes all streamed quads.
     * @param stream A stream of quads
     * @return The given stream, after a 'data' listener has been attached to it.
     */
    remove(stream) {
        stream.on('data', quad => this.removeQuad(quad));
        return stream;
    }
    /**
     * All quads matching the pattern will be removed.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     * @return The stream of matching quads that is being removed.
     */
    removeMatches(subject, predicate, object, graph) {
        return this.remove(this.match(subject, predicate, object, graph));
    }
    /**
     * Deletes the given named graph.
     * @param graph The graph term or string to match. A string is converted into a named node.
     */
    deleteGraph(graph) {
        if (typeof graph === 'string') {
            graph = this.dataFactory.namedNode(graph);
        }
        return this.removeMatches(undefined, undefined, undefined, graph);
    }
    /**
     * Import the given stream of quads into the store.
     * @param stream A stream of RDF quads.
     * @return The given stream, after a 'data' listener has been attached to it.
     */
    import(stream) {
        stream.on('data', (quad) => this.addQuad(quad));
        return stream;
    }
    /**
     * Returns a generator producing all quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    *readQuads(subject, predicate, object, graph) {
        // Check if our dictionary and our indexes have quoted pattern support
        const indexesSupportQuotedPatterns = Boolean(this.dictionary.features.quotedTriples) &&
            Object.values(this.indexesWrapped).every(wrapped => wrapped.index.features.quotedTripleFiltering);
        // Construct a quad pattern array
        const [quadComponents, requireQuotedTripleFiltering] = (0, OrderUtils_1.quadToPattern)(subject, predicate, object, graph, indexesSupportQuotedPatterns);
        // Determine the best index for this pattern
        const indexWrapped = this.indexesWrapped[(0, OrderUtils_1.getBestIndex)(this.indexesWrappedComponentOrders, quadComponents)];
        // Re-order the quad pattern based on this best index's component order
        const quadComponentsOrdered = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadComponents);
        // Call the best index's find method.
        // eslint-disable-next-line unicorn/no-array-callback-reference
        for (const decomposedQuad of indexWrapped.index.find(quadComponentsOrdered)) {
            // De-order the resulting quad components into the normal SPOG order for quad creation.
            const quad = this.dataFactory.quad(decomposedQuad[indexWrapped.componentOrderInverse.subject], decomposedQuad[indexWrapped.componentOrderInverse.predicate], decomposedQuad[indexWrapped.componentOrderInverse.object], decomposedQuad[indexWrapped.componentOrderInverse.graph]);
            if (requireQuotedTripleFiltering) {
                // The index could not apply the full pattern, so the original pattern is re-checked here.
                if ((0, rdf_terms_1.matchPattern)(quad, subject, predicate, object, graph)) {
                    yield quad;
                }
            }
            else {
                yield quad;
            }
        }
    }
    /**
     * Returns an array containing all quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    getQuads(subject, predicate, object, graph) {
        return [...this.readQuads(subject, predicate, object, graph)];
    }
    /**
     * Returns a stream that produces all quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    match(subject, predicate, object, graph) {
        return (0, asynciterator_1.wrap)(this.readQuads(subject, predicate, object, graph));
    }
    /**
     * Returns a generator producing all bindings matching the pattern.
     * @param bindingsFactory The factory that will be used to create bindings.
     * @param subject The subject, which can be a variable.
     * @param predicate The predicate, which can be a variable.
     * @param object The object, which can be a variable.
     * @param graph The graph, which can be a variable.
     */
    *readBindings(bindingsFactory, subject, predicate, object, graph) {
        // Check if our dictionary and our indexes have quoted pattern support
        const indexesSupportQuotedPatterns = Boolean(this.dictionary.features.quotedTriples) &&
            Object.values(this.indexesWrapped).every(wrapped => wrapped.index.features.quotedTripleFiltering);
        // Construct a quad pattern array
        const [quadComponents, requireQuotedTripleFiltering] = (0, OrderUtils_1.quadToPattern)(subject, predicate, object, graph, indexesSupportQuotedPatterns);
        // Determine the best index for this pattern
        const indexWrapped = this.indexesWrapped[(0, OrderUtils_1.getBestIndex)(this.indexesWrappedComponentOrders, quadComponents)];
        // Re-order the quad pattern based on this best index's component order
        const quadComponentsOrdered = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadComponents);
        const ids = (0, OrderUtils_1.encodeOptionalTerms)(quadComponentsOrdered, this.dictionary);
        // Abort if any of the terms does not exist in the dictionary
        if (!ids) {
            return;
        }
        // Collect variables to bind
        const terms = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, [subject, predicate, object, graph]);
        const variableIndexes = [];
        for (let i = 0; i < terms.length; i++) {
            if (terms[i].termType === 'Variable' || terms[i].termType === 'Quad') {
                variableIndexes.push(i);
            }
        }
        // Check if we need to do post-filtering for overlapping variables
        let shouldFilterIndexes = false;
        // For each position, the later positions that hold an equal term.
        const filterIndexes = terms.map((variable, i) => {
            const equalVariables = [];
            for (let j = i + 1; j < terms.length; j++) {
                if (variable.equals(terms[j])) {
                    equalVariables.push(j);
                    shouldFilterIndexes = true;
                }
            }
            return equalVariables;
        });
        // Call the best index's find method.
        for (const decomposedQuadEncoded of indexWrapped.index
            .findEncoded(ids, quadComponentsOrdered)) {
            let skipBinding = false;
            let checkForBindingConflicts = false;
            const bindingsEntries = [];
            for (const i of variableIndexes) {
                // If we had overlapping variables, potentially exclude this binding if values for variable are unequal
                if (shouldFilterIndexes) {
                    const filterI = filterIndexes[i];
                    for (const j of filterI) {
                        if (decomposedQuadEncoded[i] !== decomposedQuadEncoded[j]) {
                            skipBinding = true;
                            break;
                        }
                    }
                    if (skipBinding) {
                        break;
                    }
                }
                const decodedTerm = this.dictionary.decode(decomposedQuadEncoded[i]);
                // Handle quoted triples
                // TODO: it may be possible to implement a more efficient of findEncoded if requireQuotedTripleFiltering is
                //       false that would return bindings instead of quads. The following could then be skipped.
                //       variableIndexes would also need to be changed to check requireQuotedTripleFiltering (see readQuads).
                if (terms[i].termType === 'Quad') {
                    // If the term is a quad, it may also contain nested variables,
                    // so we need to extract those additional bindings.
                    const additionalBindings = (0, rdf_terms_1.matchPatternMappings)(decodedTerm, terms[i], { returnMappings: true });
                    if (additionalBindings) {
                        checkForBindingConflicts = true;
                        for (const [key, value] of Object.entries(additionalBindings)) {
                            const variable = this.dataFactory.variable(key);
                            if (bindingsEntries.some(entry => entry[0].equals(variable) && !entry[1].equals(value))) {
                                // Skip this binding if we find conflicting variable bindings
                                skipBinding = true;
                                break;
                            }
                            bindingsEntries.push([variable, value]);
                        }
                        continue;
                    }
                    // The decoded term does not match the quoted pattern, so this result is dropped.
                    skipBinding = true;
                    break;
                }
                // If for the current bindings object, we previously found a quoted quad term that bound variables within it,
                // make sure that later bindings to this variable from other terms don't conflict.
                if (checkForBindingConflicts && bindingsEntries
                    .some(entry => entry[0].equals(terms[i]) && !entry[1].equals(decodedTerm))) {
                    // Skip this binding if we find conflicting variable bindings
                    skipBinding = true;
                    break;
                }
                bindingsEntries.push([terms[i], decodedTerm]);
            }
            if (!skipBinding) {
                // Create and yield the bindings object
                yield bindingsFactory.bindings(bindingsEntries);
            }
        }
    }
    /**
     * Returns an array containing all bindings matching the pattern.
     * @param bindingsFactory The factory that will be used to create bindings.
     * @param subject The subject, which can be a variable.
     * @param predicate The predicate, which can be a variable.
     * @param object The object, which can be a variable.
     * @param graph The graph, which can be a variable.
     */
    getBindings(bindingsFactory, subject, predicate, object, graph) {
        return [...this.readBindings(bindingsFactory, subject, predicate, object, graph)];
    }
    /**
     * Returns a stream that produces all bindings matching the pattern.
     * @param bindingsFactory The factory that will be used to create bindings.
     * @param subject The subject, which can be a variable.
     * @param predicate The predicate, which can be a variable.
     * @param object The object, which can be a variable.
     * @param graph The graph, which can be a variable.
     */
    matchBindings(bindingsFactory, subject, predicate, object, graph) {
        return (0, asynciterator_1.wrap)(this.readBindings(bindingsFactory, subject, predicate, object, graph));
    }
    /**
     * Returns the number of distinct terms that exist in the store.
     *
     * @param terms An array of quad term names
     */
    countDistinctTerms(terms) {
        // Determine the best index for this pattern
        const bestIndex = (0, OrderUtils_1.getBestIndexTerms)(this.indexesWrappedComponentOrders, terms);
        const indexWrapped = this.indexesWrapped[bestIndex];
        // Order terms, and keep index for fast inverse ordering during decoding
        const termOrderInToIndex = [];
        for (let i = 0; i < terms.length; i++) {
            termOrderInToIndex[i] = indexWrapped.componentOrder.indexOf(terms[i]);
        }
        const termsOrderedUnfiltered = [undefined, undefined, undefined, undefined];
        for (let i = 0; i < terms.length; i++) {
            termsOrderedUnfiltered[termOrderInToIndex[i]] = terms[i];
        }
        const termsOrdered = termsOrderedUnfiltered.filter(t => t !== undefined);
        // Determine path of terms to follow in the index
        const matchTerms = (0, OrderUtils_1.getIndexMatchTermsPath)(indexWrapped.componentOrder, termsOrdered);
        // Ensure distinctness (this can only occur when insufficient indexes are available)
        if (matchTerms.includes(false)) {
            return this.getDistinctTerms(terms).length;
        }
        // Call the best index's count method
        return indexWrapped.index.countTerms(matchTerms);
    }
    /**
     * Returns a generator producing distinct arrays of terms that exist in the store.
     * Each returned array corresponds to the terms specified by given quad term names.
     *
     * For example, when requesting the terms `[ 'subject', 'predicate' ]`,
     * a produced array could be `[ 'ex:s', 'ex:p' ]`,
     *
     * @param terms An array of quad term names
     */
    *readDistinctTerms(terms) {
        // Determine the best index for this pattern
        const bestIndex = (0, OrderUtils_1.getBestIndexTerms)(this.indexesWrappedComponentOrders, terms);
        const indexWrapped = this.indexesWrapped[bestIndex];
        // Order terms, and keep index for fast inverse ordering during decoding
        const termOrderInToIndex = [];
        for (let i = 0; i < terms.length; i++) {
            termOrderInToIndex[i] = indexWrapped.componentOrder.indexOf(terms[i]);
        }
        const termsOrderedUnfiltered = [undefined, undefined, undefined, undefined];
        for (let i = 0; i < terms.length; i++) {
            termsOrderedUnfiltered[termOrderInToIndex[i]] = terms[i];
        }
        const termsOrdered = termsOrderedUnfiltered.filter(t => t !== undefined);
        // Maps a position in index order back to the position in the requested order.
        const termOrderOrderedToIn = [];
        for (let i = 0; i < termsOrdered.length; i++) {
            termOrderOrderedToIn[i] = terms.indexOf(termsOrdered[i]);
        }
        // Determine path of terms to follow in the index
        const matchTerms = (0, OrderUtils_1.getIndexMatchTermsPath)(indexWrapped.componentOrder, termsOrdered);
        // Ensure distinctness (this can only occur when insufficient indexes are available)
        let distinctTerms;
        if (matchTerms.includes(false)) {
            distinctTerms = new Set();
        }
        // Call the best index's find method
        for (const readTerms of indexWrapped.index.findTerms(matchTerms)) {
            // Inverse term ordering
            const readTermsInversed = [];
            for (let i = 0; i < readTerms.length; i++) {
                readTermsInversed[termOrderOrderedToIn[i]] = readTerms[i];
            }
            // Decode terms
            const decodedTerms = readTermsInversed.map(t => this.dictionary.decode(t));
            // Filter to ensure distinct terms are returned
            if (distinctTerms) {
                const decodedTermsId = decodedTerms.map(element => (0, rdf_string_1.termToString)(element)).join(',');
                if (distinctTerms.has(decodedTermsId)) {
                    continue;
                }
                distinctTerms.add(decodedTermsId);
            }
            yield decodedTerms;
        }
    }
    /**
     * Returns an array with distinct arrays of terms that exist in the store.
     * Each returned array corresponds to the terms specified by given quad term names.
     *
     * For example, when requesting the terms `[ 'subject', 'predicate' ]`,
     * a produced array could be `[ 'ex:s', 'ex:p' ]`,
     *
     * @param terms An array of quad term names
     */
    getDistinctTerms(terms) {
        return [...this.readDistinctTerms(terms)];
    }
    /**
     * Returns a stream with distinct arrays of terms that exist in the store.
     * Each returned array corresponds to the terms specified by given quad term names.
     *
     * For example, when requesting the terms `[ 'subject', 'predicate' ]`,
     * a produced array could be `[ 'ex:s', 'ex:p' ]`,
     *
     * @param terms An array of quad term names
     */
    matchDistinctTerms(terms) {
        return (0, asynciterator_1.wrap)(this.readDistinctTerms(terms));
    }
    /**
     * Returns the number of nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     * If the graph is a variable, the node counts of all graphs are summed.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to count the nodes of, or a variable if all graphs need to be considered.
     * @throws Error If the store was not constructed with `indexNodes: true`.
     */
    countNodes(graph) {
        if (!this.indexNodes) {
            throw new Error(`Nodes can only be read when the store was constructed with 'indexNodes: true'`);
        }
        if (graph.termType === 'Variable') {
            let size = 0;
            for (const graphIndex of this.indexNodes.values()) {
                size += graphIndex.size;
            }
            return size;
        }
        const graphEncoded = this.dictionary.encodeOptional(graph);
        if (graphEncoded !== undefined) {
            // The dictionary can know the term while the node index holds no entry for it
            // (e.g. after all quads of that graph were removed), so a missing entry counts as zero.
            const graphIndex = this.indexNodes.get(graphEncoded);
            return graphIndex ? graphIndex.size : 0;
        }
        return 0;
    }
    /**
     * Returns a generator producing all nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to read the nodes from, or a variable if all graphs need to be considered.
     *
     * @returns a generator of tuples containing the named graph as first element and the node term as second element.
     * @throws Error If the store was not constructed with `indexNodes: true`
     *               (raised when the generator is first advanced).
     */
    *readNodes(graph) {
        if (!this.indexNodes) {
            throw new Error(`Nodes can only be read when the store was constructed with 'indexNodes: true'`);
        }
        // Decode nodes of all graphs if variable, or only the given graphs.
        if (graph.termType === 'Variable') {
            for (const entry of this.indexNodes.entries()) {
                const graphDecoded = this.dictionary.decode(entry[0]);
                for (const term of entry[1]) {
                    yield [graphDecoded, this.dictionary.decode(term)];
                }
            }
        }
        else {
            const graphEncoded = this.dictionary.encodeOptional(graph);
            if (graphEncoded !== undefined) {
                // The dictionary can know the term while the node index holds no entry for it,
                // in which case there is nothing to yield.
                const graphIndex = this.indexNodes.get(graphEncoded);
                if (graphIndex) {
                    for (const term of graphIndex) {
                        yield [graph, this.dictionary.decode(term)];
                    }
                }
            }
        }
    }
    /**
     * Returns an array containing all nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to read the nodes from, or a variable if all graphs need to be considered.
     *
     * @returns an array of tuples containing the named graph as first element and the node term as second element.
     */
    getNodes(graph) {
        return [...this.readNodes(graph)];
    }
    /**
     * Returns a stream containing all nodes in the given graph (can be a variable).
     * Nodes are all terms that are either a subject or object within the store.
     *
     * This method can only be called when the store is constructed with `indexNodes: true`.
     *
     * @param graph The graph to read the nodes from, or a variable if all graphs need to be considered.
     *
     * @returns a stream of tuples containing the named graph as first element and the node term as second element.
     * @throws Error If the store was not constructed with `indexNodes: true`.
     */
    matchNodes(graph) {
        // Checked here as well so the error is thrown at call time instead of when the generator is first read.
        if (!this.indexNodes) {
            throw new Error(`Nodes can only be read when the store was constructed with 'indexNodes: true'`);
        }
        return (0, asynciterator_1.wrap)(this.readNodes(graph));
    }
    /**
     * Returns the exact cardinality of the quads matching the pattern.
     * @param subject The optional subject.
     * @param predicate The optional predicate.
     * @param object The optional object.
     * @param graph The optional graph.
     */
    countQuads(subject, predicate, object, graph) {
        // Check if our dictionary and our indexes have quoted pattern support
        const indexesSupportQuotedPatterns = Boolean(this.dictionary.features.quotedTriples) &&
            Object.values(this.indexesWrapped).every(wrapped => wrapped.index.features.quotedTripleFiltering);
        // Construct a quad pattern array
        const [quadComponents, requireQuotedTripleFiltering] = (0, OrderUtils_1.quadToPattern)(subject, predicate, object, graph, indexesSupportQuotedPatterns);
        // When post-filtering is required, readQuads re-checks every index result against the pattern,
        // so an index-level count (or the store size) may over-count. Count the filtered results instead.
        if (requireQuotedTripleFiltering) {
            let count = 0;
            const quads = this.readQuads(subject, predicate, object, graph);
            while (!quads.next().done) {
                count++;
            }
            return count;
        }
        // Optimize all-variables pattern
        if (quadComponents.every(quadComponent => quadComponent === undefined)) {
            return this.size;
        }
        // Determine the best index for this pattern
        const indexWrapped = this.indexesWrapped[(0, OrderUtils_1.getBestIndex)(this.indexesWrappedComponentOrders, quadComponents)];
        // Re-order the quad pattern based on this best index's component order
        const quadComponentsOrdered = (0, OrderUtils_1.orderQuadComponents)(indexWrapped.componentOrder, quadComponents);
        // Call the best index's count method.
        return indexWrapped.index.count(quadComponentsOrdered);
    }
    /**
     * Wrap this store inside a DatasetCore interface.
     * Any mutations in either this store or the wrapper will propagate to each other.
     */
    asDataset() {
        return new DatasetCoreWrapper_1.DatasetCoreWrapper(this);
    }
}
exports.RdfStore = RdfStore;
// Index component orders used by `RdfStore.createDefault`.
// Every order starts with the graph, followed by one rotation of subject/predicate/object
// (GSPO, GPOS, GOSP).
RdfStore.DEFAULT_INDEX_COMBINATIONS = [
['graph', 'subject', 'predicate', 'object'],
['graph', 'predicate', 'object', 'subject'],
['graph', 'object', 'subject', 'predicate'],
];
//# sourceMappingURL=RdfStore.js.map