UNPKG

extract-cbd-shape

Version:

Extract an entity based on CBD and a SHACL shape

302 lines 11.9 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.CbdExtracted = exports.CBDShapeExtractor = void 0; const rdf_dereference_1 = require("rdf-dereference"); const Shape_1 = require("./Shape"); const rdf_data_factory_1 = require("rdf-data-factory"); const rdf_stores_1 = require("rdf-stores"); const debug_1 = __importDefault(require("debug")); const ShapesGraph_1 = require("./ShapesGraph"); const log = (0, debug_1.default)("extract-cbd-shape"); const df = new rdf_data_factory_1.DataFactory(); class DereferenceNeeded { target; msg; constructor(target, msg) { this.target = target; this.msg = msg; } } /** * Usage: * import {ShapeExtractor} from "extract-cbd-shape"; * ... * let shapeExtractor = new ShapeExtractor(shape, dereferencer); * let entityquads = await shapeExtractor.extract(store, entity); */ class CBDShapeExtractor { dereferencer; shapesGraph; options; constructor(shapesGraphStore, dereferencer, options = {}) { // Assign with default options this.options = Object.assign({ cbdDefaultGraph: false }, options); if (!dereferencer) { this.dereferencer = rdf_dereference_1.rdfDereferencer; } else { this.dereferencer = dereferencer; } //Pre-process shape if (shapesGraphStore) { this.shapesGraph = new ShapesGraph_1.ShapesGraph(shapesGraphStore); } } async bulkExtract(store, ids, shapeId, graphsToIgnore, itemExtracted) { const out = []; const idSet = new Set(ids.map((x) => x.value)); const memberSpecificQuads = {}; for (let id of ids) { memberSpecificQuads[id.value] = []; } const newStore = rdf_stores_1.RdfStore.createDefault(); for (let quad of store.readQuads(null, null, null, null)) { if (quad.graph.termType == "NamedNode" && idSet.has(quad.graph.value)) { memberSpecificQuads[quad.graph.value].push(quad); } else { newStore.addQuad(quad); } } const promises = []; for (let id of ids) { const promise = this.extract(newStore, id, shapeId, (graphsToIgnore || []).slice()).then((quads) => { quads.push(...memberSpecificQuads[id.value]); if (itemExtracted) { itemExtracted({ subject: id, quads }); } out.push({ subject: id, quads }); }); promises.push(promise); } await Promise.all(promises); return out; } /** * Extracts: * * first level quads, * * their blank nodes with their quads (recursively), * * all quads in the namedgraph of this entity, * * all quads of required paths found in the shape * * the same algorithm on top of all found node links * @param store The RdfStore loaded with a set of initial quads * @param id The entity to be described/extracted * @param shapeId The optional SHACL NodeShape identifier * @param graphsToIgnore The optional parameter of graph to ignore when other entities are mentioned in the current context * @returns Promise of a quad array of the described entity */ async extract(store, id, shapeId, graphsToIgnore) { // First extract everything except for something within the graphs to ignore, or within the graph of the current entity, as that’s going to be added anyway later on let dontExtractFromGraph = (graphsToIgnore ? graphsToIgnore : []).map((item) => { return item.value; }); const extractInstance = new ExtractInstance(store, this.dereferencer, dontExtractFromGraph, this.options, this.shapesGraph); return await extractInstance.extract(id, false, shapeId); } } exports.CBDShapeExtractor = CBDShapeExtractor; class CbdExtracted { topology; cbdExtractedMap; constructor(topology, cbdExtracted = new Shape_1.RDFMap()) { if (topology) { this.topology = topology; } else { this.topology = { forwards: {}, backwards: {} }; } this.cbdExtractedMap = cbdExtracted; } addCBDTerm(term) { const t = this.cbdExtractedMap.get(term); if (t) { t.cbd = true; } else { this.cbdExtractedMap.set(term, { cbd: true, shape: false }); } } addShapeTerm(term) { const t = this.cbdExtractedMap.get(term); if (t) { t.shape = true; } else { this.cbdExtractedMap.set(term, { cbd: true, shape: false }); } } cbdExtracted(term) { return !!this.cbdExtractedMap.get(term)?.shape; } push(term, inverse) { if (inverse) { if (!this.topology.backwards[term.value]) { const ne = { forwards: {}, backwards: {}, }; ne.forwards[term.value] = this.topology; this.topology.backwards[term.value] = ne; } return new CbdExtracted(this.topology.backwards[term.value], this.cbdExtractedMap); } else { if (!this.topology.forwards[term.value]) { const ne = { forwards: {}, backwards: {}, }; ne.backwards[term.value] = this.topology; this.topology.forwards[term.value] = ne; } return new CbdExtracted(this.topology.forwards[term.value], this.cbdExtractedMap); } } enter(term, inverse) { const out = inverse ? this.topology.backwards[term.value] : this.topology.forwards[term.value]; if (out) { return new CbdExtracted(out, this.cbdExtractedMap); } } } exports.CbdExtracted = CbdExtracted; class ExtractInstance { dereferenced = new Set(); store; dereferencer; options; graphsToIgnore; shapesGraph; constructor(store, dereferencer, graphsToIgnore, options, shapesGraph) { this.store = store; this.dereferencer = dereferencer; this.shapesGraph = shapesGraph; this.graphsToIgnore = graphsToIgnore; this.options = options; } async extract(id, offline, shapeId) { const result = await this.maybeExtractRecursively(id, new CbdExtracted(), offline, shapeId); result.push(...this.store.getQuads(null, null, null, id)); if (result.length === 0) { if (await this.dereference(id.value)) { // retry const result = await this.maybeExtractRecursively(id, new CbdExtracted(), offline, shapeId); return result.filter((value, index, array) => { return index === array.findIndex((x) => x.equals(value)); }); } } return result.filter((value, index, array) => { return index === array.findIndex((x) => x.equals(value)); }); } async dereference(url) { if (this.dereferenced.has(url)) { log("Will not dereference " + url + " again"); return false; } this.dereferenced.add(url); await this.loadQuadStreamInStore((await this.dereferencer.dereference(url, { fetch: this.options.fetch, })).data); return true; } async maybeExtractRecursively(id, extracted, offline, shapeId) { if (extracted.cbdExtracted(id)) { return []; } extracted.addShapeTerm(id); return this.extractRecursively(id, extracted, offline, shapeId); } async extractRecursively(id, extracted, offline, shapeId) { const result = []; let shape; if (shapeId instanceof Shape_1.ShapeTemplate) { shape = shapeId; } else if (shapeId && this.shapesGraph) { shape = this.shapesGraph.shapes.get(shapeId); } if (!shape?.closed) { this.CBD(id, result, extracted, this.graphsToIgnore); } // Next, on our newly fetched data, // we’ll need to process all paths of the shape. If the shape is open, we’re going to do CBD afterwards, so let’s omit paths with only a PredicatePath when the shape is open if (!!shape) { //For all valid items in the atLeastOneLists, process the required path, optional paths and nodelinks. Do the same for the atLeastOneLists inside these options. let extraPaths = []; let extraNodeLinks = []; // Process atLeastOneLists in extraPaths and extra NodeLinks shape.fillPathsAndLinks(extraPaths, extraNodeLinks); for (let path of shape.requiredPaths.concat(shape.optionalPaths, extraPaths)) { if (!path.found(extracted) || shape.closed) { let pathQuads = path .match(this.store, extracted, id, this.graphsToIgnore) .flatMap((pathResult) => { return pathResult.path; }); result.push(...pathQuads); } } for (let nodeLink of shape.nodeLinks.concat(extraNodeLinks)) { let matches = nodeLink.pathPattern.match(this.store, extracted, id, this.graphsToIgnore); // I don't know how to do this correctly, but this is not the way for (let match of matches) { result.push(...(await this.maybeExtractRecursively(match.target, match.cbdExtracted, offline, nodeLink.link))); } } } if (!offline && id.termType === "NamedNode") { if (shape) { const problems = shape.requiredAreNotPresent(extracted); if (problems) { if (await this.dereference(id.value)) { // retry return this.extractRecursively(id, extracted, offline, shapeId); } else { log(`${id.value} does not adhere to the shape (${problems.toString()})`); } } } } return result; } /** * Performs Concise Bounded Description: extract star-shape and recurses over the blank nodes * @param result list of quads * @param extractedStar topology object to keep track of already found properties * @param store store to use for cbd * @param id starting subject * @param graphsToIgnore */ CBD(id, result, extractedStar, graphsToIgnore) { extractedStar.addCBDTerm(id); const graph = this.options.cbdDefaultGraph ? df.defaultGraph() : null; const quads = this.store.getQuads(id, null, null, graph); for (const q of quads) { // Ignore quads in the graphs to ignore if (graphsToIgnore?.includes(q.graph.value)) { continue; } result.push(q); const next = extractedStar.push(q.predicate, false); // Conditionally get more quads: if it’s a not yet extracted blank node if (q.object.termType === "BlankNode" && !extractedStar.cbdExtracted(q.object)) { this.CBD(q.object, result, next, graphsToIgnore); } } } loadQuadStreamInStore(quadStream) { return new Promise((resolve, reject) => { this.store.import(quadStream).on("end", resolve).on("error", reject); }); } } //# sourceMappingURL=CBDShapeExtractor.js.map