/*
 * extract-cbd-shape
 * Version:
 * Extract an entity based on CBD and a SHACL shape
 * 302 lines • 11.9 kB
 * JavaScript
 */
"use strict";
// Module interop helper (appears to be compiler-generated): reuses an existing
// `this.__importDefault` when one is present; otherwise returns ES-module-like
// values (those with a truthy `__esModule`) untouched and wraps anything else
// as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.CbdExtracted = exports.CBDShapeExtractor = void 0;
const rdf_dereference_1 = require("rdf-dereference");
const Shape_1 = require("./Shape");
const rdf_data_factory_1 = require("rdf-data-factory");
const rdf_stores_1 = require("rdf-stores");
const debug_1 = __importDefault(require("debug"));
const ShapesGraph_1 = require("./ShapesGraph");
const log = (0, debug_1.default)("extract-cbd-shape");
const df = new rdf_data_factory_1.DataFactory();
/**
 * Plain record pairing a `target` with a `msg`.
 * NOTE(review): nothing in the visible source instantiates this class; judging
 * by its name it is meant to signal that `target` still has to be
 * dereferenced — confirm before relying on it.
 */
class DereferenceNeeded {
    target;
    msg;
    /**
     * @param target Value stored as `target`
     * @param msg Value stored as `msg`
     */
    constructor(target, msg) {
        Object.assign(this, { target, msg });
    }
}
/**
 * Entry point for extracting the quads that describe an entity, delegating the
 * actual work to a fresh ExtractInstance per extraction.
 *
 * Usage:
 *   const { CBDShapeExtractor } = require("extract-cbd-shape");
 *   ...
 *   const extractor = new CBDShapeExtractor(shapesGraphStore, dereferencer);
 *   const entityQuads = await extractor.extract(store, entity, shapeId);
 */
class CBDShapeExtractor {
    dereferencer;
    shapesGraph;
    options;
    /**
     * @param shapesGraphStore Optional store with the shapes; when truthy it is wrapped in a ShapesGraph
     * @param dereferencer Optional dereferencer; falls back to `rdfDereferencer` from rdf-dereference
     * @param options Options merged over the default `{ cbdDefaultGraph: false }`
     */
    constructor(shapesGraphStore, dereferencer, options = {}) {
        // Caller-supplied options win over the defaults.
        this.options = { cbdDefaultGraph: false, ...options };
        this.dereferencer = dereferencer
            ? dereferencer
            : rdf_dereference_1.rdfDereferencer;
        // Pre-process the shapes only when a shapes graph store was handed in.
        if (shapesGraphStore) {
            this.shapesGraph = new ShapesGraph_1.ShapesGraph(shapesGraphStore);
        }
    }
    /**
     * Extracts several entities from the same store in one go.
     *
     * Quads whose graph is a NamedNode equal to one of the ids are set aside per
     * member and appended to that member's result; every other quad is copied
     * into a shared store on which each member is extracted.
     *
     * @param store Source store, read through `readQuads(null, null, null, null)`
     * @param ids The entities to extract
     * @param shapeId Optional shape identifier handed to `extract`
     * @param graphsToIgnore Optional graphs to ignore; each member receives its own copy
     * @param itemExtracted Optional callback invoked with `{ subject, quads }` as soon as a member is done
     * @returns Promise of the `{ subject, quads }` records, in order of completion
     */
    async bulkExtract(store, ids, shapeId, graphsToIgnore, itemExtracted) {
        const extractedMembers = [];
        const memberNames = new Set(ids.map((member) => member.value));
        const quadsPerMember = {};
        for (const member of ids) {
            quadsPerMember[member.value] = [];
        }
        const sharedStore = rdf_stores_1.RdfStore.createDefault();
        for (const quad of store.readQuads(null, null, null, null)) {
            const { graph } = quad;
            const inMemberGraph = graph.termType === "NamedNode" && memberNames.has(graph.value);
            if (inMemberGraph) {
                quadsPerMember[graph.value].push(quad);
                continue;
            }
            sharedStore.addQuad(quad);
        }
        // All extractions are started before any of them is awaited.
        await Promise.all(ids.map(async (member) => {
            const quads = await this.extract(sharedStore, member, shapeId, (graphsToIgnore || []).slice());
            quads.push(...quadsPerMember[member.value]);
            if (itemExtracted) {
                itemExtracted({ subject: member, quads });
            }
            extractedMembers.push({ subject: member, quads });
        }));
        return extractedMembers;
    }
    /**
     * Extracts:
     *  * first level quads,
     *  * their blank nodes with their quads (recursively),
     *  * all quads in the named graph of this entity,
     *  * all quads of required paths found in the shape,
     *  * the same algorithm on top of all found node links.
     * @param store The RdfStore loaded with a set of initial quads
     * @param id The entity to be described/extracted
     * @param shapeId The optional SHACL NodeShape identifier
     * @param graphsToIgnore Optional graphs to ignore when other entities are mentioned in the current context
     * @returns Promise of a quad array of the described entity
     */
    async extract(store, id, shapeId, graphsToIgnore) {
        // Only the graph names are needed downstream, not the terms themselves.
        const ignoredGraphNames = (graphsToIgnore || []).map((graph) => graph.value);
        const instance = new ExtractInstance(store, this.dereferencer, ignoredGraphNames, this.options, this.shapesGraph);
        const quads = await instance.extract(id, false, shapeId);
        return quads;
    }
}
exports.CBDShapeExtractor = CBDShapeExtractor;
/**
 * Bookkeeping shared across one extraction.
 *
 * Two pieces of state are kept:
 *  * `topology`: a node with `forwards` / `backwards` dictionaries keyed by
 *    predicate value, each pointing to a neighbouring topology node. Every
 *    CbdExtracted instance is a cursor on one node of that structure.
 *  * `cbdExtractedMap`: per-term flags `{ cbd, shape }`, shared by every cursor
 *    derived from this one through `push` / `enter`.
 */
class CbdExtracted {
    topology;
    cbdExtractedMap;
    /**
     * @param topology Optional topology node to point at; a fresh empty node is created when omitted
     * @param cbdExtracted Optional flag map to share; defaults to a new RDFMap
     */
    constructor(topology, cbdExtracted = new Shape_1.RDFMap()) {
        this.topology = topology ? topology : { forwards: {}, backwards: {} };
        this.cbdExtractedMap = cbdExtracted;
    }
    /**
     * Flags `term` as processed by CBD, keeping an existing `shape` flag intact.
     * @param term The term to flag
     */
    addCBDTerm(term) {
        const flags = this.cbdExtractedMap.get(term);
        if (flags) {
            flags.cbd = true;
        }
        else {
            this.cbdExtractedMap.set(term, { cbd: true, shape: false });
        }
    }
    /**
     * Flags `term` as processed by shape-based extraction, keeping an existing
     * `cbd` flag intact.
     * @param term The term to flag
     */
    addShapeTerm(term) {
        const flags = this.cbdExtractedMap.get(term);
        if (flags) {
            flags.shape = true;
        }
        else {
            // A term seen here for the first time has been shape-processed but
            // not CBD-processed. Recording it the other way around (as the
            // CBD branch does) would make cbdExtracted() miss the first visit.
            this.cbdExtractedMap.set(term, { cbd: false, shape: true });
        }
    }
    /**
     * Reports whether `term` carries the `shape` flag, i.e. whether
     * `addShapeTerm` has recorded it. Note that, despite the method name, the
     * `cbd` flag is not consulted.
     * @param term The term to look up
     * @returns true when the term is flagged, false otherwise (including unknown terms)
     */
    cbdExtracted(term) {
        return !!this.cbdExtractedMap.get(term)?.shape;
    }
    /**
     * Moves one step along `term` in the topology, creating the neighbouring
     * node (with a link back to the current node) when it does not exist yet.
     * @param term The predicate to follow; its `value` is the dictionary key
     * @param inverse Follow the `backwards` dictionary instead of `forwards`
     * @returns A new cursor on the neighbouring node, sharing the flag map
     */
    push(term, inverse) {
        const direction = inverse ? "backwards" : "forwards";
        const opposite = inverse ? "forwards" : "backwards";
        if (!this.topology[direction][term.value]) {
            const neighbour = { forwards: {}, backwards: {} };
            neighbour[opposite][term.value] = this.topology;
            this.topology[direction][term.value] = neighbour;
        }
        return new CbdExtracted(this.topology[direction][term.value], this.cbdExtractedMap);
    }
    /**
     * Like `push`, but never creates a node.
     * @param term The predicate to follow; its `value` is the dictionary key
     * @param inverse Follow the `backwards` dictionary instead of `forwards`
     * @returns A cursor on the existing neighbouring node, or undefined when there is none
     */
    enter(term, inverse) {
        const neighbour = inverse
            ? this.topology.backwards[term.value]
            : this.topology.forwards[term.value];
        if (neighbour) {
            return new CbdExtracted(neighbour, this.cbdExtractedMap);
        }
        return undefined;
    }
}
exports.CbdExtracted = CbdExtracted;
/**
 * State and algorithm for a single extraction over one store.
 *
 * Holds the store being read (and extended when dereferencing), the
 * dereferencer, the names of the graphs to skip, the extractor options and the
 * optional shapes graph, plus the set of URLs already dereferenced.
 */
class ExtractInstance {
    // URLs already handed to the dereferencer, so none is fetched twice.
    dereferenced = new Set();
    store;
    dereferencer;
    options;
    graphsToIgnore;
    shapesGraph;
    /**
     * @param store Store to read quads from; dereferenced data is imported into it
     * @param dereferencer Object whose `dereference(url, { fetch })` resolves to `{ data }`, a quad stream
     * @param graphsToIgnore Graph names (strings) whose quads are skipped
     * @param options Extractor options; `cbdDefaultGraph` and `fetch` are read here
     * @param shapesGraph Optional shapes graph exposing `shapes.get(shapeId)`
     */
    constructor(store, dereferencer, graphsToIgnore, options, shapesGraph) {
        this.store = store;
        this.dereferencer = dereferencer;
        this.shapesGraph = shapesGraph;
        this.graphsToIgnore = graphsToIgnore;
        this.options = options;
    }
    /**
     * Extracts the description of `id`: the recursive extraction result plus
     * every quad in the named graph `id`, with duplicates removed.
     *
     * When nothing at all is found and dereferencing is allowed (not offline,
     * `id` is a NamedNode, and its URL has not been dereferenced before), the
     * URL is dereferenced and the whole extraction, named graph included, is
     * attempted once more.
     *
     * @param id The entity to extract
     * @param offline When truthy, nothing is dereferenced
     * @param shapeId Optional shape identifier or ShapeTemplate
     * @returns Promise of the de-duplicated quads
     */
    async extract(id, offline, shapeId) {
        let quads = await this.extractWithNamedGraph(id, offline, shapeId);
        // Same gate as in extractRecursively: only IRIs can be dereferenced,
        // and never in offline mode.
        const mayDereference = !offline && id.termType === "NamedNode";
        if (quads.length === 0 && mayDereference && (await this.dereference(id.value))) {
            quads = await this.extractWithNamedGraph(id, offline, shapeId);
        }
        return ExtractInstance.uniqueQuads(quads);
    }
    /**
     * Runs one extraction pass with fresh bookkeeping and appends the quads of
     * the named graph `id`.
     */
    async extractWithNamedGraph(id, offline, shapeId) {
        const quads = await this.maybeExtractRecursively(id, new CbdExtracted(), offline, shapeId);
        quads.push(...this.store.getQuads(null, null, null, id));
        return quads;
    }
    /**
     * Keeps the first occurrence of every quad, compared with `equals`.
     */
    static uniqueQuads(quads) {
        return quads.filter((quad, index, all) => {
            return index === all.findIndex((other) => other.equals(quad));
        });
    }
    /**
     * Dereferences `url` and imports the resulting quads into the store, unless
     * this URL has been dereferenced by this instance before.
     * @param url The URL to dereference
     * @returns Promise of true when data was loaded, false when the URL was skipped
     */
    async dereference(url) {
        if (this.dereferenced.has(url)) {
            log(`Will not dereference ${url} again`);
            return false;
        }
        // Registered before fetching, so a failed attempt is not retried either.
        this.dereferenced.add(url);
        const response = await this.dereferencer.dereference(url, {
            fetch: this.options.fetch,
        });
        await this.loadQuadStreamInStore(response.data);
        return true;
    }
    /**
     * Runs `extractRecursively` unless `extracted` already reports `id` as
     * handled; flags `id` before descending so that links back to it stop here.
     * @returns Promise of the extracted quads, or of an empty array when skipped
     */
    async maybeExtractRecursively(id, extracted, offline, shapeId) {
        if (extracted.cbdExtracted(id)) {
            return [];
        }
        extracted.addShapeTerm(id);
        return this.extractRecursively(id, extracted, offline, shapeId);
    }
    /**
     * Core step: CBD (unless the shape is closed), then the shape's paths, then
     * the shape's node links, and finally a dereference-and-retry when required
     * parts of the shape are still missing.
     * @param id The term being described
     * @param extracted Bookkeeping of what has been found so far
     * @param offline When truthy, nothing is dereferenced
     * @param shapeId A ShapeTemplate, or an identifier looked up in the shapes graph
     * @returns Promise of the quads found (may contain duplicates)
     */
    async extractRecursively(id, extracted, offline, shapeId) {
        const result = [];
        let shape;
        if (shapeId instanceof Shape_1.ShapeTemplate) {
            shape = shapeId;
        }
        else if (shapeId && this.shapesGraph) {
            shape = this.shapesGraph.shapes.get(shapeId);
        }
        // Without a shape, or with an open one, start from the CBD of the term.
        if (!shape?.closed) {
            this.CBD(id, result, extracted, this.graphsToIgnore);
        }
        if (shape) {
            // Paths and node links contributed by the shape's atLeastOneLists.
            const extraPaths = [];
            const extraNodeLinks = [];
            shape.fillPathsAndLinks(extraPaths, extraNodeLinks);
            for (const path of shape.requiredPaths.concat(shape.optionalPaths, extraPaths)) {
                // For an open shape, paths the bookkeeping already reports as
                // found are skipped; a closed shape had no CBD pass, so every
                // path is matched.
                if (!path.found(extracted) || shape.closed) {
                    const pathQuads = path
                        .match(this.store, extracted, id, this.graphsToIgnore)
                        .flatMap((pathResult) => pathResult.path);
                    result.push(...pathQuads);
                }
            }
            for (const nodeLink of shape.nodeLinks.concat(extraNodeLinks)) {
                const matches = nodeLink.pathPattern.match(this.store, extracted, id, this.graphsToIgnore);
                // TODO(review): the original author flagged this hand-over of
                // bookkeeping to the linked node as not quite right.
                for (const match of matches) {
                    result.push(...(await this.maybeExtractRecursively(match.target, match.cbdExtracted, offline, nodeLink.link)));
                }
            }
        }
        if (!offline && id.termType === "NamedNode" && shape) {
            const problems = shape.requiredAreNotPresent(extracted);
            if (problems) {
                if (await this.dereference(id.value)) {
                    // Retry from scratch on the enlarged store; a second
                    // dereference of the same URL is refused, so this ends.
                    return this.extractRecursively(id, extracted, offline, shapeId);
                }
                log(`${id.value} does not adhere to the shape (${problems.toString()})`);
            }
        }
        return result;
    }
    /**
     * Performs Concise Bounded Description: collects the quads with `id` as
     * subject and recurses into blank node objects.
     *
     * A blank node that is already being expanded higher up in the current
     * recursion is not entered again, so cyclic blank node structures
     * terminate. A blank node reachable through several separate branches is
     * still expanded once per branch.
     *
     * @param id Starting subject
     * @param result Array the found quads are appended to
     * @param extractedStar Bookkeeping object tracking already found properties
     * @param graphsToIgnore Graph names whose quads are skipped
     * @param ancestors Internal: keys of the terms on the current recursion path
     */
    CBD(id, result, extractedStar, graphsToIgnore, ancestors = new Set()) {
        extractedStar.addCBDTerm(id);
        const ownKey = `${id.termType}:${id.value}`;
        ancestors.add(ownKey);
        // With cbdDefaultGraph set only the default graph is read; otherwise all graphs.
        const graph = this.options.cbdDefaultGraph ? df.defaultGraph() : null;
        const quads = this.store.getQuads(id, null, null, graph);
        for (const q of quads) {
            // Ignore quads in the graphs to ignore
            if (graphsToIgnore?.includes(q.graph.value)) {
                continue;
            }
            result.push(q);
            // Always register the predicate in the topology, even without recursion.
            const next = extractedStar.push(q.predicate, false);
            const object = q.object;
            if (object.termType !== "BlankNode" || extractedStar.cbdExtracted(object)) {
                continue;
            }
            if (ancestors.has(`${object.termType}:${object.value}`)) {
                // Cycle: this blank node is already being expanded further up.
                continue;
            }
            this.CBD(object, result, next, graphsToIgnore, ancestors);
        }
        ancestors.delete(ownKey);
    }
    /**
     * Imports a quad stream into the store.
     * @param quadStream The stream to import
     * @returns Promise settling on the import's "end" (resolve) or "error" (reject) event
     */
    loadQuadStreamInStore(quadStream) {
        return new Promise((resolve, reject) => {
            this.store.import(quadStream).on("end", resolve).on("error", reject);
        });
    }
}
//# sourceMappingURL=CBDShapeExtractor.js.map