extract-cbd-shape
Version:
Extract an entity based on CBD and a SHACL shape
373 lines • 17.4 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ShapesGraph = void 0;
const Path_1 = require("./Path");
const types_1 = require("@treecg/types");
const Shape_1 = require("./Shape");
const rdf_data_factory_1 = require("rdf-data-factory");
// Term factory used by this module to build the rdf:first / rdf:rest lookup terms.
const df = new rdf_data_factory_1.DataFactory();
// SHACL vocabulary: its members (SHACL.path, SHACL.node, ...) are passed to the
// store lookups below as predicates or objects.
const SHACL = (0, types_1.createTermNamespace)(
    "http://www.w3.org/ns/shacl#",
    // property path operators
    "zeroOrMorePath",
    "zeroOrOnePath",
    "oneOrMorePath",
    "inversePath",
    "alternativePath",
    // shape and property-shape constraints
    "deactivated",
    "minCount",
    "path",
    "node",
    "closed",
    "property",
    // logical combinators
    "and",
    "xone",
    "or",
    // targets, datatypes and classes
    "targetClass",
    "datatype",
    "NodeShape"
);
/**
 * In-memory index of the SHACL shapes found in a store.
 *
 * On construction every shape node is preprocessed into a ShapeTemplate
 * (required/optional paths, node links, "at least one" alternatives) and
 * stored under its term. The class can also render a shape as a Mermaid
 * flowchart.
 */
class ShapesGraph {
    /** Term -> preprocessed shape lookup (a Shape_1.RDFMap built by initializeFromStore). */
    shapes;
    /** Running counter used to mint unique Mermaid node ids during one toMermaid() call. */
    counter;
    /**
     * @param shapeStore - Store holding the SHACL shapes; it is queried through getQuads().
     */
    constructor(shapeStore) {
        this.shapes = this.initializeFromStore(shapeStore);
        this.counter = 0;
    }
    /**
     * Returns a Mermaid representation of the shape identified by a given term.
     * @param term {Term} - The term of the Shape that is the start of the representation.
     * @returns The Mermaid flowchart source as a string.
     * @throws Error when no shape is registered for the term.
     */
    toMermaid(term) {
        const startShape = this.shapes.get(term);
        // Node ids restart from zero for every rendering.
        this.counter = 0;
        if (!startShape) {
            throw new Error(`No shape found for term "${term.value}"`);
        }
        return `flowchart LR\n${this.toMermaidSingleShape(startShape, '1', startShape.label || 'Shape')}`;
    }
    /**
     * Returns a Mermaid representation of a given shape, including the shapes
     * it links to and the alternatives of its sh:or / sh:xone lists.
     * @param shape - The shape for which to generate a representation.
     * @param id - The ID to identify the shape in the representation.
     * @param name - The name used for the shape in the representation.
     * @param visiting - Shapes on the current recursion path. Callers normally omit it;
     *                   it stops shapes that reference each other from recursing forever.
     * @returns The Mermaid lines for this shape and everything below it.
     * @throws Error when a node link points to a shape that is not in this graph.
     * @private
     */
    toMermaidSingleShape(shape, id, name, visiting = new Set()) {
        let mermaid = ` S${id}((${name}))\n`;
        if (visiting.has(shape)) {
            // The shape is already being rendered further up the recursion:
            // draw it as a leaf instead of expanding it again.
            return mermaid;
        }
        visiting.add(shape);
        const alreadyProcessedPaths = [];
        for (const nodeLink of shape.nodeLinks) {
            const rawPath = nodeLink.pathPattern.toString();
            // Required links are drawn solid, optional ones dotted.
            const arrow = this.isPathRequired(rawPath, shape.requiredPaths) ? '-->' : '-.->';
            alreadyProcessedPaths.push(rawPath);
            let label = this.clean(rawPath);
            const linkedShape = this.shapes.get(nodeLink.link);
            if (!linkedShape) {
                throw new Error(`The linked shape "${nodeLink.link.value}" is not found`);
            }
            const linkedShapeId = `${id}_${this.counter}`;
            if (label.startsWith('^')) {
                // Inverse path: the arrow points from the linked shape back to this one.
                label = label.substring(1);
                mermaid += ` S${linkedShapeId}[ ]${arrow}|"${label}"|S${id}\n`;
            }
            else {
                mermaid += ` S${id}${arrow}|"${label}"|S${linkedShapeId}[ ]\n`;
            }
            this.counter++;
            mermaid += this.toMermaidSingleShape(linkedShape, linkedShapeId, linkedShape.label || 'Shape', visiting);
        }
        for (const list of shape.atLeastOneLists) {
            if (list.length === 0) {
                continue;
            }
            // One OR decision node per list; it gets an X prefix so it cannot
            // clash with the S-prefixed id of the first alternative.
            const xId = `${id}_${this.counter}`;
            mermaid += ` S${id}---X${xId}{OR}\n`;
            for (const alternative of list) {
                const alternativeId = `${id}_${this.counter}`;
                this.counter++;
                mermaid += ` X${xId}---S${alternativeId}\n`;
                mermaid += this.toMermaidSingleShape(alternative, alternativeId, alternative.label || 'Shape', visiting);
            }
        }
        mermaid += this.simplePathToMermaid(shape.requiredPaths, alreadyProcessedPaths, id, '-->');
        mermaid += this.simplePathToMermaid(shape.optionalPaths, alreadyProcessedPaths, id, '-.->');
        // Only shapes on the current path count as "visiting", so a shape reached
        // through two different branches is still expanded in both.
        visiting.delete(shape);
        return mermaid;
    }
    /**
     * Removes < and > from a label.
     * It also adds an invisible character after 'http(s):' and after 'www' to avoid
     * the path being interpreted as a link. See https://github.com/orgs/community/discussions/106690.
     * @param path - The path from which to remove the < and >.
     * @returns The cleaned label.
     * @private
     */
    clean(path) {
        // The invisible character is written as an explicit escape so that editors
        // and copy/paste cannot silently drop it.
        // NOTE(review): U+200B (zero-width space) is assumed here - confirm it is the
        // code point the original literal contained.
        return path
            .replace(/[<>]/g, '')
            .replace(/(https?:|www)/g, '$1\u200B');
    }
    /**
     * Returns true if the given path is required.
     * @param path - The stringified path that needs to be checked.
     * @param requiredPaths - An array of all required paths.
     * @private
     */
    isPathRequired(path, requiredPaths) {
        return requiredPaths.some((requiredPath) => path === requiredPath.toString());
    }
    /**
     * Returns a Mermaid presentation for an array of simple paths.
     * Intended to be used with shape.requiredPaths and shape.optionalPaths.
     * @param paths - An array of paths.
     * @param alreadyProcessedPaths - Stringified paths that already have been processed;
     *                                newly rendered paths are appended to it.
     * @param shapedId - The id of the shape to which these paths belong.
     * @param link - The Mermaid link that needs to be used.
     * @returns The Mermaid lines for all paths that were not processed before.
     * @private
     */
    simplePathToMermaid(paths, alreadyProcessedPaths, shapedId, link) {
        let mermaid = '';
        for (const path of paths) {
            const literalType = path.literalType ? this.clean(path.literalType.value) : null;
            const rawPath = path.toString();
            if (alreadyProcessedPaths.includes(rawPath)) {
                continue;
            }
            alreadyProcessedPaths.push(rawPath);
            const cleaned = this.clean(rawPath);
            const label = this.getRealPath(cleaned);
            const target = `S${shapedId}_${this.counter}[${literalType || " "}]`;
            if (this.isRealInversePath(cleaned)) {
                // Inverse path: the value node points at the shape.
                mermaid += ` ${target}${link}|"${label}"|S${shapedId}\n`;
            }
            else {
                mermaid += ` S${shapedId}${link}|"${label}"|${target}\n`;
            }
            this.counter++;
        }
        return mermaid;
    }
    /**
     * Returns true if a given path is a real inverse path.
     * This means that the path is not a double, quadruple, ... inverse path.
     * @param path - The path that needs to be checked.
     * @private
     */
    isRealInversePath(path) {
        const found = path.match(/^(\^+)[^\^]+/);
        if (!found) {
            return false;
        }
        // An odd number of leading carets leaves one effective inversion.
        return found[1].length % 2 !== 0;
    }
    /**
     * Removes the leading ^ characters from the path. Carets further inside the
     * path (for example in a sequence such as `a/^b`) are kept, so the label is
     * not cut off at the first inner caret.
     * @param path - The path from which to remove the leading ^.
     * @returns The path without leading carets.
     * @throws Error when nothing is left after removing the carets.
     * @private
     */
    getRealPath(path) {
        const stripped = path.replace(/^\^+/, '');
        if (stripped === '') {
            throw new Error(`No real path found in "${path}"`);
        }
        return stripped;
    }
    /**
     * Turns the object of an sh:path into a Path object.
     * Blank nodes are checked, in this order, for sh:zeroOrMorePath, sh:oneOrMorePath,
     * sh:zeroOrOnePath, sh:inversePath and sh:alternativePath; a blank node with none
     * of these is read as an RDF list, i.e. a sequence path. Any other term becomes a
     * predicate path.
     * @param shapeStore - Store holding the shapes.
     * @param listItem - The term describing the path.
     * @param literalType - Optional sh:datatype term handed to the predicate paths.
     * @throws Error when a blank node is neither a path operator nor an RDF list.
     */
    constructPathPattern(shapeStore, listItem, literalType) {
        if (listItem.termType !== "BlankNode") {
            return new Path_1.PredicatePath(listItem, literalType);
        }
        const build = (node) => this.constructPathPattern(shapeStore, node, literalType);
        // Operators that wrap exactly one inner path, in lookup priority order.
        const unaryOperators = [
            [SHACL.zeroOrMorePath, Path_1.ZeroOrMorePath],
            [SHACL.oneOrMorePath, Path_1.OneOrMorePath],
            [SHACL.zeroOrOnePath, Path_1.ZeroOrOnePath],
            [SHACL.inversePath, Path_1.InversePath],
        ];
        for (const [predicate, PathClass] of unaryOperators) {
            const operand = getObjects(shapeStore, listItem, predicate, null)[0];
            if (operand) {
                return new PathClass(build(operand));
            }
        }
        const alternatives = getObjects(shapeStore, listItem, SHACL.alternativePath, null)[0];
        if (alternatives) {
            return new Path_1.AlternativePath(this.rdfListToArray(shapeStore, alternatives).map((value) => build(value)));
        }
        const items = this.rdfListToArray(shapeStore, listItem);
        if (items.length === 1 && items[0] === listItem) {
            // rdfListToArray hands back the node itself when it is not a list;
            // recursing on it again would never terminate.
            throw new Error(`Unsupported or malformed sh:path blank node "${listItem.value}"`);
        }
        return new Path_1.SequencePath(items.map((x) => build(x)));
    }
    /**
     * Returns true when the given shape carries sh:deactivated with value "true".
     * Only the first sh:deactivated object is looked at.
     * @param shapeStore - Store holding the shapes.
     * @param shapeId - Term of the shape to check.
     * @private
     */
    isDeactivated(shapeStore, shapeId) {
        const deactivated = getObjects(shapeStore, shapeId, SHACL.deactivated, null);
        return deactivated.length > 0 && deactivated[0].value === "true";
    }
    /**
     * Processes a property shape and records its path (and node link, if any) on `shape`.
     * @param shapeStore - Store holding the shapes.
     * @param propertyShapeId - Term of the candidate property shape.
     * @param shape - The shape template that collects the results.
     * @param required - When truthy, the path is recorded as required regardless of sh:minCount.
     * @returns false if it wasn't a property shape (no sh:path), true otherwise
     */
    preprocessPropertyShape(shapeStore, propertyShapeId, shape, required) {
        // Skip if the shape has been deactivated. This counts as success: whatever kind
        // of shape it was, there is nothing left to process.
        if (this.isDeactivated(shapeStore, propertyShapeId)) {
            return true;
        }
        // sh:datatype, if defined, is handed to the path so it can carry the literal type.
        const literalType = getObjects(shapeStore, propertyShapeId, SHACL.datatype, null)[0];
        const path = getObjects(shapeStore, propertyShapeId, SHACL.path, null)[0];
        if (!path) {
            return false; // this isn't a property shape...
        }
        const pathPattern = this.constructPathPattern(shapeStore, path, literalType);
        const minCount = getObjects(shapeStore, propertyShapeId, SHACL.minCount, null);
        // Any sh:minCount other than "0" makes the path required.
        const isRequired = (minCount[0] && minCount[0].value !== "0") || required;
        if (isRequired) {
            shape.requiredPaths.push(pathPattern);
        }
        else {
            //TODO: don't include node links?
            shape.optionalPaths.push(pathPattern);
        }
        // **TODO**: will the sh:or, sh:xone, sh:and, etc. be of use here? It won't contain any more information about possible properties?
        // Maybe to potentially point to another node, xone a datatype?
        // Does it link to a literal or to a new node?
        const nodeLink = getObjects(shapeStore, propertyShapeId, SHACL.node, null);
        if (nodeLink[0]) {
            shape.nodeLinks.push(new Shape_1.NodeLink(pathPattern, nodeLink[0]));
        }
        //TODO: Can Nodelinks appear in conditionals from here? Probably they can? (same comment as above)
        return true; // Success: the property shape has been processed
    }
    /**
     * Processes a NodeShape or PropertyShape and adds NodeLinks and required properties to the arrays.
     * The node is first tried as a property shape; if it has no sh:path it is processed as a node shape.
     * @param shapeStore - Store holding the shapes.
     * @param shapeId - Term of the shape to process.
     * @param shape - The shape template that collects the results.
     * @returns true when it was handled as a property shape, otherwise the (undefined)
     *          result of preprocessNodeShape
     */
    preprocessShape(shapeStore, shapeId, shape) {
        if (this.preprocessPropertyShape(shapeStore, shapeId, shape)) {
            return true;
        }
        return this.preprocessNodeShape(shapeStore, shapeId, shape);
    }
    /**
     * Processes a NodeShape: label, closedness, sh:property, sh:and and sh:xone / sh:or.
     * sh:not is ignored.
     * @param shapeStore - Store holding the shapes.
     * @param nodeShapeId - Term of the node shape.
     * @param shape - The shape template that collects the results.
     */
    preprocessNodeShape(shapeStore, nodeShapeId, shape) {
        // Extract the label following this strategy:
        //   first look for rdfs:label
        //   fall back to sh:targetClass (if any)
        //   fall back to the last part of the node shape ID, or the ID itself if it's a blank node
        // NOTE(review): members of sh:and lists are processed onto this same `shape`
        // (see below), so a nested node shape overwrites the label set here - confirm
        // that this is intended.
        const rdfsLabel = getObjects(shapeStore, nodeShapeId, types_1.RDFS.terms.label, null)[0];
        const targetClass = rdfsLabel
            ? undefined
            : getObjects(shapeStore, nodeShapeId, SHACL.targetClass, null)[0];
        if (rdfsLabel) {
            shape.label = rdfsLabel.value;
        }
        else if (targetClass) {
            // Make sure that IRIs are visible as node labels in mermaid diagrams
            shape.label = this.clean(targetClass.value);
        }
        else if (nodeShapeId.termType === "BlankNode") {
            shape.label = nodeShapeId.value;
        }
        else {
            shape.label = nodeShapeId.value.split("/").pop();
        }
        // Check if it's closed or open
        const closedIndicator = getObjects(shapeStore, nodeShapeId, SHACL.closed, null)[0];
        if (closedIndicator && closedIndicator.value === "true") {
            shape.closed = true;
        }
        // Process properties if it has any
        for (const prop of getObjects(shapeStore, nodeShapeId, SHACL.property, null)) {
            this.preprocessPropertyShape(shapeStore, prop, shape);
        }
        // sh:and: everything nested in an AND clause is merged into this shape.
        for (const andList of getObjects(shapeStore, nodeShapeId, SHACL.and, null)) {
            for (const member of this.rdfListToArray(shapeStore, andList)) {
                this.preprocessShape(shapeStore, member, shape);
            }
        }
        // Process zero or more sh:xone and sh:or lists in the same way -- explanation in README why they can be handled in the same way
        const choiceLists = getObjects(shapeStore, nodeShapeId, SHACL.xone, null)
            .concat(getObjects(shapeStore, nodeShapeId, SHACL.or, null));
        for (const choiceList of choiceLists) {
            const atLeastOneList = this.rdfListToArray(shapeStore, choiceList).map((val) => {
                // Create a new shape and process as usual -- but mind that we don't trigger a circular shape here...
                const alternative = new Shape_1.ShapeTemplate();
                this.preprocessShape(shapeStore, val, alternative);
                return alternative;
            });
            shape.atLeastOneLists.push(atLeastOneList);
        }
        // And finally, we're just ignoring sh:not. Don't process this one
    }
    /**
     * Builds the term -> shape map from the store.
     * Shape nodes are the subjects of sh:property, the instances of sh:NodeShape and
     * the objects of sh:node. Deactivated shapes are left out.
     * @param shapeStore - Store holding the shapes.
     * @returns An RDFMap with one preprocessed ShapeTemplate per active shape node.
     */
    initializeFromStore(shapeStore) {
        //TODO: This is a limitation though: we only support NodeShapes with at least one sh:property set? Other NodeShapes in this context are otherwise just meaningless?
        const candidates = [
            ...getSubjects(shapeStore, SHACL.property, null, null),
            ...getSubjects(shapeStore, types_1.RDF.terms.type, SHACL.NodeShape, null),
            ...getObjects(shapeStore, null, SHACL.node, null),
        ];
        // DISTINCT: keep only the first occurrence of every term.
        const shapeNodes = candidates.filter((value, index, array) => {
            return array.findIndex((x) => x.equals(value)) === index;
        });
        const shapes = new Shape_1.RDFMap();
        for (const shapeId of shapeNodes) {
            // Don't process if the shape is deactivated
            if (this.isDeactivated(shapeStore, shapeId)) {
                continue;
            }
            const shape = new Shape_1.ShapeTemplate();
            this.preprocessNodeShape(shapeStore, shapeId, shape);
            shapes.set(shapeId, shape);
        }
        return shapes;
    }
    /**
     * Yields all elements of an RDF List, or detects it wasn't a list after all and
     * yields the item itself as the single element. The empty list (rdf:nil) yields nothing.
     * @param shapeStore - Store holding the list triples.
     * @param item - Head of the list, or a single non-list term.
     * @returns A generator over the list members
     */
    *rdfListToGenerator(shapeStore, item) {
        const RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
        const NIL = `${RDF_NS}nil`;
        if (item.termType === "NamedNode" && item.value === NIL) {
            // rdf:nil is the empty list, not a list containing rdf:nil.
            return;
        }
        const first = df.namedNode(`${RDF_NS}first`);
        const rest = df.namedNode(`${RDF_NS}rest`);
        const head = getObjects(shapeStore, item, first, null)[0];
        if (!head) {
            // It's not a list. It's just one element.
            yield item;
            return;
        }
        yield head;
        let node = getObjects(shapeStore, item, rest, null)[0];
        while (node && node.value !== NIL) {
            yield getObjects(shapeStore, node, first, null)[0];
            node = getObjects(shapeStore, node, rest, null)[0];
        }
    }
    /**
     * Array version of rdfListToGenerator.
     * @param shapeStore - Store holding the list triples.
     * @param item - Head of the list, or a single non-list term.
     * @returns The list members as an array
     */
    rdfListToArray(shapeStore, item) {
        return Array.from(this.rdfListToGenerator(shapeStore, item));
    }
}
exports.ShapesGraph = ShapesGraph;
/**
 * Collects the subjects of all quads in `store` that match the given
 * predicate, object and graph (the subject position is a wildcard).
 * @param store - Any object exposing getQuads(subject, predicate, object, graph).
 * @returns The subject of every matching quad, in the order getQuads returned them.
 */
const getSubjects = (store, predicate, object, graph) => {
    const matches = store.getQuads(null, predicate, object, graph);
    return matches.map(({ subject }) => subject);
};
/**
 * Collects the objects of all quads in `store` that match the given
 * subject, predicate and graph (the object position is a wildcard).
 * @param store - Any object exposing getQuads(subject, predicate, object, graph).
 * @returns The object of every matching quad, in the order getQuads returned them.
 */
const getObjects = (store, subject, predicate, graph) => {
    const matches = store.getQuads(subject, predicate, null, graph);
    return matches.map(({ object }) => object);
};
//# sourceMappingURL=ShapesGraph.js.map