UNPKG

extract-cbd-shape

Version:

Extract an entity based on CBD and a SHACL shape

572 lines (514 loc) 17.5 kB
import { RdfStore } from "rdf-stores";
import { Term } from "@rdfjs/types";
import {
  AlternativePath,
  InversePath,
  OneOrMorePath,
  Path,
  PredicatePath,
  SequencePath,
  ZeroOrMorePath,
  ZeroOrOnePath,
} from "./Path";
import { createTermNamespace, RDF, RDFS } from "@treecg/types";
import { NodeLink, RDFMap, ShapeTemplate } from "./Shape";
import { DataFactory } from "rdf-data-factory";

const df = new DataFactory();

const SHACL = createTermNamespace(
  "http://www.w3.org/ns/shacl#",
  "zeroOrMorePath",
  "zeroOrOnePath",
  "oneOrMorePath",
  "inversePath",
  "alternativePath",
  "deactivated",
  "minCount",
  "path",
  "node",
  "closed",
  "property",
  "and",
  "xone",
  "or",
  "targetClass",
  "datatype",
  "NodeShape",
);

// RDF collection vocabulary, created once instead of on every list traversal.
const RDF_FIRST = df.namedNode(
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#first",
);
const RDF_REST = df.namedNode(
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest",
);
const RDF_NIL_IRI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil";

/**
 * In-memory index of the shapes found in a shapes store, keyed by the shape's term.
 * Each shape is flattened into a `ShapeTemplate` (required paths, optional paths,
 * node links and "at least one" lists) by the `preprocess*` methods.
 */
export class ShapesGraph {
  shapes: RDFMap<ShapeTemplate>;
  private counter: number;

  /**
   * @param shapeStore - The store holding the SHACL shapes to index.
   */
  constructor(shapeStore: RdfStore) {
    this.shapes = this.initializeFromStore(shapeStore);
    this.counter = 0;
  }

  /**
   * This function returns a Mermaid representation of a shape identified by a given term.
   * @param term - The term of the Shape that is the start of the representation.
   * @returns The Mermaid flowchart source.
   * @throws Error when no shape is registered for `term`.
   */
  public toMermaid(term: Term): string {
    const startShape = this.shapes.get(term);
    // Node ids are derived from this counter, so every rendering starts from 0.
    this.counter = 0;

    if (!startShape) {
      throw new Error(`No shape found for term "${term.value}"`);
    }

    let mermaid = "flowchart LR\n";
    mermaid += this.toMermaidSingleShape(
      startShape,
      "1",
      startShape.label || "Shape",
    );
    return mermaid;
  }

  /**
   * This function returns a Mermaid representation of a given shape.
   * @param shape - The shape for which to generate a representation.
   * @param id - The ID to identify the shape in the representation.
   * @param name - The name used for the shape in the representation.
   * @param ancestors - Shapes currently being rendered higher up the recursion.
   *   A linked shape that is already in this set is drawn as a node but not
   *   expanded again, so recursive shapes cannot cause unbounded recursion.
   * @returns The Mermaid lines for this shape and everything reachable from it.
   * @throws Error when a node link points to a shape that is not in `this.shapes`.
   * @private
   */
  private toMermaidSingleShape(
    shape: ShapeTemplate,
    id: string,
    name: string,
    ancestors: ReadonlySet<ShapeTemplate> = new Set<ShapeTemplate>(),
  ): string {
    let mermaid = ` S${id}((${name}))\n`;
    const alreadyProcessedPaths: string[] = [];

    // Copy so that sibling branches do not see each other's descendants.
    const visiting = new Set<ShapeTemplate>(ancestors);
    visiting.add(shape);

    shape.nodeLinks.forEach((nodeLink) => {
      let p = nodeLink.pathPattern.toString();
      // Must be checked on the raw string: requiredPaths are compared via toString().
      const isPathRequired = this.isPathRequired(p, shape.requiredPaths);
      alreadyProcessedPaths.push(p);
      p = this.clean(p);

      const linkedShape = this.shapes.get(nodeLink.link);
      if (!linkedShape) {
        // Use the term's value: interpolating the Term object itself is not readable.
        throw new Error(
          `The linked shape "${nodeLink.link.value}" is not found`,
        );
      }

      const linkedShapeId = `${id}_${this.counter}`;
      // Solid arrow for required paths, dotted arrow for optional ones.
      const link = isPathRequired ? "-->" : "-.->";

      if (p.startsWith("^")) {
        // Inverse path: draw the arrow from the linked shape towards this shape.
        p = p.substring(1);
        mermaid += ` S${linkedShapeId}[ ]${link}|"${p}"|S${id}\n`;
      } else {
        mermaid += ` S${id}${link}|"${p}"|S${linkedShapeId}[ ]\n`;
      }
      this.counter++;

      const linkedLabel = linkedShape.label || "Shape";
      if (visiting.has(linkedShape)) {
        // Recursive shape: name the node but do not expand it again.
        mermaid += ` S${linkedShapeId}((${linkedLabel}))\n`;
      } else {
        mermaid += this.toMermaidSingleShape(
          linkedShape,
          linkedShapeId,
          linkedLabel,
          visiting,
        );
      }
    });

    shape.atLeastOneLists.forEach((list) => {
      if (list.length > 0) {
        const xId = `${id}_${this.counter}`;
        mermaid += ` S${id}---X${xId}{OR}\n`;

        list.forEach((alternative) => {
          const shapeId = `${id}_${this.counter}`;
          this.counter++;
          mermaid += ` X${xId}---S${shapeId}\n`;
          mermaid += this.toMermaidSingleShape(
            alternative,
            shapeId,
            alternative.label || "Shape",
            visiting,
          );
        });
      }
    });

    mermaid += this.simplePathToMermaid(
      shape.requiredPaths,
      alreadyProcessedPaths,
      id,
      "-->",
    );
    mermaid += this.simplePathToMermaid(
      shape.optionalPaths,
      alreadyProcessedPaths,
      id,
      "-.->",
    );

    return mermaid;
  }

  /**
   * This function removes < and > from a label.
   * It also adds an invisible character after 'http(s):' and after 'www' to avoid
   * the path being interpreted as a link. See https://github.com/orgs/community/discussions/106690.
   * @param path - The path from which to remove the < and >.
   * @returns The cleaned label.
   * @private
   */
  private clean(path: string): string {
    // NOTE(review): written as an escape so the invisible character is visible in
    // source; it is read as U+200E here — confirm against the upstream file.
    const invisible = "\u200E";
    return path
      .replace(/</g, "")
      .replace(/http:/g, `http:${invisible}`)
      .replace(/https:/g, `https:${invisible}`)
      .replace(/www/g, `www${invisible}`)
      .replace(/>/g, "");
  }

  /**
   * This function returns true if the given path is required.
   * @param path - The stringified path that needs to be checked.
   * @param requiredPaths - An array of all required paths.
   * @returns true when some required path stringifies to `path`.
   * @private
   */
  private isPathRequired(path: string, requiredPaths: Path[]): boolean {
    return requiredPaths.some(
      (requiredPath) => path === requiredPath.toString(),
    );
  }

  /**
   * This function returns a Mermaid presentation for an array of simple paths.
   * This function is intended to be used with shape.requiredPaths and shape.optionalPaths.
   * @param paths - An array of paths.
   * @param alreadyProcessedPaths - An array of stringified paths that already have been processed.
   *   Newly rendered paths are appended to it.
   * @param shapedId - The id of the shape to which these paths belong.
   * @param link - The Mermaid link that needs to be used.
   * @returns The Mermaid lines for the paths that were not processed before.
   * @private
   */
  private simplePathToMermaid(
    paths: Path[],
    alreadyProcessedPaths: string[],
    shapedId: string,
    link: string,
  ) {
    let mermaid = "";

    paths.forEach((path) => {
      const literalType = path.literalType
        ? this.clean(path.literalType.value)
        : null;
      let p = path.toString();

      // Skip paths already drawn (as a node link or by an earlier call).
      if (alreadyProcessedPaths.includes(p)) {
        return;
      }
      alreadyProcessedPaths.push(p);
      p = this.clean(p);

      const targetId = `S${shapedId}_${this.counter}`;
      const targetLabel = literalType || " ";
      if (this.isRealInversePath(p)) {
        p = this.getRealPath(p);
        mermaid += ` ${targetId}[${targetLabel}]${link}|"${p}"|S${shapedId}\n`;
      } else {
        p = this.getRealPath(p);
        mermaid += ` S${shapedId}${link}|"${p}"|${targetId}[${targetLabel}]\n`;
      }
      this.counter++;
    });

    return mermaid;
  }

  /**
   * This function returns true if a given path is real inverse path.
   * This means that the path is not a double, quadruple, ... inverse path.
   * @param path - The path that needs to be checked.
   * @returns true when the path starts with an odd number of ^.
   * @private
   */
  private isRealInversePath(path: string): boolean {
    const found = path.match(/^(\^+)[^\^]+/);
    if (!found) {
      return false;
    }
    return found[1].length % 2 !== 0;
  }

  /**
   * This function removes all the leading ^ from the path.
   * @param path - The path from which to remove the ^.
   * @returns The path without its leading ^ characters.
   * @throws Error when nothing remains after the leading ^ characters.
   * @private
   */
  private getRealPath(path: string): string {
    const found = path.match(/^\^*([^\^]+)/);
    if (!found) {
      throw new Error(`No real path found in "${path}"`);
    }
    return found[1];
  }

  /**
   * Builds a `Path` from the object of an sh:path statement.
   * Anything that is not a blank node becomes a `PredicatePath`. A blank node is
   * checked, in this order, for sh:zeroOrMorePath, sh:oneOrMorePath,
   * sh:zeroOrOnePath, sh:inversePath and sh:alternativePath; otherwise it is read
   * as an RDF list and becomes a `SequencePath`.
   * @param shapeStore - The store holding the shapes.
   * @param listItem - The term describing the path (or one step of it).
   * @param literalType - Optional sh:datatype term, handed to every path that is built.
   * @returns The constructed path.
   * @throws Error when a blank node is neither an RDF list nor one of the path
   *   expressions above (this used to recurse until the stack overflowed).
   */
  protected constructPathPattern(
    shapeStore: RdfStore,
    listItem: Term,
    literalType?: Term,
  ): Path {
    if (listItem.termType !== "BlankNode") {
      return new PredicatePath(listItem, literalType);
    }

    //Look for special types
    const firstObject = (predicate: Term): Term | undefined =>
      getObjects(shapeStore, listItem, predicate, null)[0];
    const nested = (inner: Term): Path =>
      this.constructPathPattern(shapeStore, inner, literalType);

    const zeroOrMore = firstObject(SHACL.zeroOrMorePath);
    if (zeroOrMore) {
      return new ZeroOrMorePath(nested(zeroOrMore));
    }

    const oneOrMore = firstObject(SHACL.oneOrMorePath);
    if (oneOrMore) {
      return new OneOrMorePath(nested(oneOrMore));
    }

    const zeroOrOne = firstObject(SHACL.zeroOrOnePath);
    if (zeroOrOne) {
      return new ZeroOrOnePath(nested(zeroOrOne));
    }

    const inverse = firstObject(SHACL.inversePath);
    if (inverse) {
      return new InversePath(nested(inverse));
    }

    const alternative = firstObject(SHACL.alternativePath);
    if (alternative) {
      return new AlternativePath(
        this.rdfListToArray(shapeStore, alternative).map(nested),
      );
    }

    const items = this.rdfListToArray(shapeStore, listItem);
    // rdfListToArray hands back the item itself when it is not a list; recursing
    // on that would never terminate.
    if (items.length === 1 && items[0].equals(listItem)) {
      throw new Error(
        `Unsupported sh:path: blank node "${listItem.value}" is neither an RDF list nor a known SHACL path expression`,
      );
    }
    return new SequencePath(items.map(nested));
  }

  /**
   * Processes a property shape: adds its path to the required or optional paths
   * of `shape`, and registers a node link when sh:node is present.
   * @param shapeStore - The store holding the shapes.
   * @param propertyShapeId - The term of the candidate property shape.
   * @param shape - The template that collects the results.
   * @param required - When truthy, the path is required regardless of sh:minCount.
   * @returns false if it wasn't a property shape
   */
  protected preprocessPropertyShape(
    shapeStore: RdfStore,
    propertyShapeId: Term,
    shape: ShapeTemplate,
    required?: boolean,
  ): boolean {
    //Skip if shape has been deactivated
    if (isDeactivated(shapeStore, propertyShapeId)) {
      return true; //Success: doesn't matter what kind of thing it was, it's deactivated so let's just proceed
    }

    // Check if sh:datatype is defined
    const literalType = getObjects(
      shapeStore,
      propertyShapeId,
      SHACL.datatype,
      null,
    )[0];

    const path = getObjects(shapeStore, propertyShapeId, SHACL.path, null)[0];
    //Process the path now and make sure there's a match function
    if (!path) {
      return false; //this isn't a property shape...
    }

    const pathPattern = this.constructPathPattern(
      shapeStore,
      path,
      literalType,
    );

    const minCount = getObjects(
      shapeStore,
      propertyShapeId,
      SHACL.minCount,
      null,
    );
    if ((minCount[0] && minCount[0].value !== "0") || required) {
      shape.requiredPaths.push(pathPattern);
    } else {
      //TODO: don't include node links?
      shape.optionalPaths.push(pathPattern);
    }

    // **TODO**: will the sh:or, sh:xone, sh:and, etc. be of use here? It won't contain any more information about possible properties?
    // Maybe to potentially point to another node, xone a datatype?

    // Does it link to a literal or to a new node?
    const nodeLink = getObjects(shapeStore, propertyShapeId, SHACL.node, null);
    if (nodeLink[0]) {
      shape.nodeLinks.push(new NodeLink(pathPattern, nodeLink[0]));
    }
    //TODO: Can Nodelinks appear in conditionals from here? Probably they can?
    // (same comment as ↑)
    return true; // Success: the property shape has been processed
  }

  /**
   * Processes a NodeShape or PropertyShape and adds NodeLinks and required properties to the arrays.
   * The term is first tried as a property shape; only when it has no sh:path is it
   * processed as a node shape.
   * @param shapeStore - The store holding the shapes.
   * @param shapeId - The term of the shape.
   * @param shape - The template that collects the results.
   * @returns true when the term was handled as a property shape, otherwise the
   *   (void) result of processing it as a node shape.
   */
  preprocessShape(shapeStore: RdfStore, shapeId: Term, shape: ShapeTemplate) {
    return this.preprocessPropertyShape(shapeStore, shapeId, shape)
      ? true
      : this.preprocessNodeShape(shapeStore, shapeId, shape);
  }

  /**
   * Processes a NodeShape: derives its label, reads sh:closed, and folds its
   * sh:property, sh:and, sh:xone and sh:or content into `shape`.
   * @param shapeStore - The store holding the shapes.
   * @param nodeShapeId - The term of the node shape.
   * @param shape - The template that collects the results.
   */
  protected preprocessNodeShape(
    shapeStore: RdfStore,
    nodeShapeId: Term,
    shape: ShapeTemplate,
  ) {
    // Extract label following this strategy:
    // first look for rdfs:label
    // fallback to sh:targetClass (if any)
    // fallback to last part of the node shape ID or the ID itself if it's a blank node
    const rdfsLabel = getObjects(shapeStore, nodeShapeId, RDFS.terms.label)[0];
    if (rdfsLabel) {
      shape.label = rdfsLabel.value;
    } else {
      const targetClass = getObjects(
        shapeStore,
        nodeShapeId,
        SHACL.targetClass,
        null,
      )[0];
      if (targetClass) {
        // Make sure that IRIs are visible as node labels in mermaid diagrams
        shape.label = this.clean(targetClass.value);
      } else if (nodeShapeId.termType === "BlankNode") {
        shape.label = nodeShapeId.value;
      } else {
        const segments = nodeShapeId.value.split("/");
        shape.label = segments[segments.length - 1];
      }
    }

    //Check if it's closed or open
    const closedIndicator: Term = getObjects(
      shapeStore,
      nodeShapeId,
      SHACL.closed,
      null,
    )[0];
    if (closedIndicator && closedIndicator.value === "true") {
      shape.closed = true;
    }

    //Process properties if it has any
    const properties = getObjects(
      shapeStore,
      nodeShapeId,
      SHACL.property,
      null,
    );
    for (const prop of properties) {
      this.preprocessPropertyShape(shapeStore, prop, shape);
    }

    // process sh:and: just add all IDs to this array
    // Process everything you can find nested in AND clauses
    for (const andList of getObjects(
      shapeStore,
      nodeShapeId,
      SHACL.and,
      null,
    )) {
      // Try to process it as a property shape
      //for every andList found, iterate through it and try to preprocess the property shape
      for (const and of this.rdfListToArray(shapeStore, andList)) {
        this.preprocessShape(shapeStore, and, shape);
      }
    }

    //Process zero or more sh:xone and sh:or lists in the same way -- explanation in README why they can be handled in the same way
    const xoneAndOrLists = getObjects(
      shapeStore,
      nodeShapeId,
      SHACL.xone,
      null,
    ).concat(getObjects(shapeStore, nodeShapeId, SHACL.or, null));
    for (const xoneOrOrList of xoneAndOrLists) {
      const atLeastOneList: Array<ShapeTemplate> = this.rdfListToArray(
        shapeStore,
        xoneOrOrList,
      ).map((val): ShapeTemplate => {
        const newShape = new ShapeTemplate();
        //Create a new shape and process as usual -- but mind that we don't trigger a circular shape here...
        this.preprocessShape(shapeStore, val, newShape);
        return newShape;
        //Add this one to the shapesgraph
      });
      shape.atLeastOneLists.push(atLeastOneList);
    }
    //And finally, we're just ignoring sh:not. Don't process this one
  }

  /**
   * Builds the shape index from a store. Candidate shapes are the subjects of
   * sh:property, the subjects typed sh:NodeShape, and the objects of sh:node.
   * Deactivated shapes are left out.
   * @param shapeStore - The store holding the shapes.
   * @returns A map from shape term to its preprocessed template.
   */
  initializeFromStore(shapeStore: RdfStore): RDFMap<ShapeTemplate> {
    //get all named nodes of entities that are sh:NodeShapes which we'll recognize through their use of sh:property (we'll find other relevant shape nodes later on)
    //TODO: This is a limitation though: we only support NodeShapes with at least one sh:property set? Other NodeShapes in this context are otherwise just meaningless?
    const candidates: Term[] = [
      ...getSubjects(shapeStore, SHACL.property, null, null),
      ...getSubjects(shapeStore, RDF.terms.type, SHACL.NodeShape, null),
      ...getObjects(shapeStore, null, SHACL.node, null),
    ];
    //DISTINCT
    const shapeNodes = candidates.filter(
      (value, index, array) =>
        array.findIndex((x) => x.equals(value)) === index,
    );

    const shapes = new RDFMap<ShapeTemplate>();
    for (const shapeId of shapeNodes) {
      //Don't process if shape is deactivated
      if (isDeactivated(shapeStore, shapeId)) {
        continue;
      }
      const shape = new ShapeTemplate();
      this.preprocessNodeShape(shapeStore, shapeId, shape);
      shapes.set(shapeId, shape);
    }
    return shapes;
  }

  /**
   * Processes all element from an RDF List, or detects it wasn't a list after all and it's just one element.
   * List cells without an rdf:first are skipped, and traversal stops when a cell
   * is reached a second time, so a cyclic rdf:rest chain cannot loop forever.
   * @param shapeStore - The store holding the list.
   * @param item - The head of the list, or a single non-list term.
   * @returns A generator over the list members, or over `item` alone when it has no rdf:first.
   */
  protected *rdfListToGenerator(
    shapeStore: RdfStore,
    item: Term,
  ): Generator<Term> {
    const head = getObjects(shapeStore, item, RDF_FIRST, null)[0];
    if (!head) {
      // It's not a list. It's just one element.
      yield item;
      return;
    }
    yield head;

    // The head cell counts as seen, so a chain looping back to it also stops.
    const seenCells = new Set<string>([`${item.termType}:${item.value}`]);
    let rest: Term | undefined = getObjects(
      shapeStore,
      item,
      RDF_REST,
      null,
    )[0];

    while (rest && rest.value !== RDF_NIL_IRI) {
      const cellKey = `${rest.termType}:${rest.value}`;
      if (seenCells.has(cellKey)) {
        break;
      }
      seenCells.add(cellKey);

      const member = getObjects(shapeStore, rest, RDF_FIRST, null)[0];
      if (member) {
        yield member;
      }
      rest = getObjects(shapeStore, rest, RDF_REST, null)[0];
    }
  }

  /**
   * Array form of {@link rdfListToGenerator}.
   * @param shapeStore - The store holding the list.
   * @param item - The head of the list, or a single non-list term.
   * @returns The list members, or `[item]` when `item` has no rdf:first.
   */
  protected rdfListToArray(shapeStore: RdfStore, item: Term): Array<Term> {
    return Array.from(this.rdfListToGenerator(shapeStore, item));
  }
}

/**
 * Returns true when the first sh:deactivated value of the given shape is "true".
 * @param store - The store holding the shapes.
 * @param shapeId - The term of the shape to check.
 */
function isDeactivated(store: RdfStore, shapeId: Term): boolean {
  const deactivated = getObjects(store, shapeId, SHACL.deactivated, null);
  return deactivated.length > 0 && deactivated[0].value === "true";
}

/**
 * Returns the subjects of all quads matching the given predicate, object and graph.
 * The arguments are passed straight to `store.getQuads`, with `null` as the subject.
 */
const getSubjects = function (
  store: RdfStore,
  predicate: Term | null,
  object: Term | null,
  graph?: Term | null,
) {
  return store.getQuads(null, predicate, object, graph).map((quad) => {
    return quad.subject;
  });
};

/**
 * Returns the objects of all quads matching the given subject, predicate and graph.
 * The arguments are passed straight to `store.getQuads`, with `null` as the object.
 */
const getObjects = function (
  store: RdfStore,
  subject: Term | null,
  predicate: Term | null,
  graph?: Term | null,
) {
  return store.getQuads(subject, predicate, null, graph).map((quad) => {
    return quad.object;
  });
};