UNPKG

rdf2hk

Version:

This library converts RDF to Hyperknowledge Description

534 lines (449 loc) 16.7 kB
/*
 * Copyright (c) 2016-present, IBM Research
 * Licensed under The MIT License [see LICENSE for details]
 */
"use strict";

const { Node, Connector, Link, Context, Reference, ConnectorClass, RoleTypes } = require("hklib");
const Constants = require("./constants");
const Utils = require("./utils");

const owl = require("./owl");
const rdfs = require("./rdfs");
const wikidata = require("./wikidata");
const hk = require("./hk");

const uuidv1 = require('uuid/v1');

const RELATION_QUALIFIER_URIS = new Set();
RELATION_QUALIFIER_URIS.add(owl.INVERSE_OF_URI);
RELATION_QUALIFIER_URIS.add(rdfs.SUBPROPERTYOF_URI);

const HK_NULL_URI = `<${Constants.HK_NULL}>`;

const isUriOrBlankNode = Utils.isUriOrBlankNode;

/**
 * Own-property test that does not depend on the target object's own
 * `hasOwnProperty` slot (keys of the lookup tables come from the parsed graph).
 *
 * @param {object} obj The dictionary to inspect.
 * @param {string} key The key to look for.
 * @returns {boolean} True if `key` is an own property of `obj`.
 */
function hasOwn(obj, key)
{
	return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Parse rdf to Hyperknowledge entities.
 *
 * The graph is traversed three times through `graph.forEachStatement`:
 *  1. collect connectors (one per predicate whose object is a URI or blank node) and contexts;
 *  2. create a node for every URI / blank node subject and object;
 *  3. create links for resource objects and properties (or content nodes) for literal objects,
 *     creating reference nodes when the subject lives in another context.
 * On each pass every registered parser gets the chance to handle the statement first;
 * when a parser callback returns a falsy value the default handling of that statement is skipped.
 *
 * @param {object} graph The graph (quads, if it contains named graph) to be parsed and converted to Hyperknowledge entities.
 * @param {boolean|object} [options] Parsing options, if it is a boolean, is equivalent to {createContext: <that boolean>}, i.e. `true` means it will generate a context for each named graph.
 * @param {boolean} [options.createContext] Create the context entity for each named graph. Default is false.
 * @param {boolean} [options.namespaceContext] Contextualize entities based on their namespace. Implies `createContext`.
 * @param {string} [options.subjectLabel] Name of the subject role. Default is `Constants.DEFAULT_SUBJECT_ROLE`.
 * @param {string} [options.objectLabel] Name of the object role. Default is `Constants.DEFAULT_OBJECT_ROLE`.
 * @param {boolean} [options.convertOwl] EXPERIMENTAL OWL rules. Default is false.
 * @param {boolean} [options.convertOwlTime] EXPERIMENTAL OWL Time rules. Default is false.
 * @param {boolean} [options.customRdfParser] Use the customizable parser. Default is false.
 * @param {boolean} [options.timeContext] Context for OWL Time entities and relationships, if convertOwlTime is true.
 * @param {boolean} [options.preserveBlankNodes] Preserve the blank node ids if true, otherwise replace it by a uuid intended to be unique in the database. Default is false.
 * @param {boolean} [options.setNodeContext] If true, nodes created by this function get the (resolved) graph id as parent. Default is false.
 * @param {string} [options.context] Root context, used as parent for statements whose graph is null or the HK null uri.
 * @param {boolean} [options.serialize] Serialize output, i. e. remove unnecessary methods and fields from the instances.
 * @param {boolean} [options.convertHK] If set, it will read the Hyperknowledge vocabulary and make special conversion. This function only forwards it to the registered parsers.
 * @param {boolean} [options.onlyHK] If set, it will ONLY read the Hyperknowledge vocabulary and convert those entities, this options override `convertHK`. Default is false.
 * @param {boolean} [options.textLiteralAsNode] If true, string literals will be converted to content nodes, which will be linked to subject using a link whose connector is the predicate.
 * @param {string} [options.textLiteralAsNodeEncoding] If 'property', textLiteralAsNode encoding will be made using node and link properties. If 'metaproperty' encoding will be made using node and link metaproperties. Default is 'metaproperty'.
 * @param {string} [options.strategy] "pre-existing-context", "new-context" or "automatically."
 * @param {array} [options.hierarchyConnectorIds] "List of predicates that should become hierarchy connectors."
 * @param {object|undefined} [customizableOptions] A dictionary of customizable options while parsing.
 * @param {array|undefined} [customizableOptions.contextualize] indicates the predicates that should create contexts based on the object.
 * @param {string|undefined} [customizableOptions.contextualize.p] a predicate that should create a context relation.
 * @param {string|undefined} [customizableOptions.contextualize.o] the object that should become a context. When undefined, it indicates that any object with that predicate should become context.
 * @returns {object} Dictionary of the generated entities indexed by id (serialized entities when `options.serialize` is set).
 * @throws Rethrows any error raised while instantiating a registered parser.
 */
function parseGraph(graph, options, customizableOptions)
{
	// Normalise the options argument. (A former `typeof options === Array`
	// branch could never match - typeof yields a string - and did nothing.)
	if (typeof options === "boolean")
	{
		options = { createContext: options };
	}
	else if (!options)
	{
		options = {};
	}

	const namespaceContext = options.namespaceContext || false;
	const createContext = options.createContext || namespaceContext;
	const strategy = options.strategy;
	const preserveBlankNodes = options.preserveBlankNodes || false;
	const setNodeContext = options.setNodeContext || false;
	const rootContext = options.context;
	const onlyHK = options.onlyHK || false;
	const textLiteralAsNode = options.textLiteralAsNode || false;
	const textLiteralAsNodeEncoding = options.textLiteralAsNodeEncoding || 'metaproperty';
	const serialize = options.serialize || false;

	const subjectLabel = options.subjectLabel || Constants.DEFAULT_SUBJECT_ROLE;
	const objectLabel = options.objectLabel || Constants.DEFAULT_OBJECT_ROLE;

	const hierarchyConnectorIds = options.hierarchyConnectorIds ||
		[rdfs.TYPE_URI, rdfs.SUBCLASSOF_URI, rdfs.SUBPROPERTYOF_URI, wikidata.INSTANCE_OF_URI, wikidata.SUBCLASS_OF_URI];

	// Shared state: these very objects are handed to every parser instance.
	const entities = {};
	const connectors = {};
	const blankNodesMap = {};
	const refNodesMap = {};

	// Instantiate the registered parsers
	const parsers = [];
	registeredParsers.forEach(parser =>
	{
		try
		{
			const instantiatedParser = new parser(entities, connectors, blankNodesMap, refNodesMap, options, customizableOptions);
			parsers.push(instantiatedParser);
		}
		catch (err)
		{
			console.error(`There was an error while instatianting the parser ${parser}`);
			throw err;
		}
	});

	// Offer a statement to every parser, in registration order.
	// Returns false as soon as one parser callback asks to stop (falsy return),
	// meaning the default conversion of this statement must be skipped.
	const runParsers = (shouldConvertName, callbackName, s, p, o, parent) =>
	{
		for (const parser of parsers)
		{
			if (parser[shouldConvertName](s, p, o, parent) && !parser[callbackName](s, p, o, parent))
			{
				return false;
			}
		}
		return true;
	};

	// Resolve the context of a resource: its namespace (text before '#') when
	// namespaceContext is on, otherwise the graph - replaced by the root
	// context, if one was given, when the graph is null / HK null.
	const getParent = (iri, g) =>
	{
		if (namespaceContext && iri.includes("#"))
		{
			return `<${iri.split('#')[0].replace('<', '')}>`;
		}
		return ((g === HK_NULL_URI || g === null) && rootContext) ? rootContext : g;
	};

	// Create and register a reference node to `s` inside the context of `g`.
	const createReference = (s, g) =>
	{
		const parent = getParent(s, g);
		const ref = new Reference();
		ref.id = Utils.createRefUri(s, parent);
		ref.ref = s;
		ref.parent = parent;
		entities[ref.id] = ref;
		refNodesMap[ref.id] = ref;
		return ref;
	};

	// FIRST LOOP
	// Collect basic connectors
	// Collect contexts
	graph.forEachStatement((s, p, o, g) =>
	{
		const parent = getParent(s, g);

		if (!runParsers("firstLoopShouldConvert", "firstLoopCallback", s, p, o, parent))
		{
			return;
		}

		// Create connector?
		if (Utils.isUri(p) && Utils.isUriOrBlankNode(o))
		{
			const connectorId = Utils.getIdFromResource(p);
			if (!hasOwn(connectors, connectorId))
			{
				const connector = new Connector();
				connector.id = connectorId;
				connector.className = hierarchyConnectorIds.includes(p) ? ConnectorClass.HIERARCHY : ConnectorClass.FACTS;
				connector.addRole(subjectLabel, RoleTypes.SUBJECT);
				connector.addRole(objectLabel, RoleTypes.OBJECT);
				connectors[connectorId] = connector;
				entities[connectorId] = connector;
			}
		}

		// Create context (unless the strategy says the root context already exists)
		const isPreExistingContext = strategy === 'pre-existing-context' && parent === rootContext;
		if (createContext && parent && parent !== HK_NULL_URI && !isPreExistingContext)
		{
			if (!hasOwn(entities, parent))
			{
				const context = new Context();
				context.id = parent;
				entities[parent] = context;
			}
		}
	});

	// SECOND LOOP
	// Create nodes
	graph.forEachStatement((s, p, o, g) =>
	{
		const parent = getParent(s, g);

		// Replace the blank node identifier by a uuid
		// in order to make this id more robust along the base
		if (!preserveBlankNodes)
		{
			if (Utils.isBlankNode(s) && !hasOwn(blankNodesMap, s))
			{
				blankNodesMap[s] = `_:${uuidv1()}`;
			}
			if (Utils.isBlankNode(o) && !hasOwn(blankNodesMap, o))
			{
				blankNodesMap[o] = `_:${uuidv1()}`;
			}
		}

		if (!runParsers("secondLoopShouldConvert", "secondLoopCallback", s, p, o, parent))
		{
			return;
		}

		// The existence test must use the id the node is stored under (the
		// remapped uuid for blank nodes); testing the raw blank node label
		// never matched, so the node was recreated and overwritten on every
		// statement that mentioned it.
		const subjectId = hasOwn(blankNodesMap, s) ? blankNodesMap[s] : Utils.getIdFromResource(s);
		if (isUriOrBlankNode(s) && !hasOwn(entities, subjectId))
		{
			const node = new Node();
			node.id = subjectId;
			entities[node.id] = node;
			node.parent = undefined;

			// Set the context to the graph name
			if (setNodeContext && parent)
			{
				node.parent = Utils.getIdFromResource(parent);
			}
		}

		const objectId = hasOwn(blankNodesMap, o) ? blankNodesMap[o] : Utils.getIdFromResource(o);
		if (isUriOrBlankNode(o) && !hasOwn(entities, objectId))
		{
			const node = new Node();
			node.id = objectId;
			entities[node.id] = node;
			node.parent = undefined;

			// Set the context to the graph name, never making a node its own parent
			if (setNodeContext && parent)
			{
				const parentId = Utils.getIdFromResource(parent);
				node.parent = parentId !== node.id ? parentId : null;
			}
		}
	});

	// LAST LOOP
	// Create attributes, relations and ref nodes if needed
	graph.forEachStatement((s, p, o, g) =>
	{
		const parent = getParent(s, g);
		const parentIdFromResource = Utils.getIdFromResource(parent);

		if (!runParsers("lastLoopShouldConvert", "lastLoopCallback", s, p, o, parent))
		{
			return;
		}

		if (isUriOrBlankNode(o))
		{
			// Set relationship (only for predicates that have a connector)
			const connectorId = Utils.getIdFromResource(p);
			if (!hasOwn(connectors, connectorId))
			{
				return;
			}

			const connector = connectors[connectorId];
			const link = new Link();

			for (const role of connector.getRoles())
			{
				const roleType = connector.getRoleType(role);
				if (roleType === RoleTypes.SUBJECT || roleType === RoleTypes.CHILD)
				{
					const subjId = hasOwn(blankNodesMap, s) ? blankNodesMap[s] : s;
					link.addBind(subjectLabel, Utils.getIdFromResource(subjId));
				}
				else if (roleType === RoleTypes.OBJECT || roleType === RoleTypes.PARENT)
				{
					const objId = hasOwn(blankNodesMap, o) ? blankNodesMap[o] : o;
					link.addBind(objectLabel, Utils.getIdFromResource(objId));
				}
			}

			// The link id is built from the original (unmapped) terms
			link.id = Utils.createSpoUri(s, p, o, parent);
			link.connector = connectorId;
			if (g)
			{
				link.parent = parentIdFromResource;
			}
			entities[link.id] = link;
			return;
		}

		// Since it is a literal it becomes a property

		// Get mapped blank node
		const subject = (!preserveBlankNodes && hasOwn(blankNodesMap, s)) ? blankNodesMap[s] : s;
		const subjectId = Utils.getIdFromResource(subject);

		let entity = null;
		if (!parentIdFromResource)
		{
			// we assume the entity must have been created
			entity = entities[subjectId];
		}
		else
		{
			entity = entities[subjectId] || null;
			if (entity !== null && entity.type !== Connector.type && entity.parent !== parentIdFromResource)
			{
				// The node already exists and it belongs to another context
				// This assign will force to look for a reference node
				entity = null;
			}

			// Check if there is a reference to the resource
			if (!entity)
			{
				const refId = Utils.createRefUri(subject, parent);
				entity = entities[refId] || null;
			}
		}

		// If at this point the entity was not set
		// create a reference to it
		if (!entity)
		{
			if (onlyHK)
			{
				// Do not create entities by inference
				// when conversion is to only convert
				// hyperknowledge entities
				return;
			}
			entity = createReference(subject, parent);
		}

		// Convert the literal
		_setPropertyFromLiteral(entity, p, o, entities, connectors, subjectLabel, objectLabel, textLiteralAsNode, textLiteralAsNodeEncoding);
	});

	// Finish conversion
	// Add connectors
	for (const c in connectors)
	{
		entities[c] = connectors[c];
	}

	parsers.forEach(parser =>
	{
		if (parser.mustConvert)
		{
			parser.finish(entities);
		}
	});

	// Serialize entities
	if (serialize)
	{
		for (const k in entities)
		{
			entities[k] = entities[k].serialize();
		}
	}

	return entities;
}

/**
 * Attach the literal `o` of predicate `p` to `entity`.
 *
 * - A HK null literal (`"<HK_NULL>"^^type`) only records `type` as the metaproperty of `p`.
 * - When `textLiteralAsNode` is set, a string literal becomes a content node linked to the
 *   entity through a `p` link, and the content node is typed as `hk.DATA_LITERAL_URI`.
 * - Otherwise the value is set/appended as the property `p` and, if the literal carries a
 *   datatype, that datatype is set/appended as the metaproperty `p`.
 *
 * @param {object} entity The entity (node, reference, connector...) that receives the literal.
 * @param {string} p The predicate.
 * @param {string} o The literal.
 * @param {object} entities Dictionary of entities, new entities are added to it.
 * @param {object} connectors Dictionary of connectors, new connectors are added to it.
 * @param {string} subjectLabel Name of the subject role.
 * @param {string} objectLabel Name of the object role.
 * @param {boolean} [textLiteralAsNode] Convert string literals to content nodes. Default is false.
 * @param {string} [textLiteralAsNodeEncoding] 'property' or 'metaproperty'. Default here is 'property' (parseGraph always passes a value, defaulting to 'metaproperty').
 */
function _setPropertyFromLiteral(entity, p, o, entities, connectors, subjectLabel, objectLabel, textLiteralAsNode = false, textLiteralAsNodeEncoding = 'property')
{
	const typeInfo = {};
	let value = Utils.getValueFromLiteral(o, typeInfo, true);
	const propertyName = Utils.getIdFromResource(p);

	if (typeInfo.lang)
	{
		value = `"${value}"@${typeInfo.lang}`;
	}

	if (typeof value === "string")
	{
		const literalSlices = value.split(`^^`);
		if (literalSlices[0] === `"${HK_NULL_URI}"`)
		{
			// split() yields `undefined`, never `null`, when there is no `^^type`
			// suffix, so the guard has to test for undefined to be effective.
			if (literalSlices[1] !== undefined)
			{
				entity.setMetaProperty(propertyName, Utils.getIdFromResource(literalSlices[1]));
			}
			return;
		}

		if (textLiteralAsNode)
		{
			const literalTypeId = Utils.getIdFromResource(hk.DATA_LITERAL_URI);
			const predicateId = propertyName;

			// Mark an entity with the literal encoding, as property or metaproperty
			const markAsLiteral = target =>
			{
				if (textLiteralAsNodeEncoding === 'property')
				{
					target.setProperty(literalTypeId, predicateId);
				}
				else if (textLiteralAsNodeEncoding === 'metaproperty')
				{
					target.setMetaProperty(literalTypeId, predicateId);
				}
			};

			// add property or metaproperty in subject node
			markAsLiteral(entity);

			// create content node with literal as data, if needed
			const contentNodeUri = Utils.createContentNodeUri(value);
			if (!hasOwn(entities, contentNodeUri))
			{
				const contentNode = new Node(contentNodeUri, entity.parent);
				// NOTE(review): the registered MIME type is 'text/plain'; kept as is
				// because consumers may match on this exact string - confirm.
				contentNode.setProperty('mimeType', 'plain/text');
				contentNode.setProperty('data', value);
				entities[contentNodeUri] = contentNode;
			}

			// create predicate connector, if needed
			const connectorId = propertyName;
			if (!hasOwn(connectors, connectorId))
			{
				const contentConnector = new Connector(connectorId, ConnectorClass.FACTS);
				contentConnector.addRole(subjectLabel, RoleTypes.SUBJECT);
				contentConnector.addRole(objectLabel, RoleTypes.OBJECT);
				connectors[contentConnector.id] = contentConnector;
				entities[contentConnector.id] = contentConnector;
			}

			// create spo link between subject and content node
			// NOTE(review): the raw predicate `p` (not connectorId) is given to Link,
			// unlike parseGraph which sets link.connector = connectorId - confirm intended.
			const linkUri = Utils.createSpoUri(entity.id, p, value, entity.parent);
			const contentLink = new Link(linkUri, p, entity.parent);
			contentLink.addBind(subjectLabel, entity.id);
			contentLink.addBind(objectLabel, contentNodeUri);
			markAsLiteral(contentLink);
			entities[linkUri] = contentLink;

			// create hierarchical connector, if needed
			const typeConnectorId = Utils.getIdFromResource(rdfs.TYPE_URI);
			if (!hasOwn(connectors, typeConnectorId))
			{
				const typeConnector = new Connector(typeConnectorId, ConnectorClass.HIERARCHY);
				typeConnector.addRole(subjectLabel, RoleTypes.SUBJECT);
				typeConnector.addRole(objectLabel, RoleTypes.OBJECT);
				connectors[typeConnector.id] = typeConnector;
				entities[typeConnector.id] = typeConnector;
			}

			// add literal node to body, if needed
			let typeNode = entities[literalTypeId];
			if (!typeNode)
			{
				typeNode = new Node(literalTypeId, null);
				entities[literalTypeId] = typeNode;
			}

			// add reference to literal node within context, if needed
			if (entity.parent && entity.parent !== "null" && entity.parent !== HK_NULL_URI)
			{
				const typeReferenceUri = Utils.createRefUri(literalTypeId, entity.parent);
				if (!hasOwn(entities, typeReferenceUri))
				{
					typeNode = new Reference(typeReferenceUri, literalTypeId, entity.parent);
					entities[typeReferenceUri] = typeNode;
				}
				else
				{
					typeNode = entities[typeReferenceUri];
				}
			}

			// create hierarchical link between content node and literal type
			const typeLinkUri = Utils.createSpoUri(contentNodeUri, rdfs.TYPE_URI, hk.DATA_LITERAL_URI, entity.parent);
			const typeLink = new Link(typeLinkUri, rdfs.TYPE_URI, entity.parent);
			typeLink.addBind(subjectLabel, contentNodeUri);
			typeLink.addBind(objectLabel, typeNode.id);
			entities[typeLinkUri] = typeLink;

			return;
		}
	}

	entity.setOrAppendToProperty(propertyName, value);
	if (typeInfo.type)
	{
		entity.setOrAppendToMetaProperty(propertyName, Utils.getIdFromResource(typeInfo.type));
	}
}

// Parser classes to be instantiated on every parseGraph call, in insertion order.
const registeredParsers = new Set();

/**
 * Register a parser class. It will be instantiated with
 * `(entities, connectors, blankNodesMap, refNodesMap, options, customizableOptions)`
 * on every call to `parseGraph`.
 *
 * @param {function} parser The parser constructor.
 */
function registerParser(parser)
{
	registeredParsers.add(parser);
}

exports.registerParser = registerParser;
exports.parseGraph = parseGraph;