UNPKG

@rdfc/sparql-ingest-processor-ts

Version:
207 lines (206 loc) 10.7 kB
import { SDS } from "@treecg/types";
import { DataFactory } from "rdf-data-factory";
import { RdfStore } from "rdf-stores";
import { Parser } from "n3";
import { writeFile } from "fs/promises";
import { CREATE, UPDATE, DELETE } from "./SPARQLQueries.js";
import { doSPARQLRequest, sanitizeQuads, getObjects } from "./Utils.js";
import { getLoggerFor } from "./LogUtil.js";

const df = new DataFactory();

/**
 * Consumes a stream of serialized RDF members, turns each member (or each
 * completed transaction of members) into SPARQL UPDATE queries, optionally
 * executes them against `config.graphStoreUrl` and optionally forwards the
 * query text to `sparqlWriter`.
 *
 * Config properties read by this module:
 *  - `transactionConfig.transactionIdPath` / `transactionConfig.transactionEndPath`
 *  - `changeSemantics.changeTypePath` / `.createValue` / `.updateValue` / `.deleteValue`
 *  - `memberIsGraph`, `targetNamedGraph`, `forVirtuoso`, `memberShapes`
 *  - `graphStoreUrl`
 *  - `measurePerformance.failureIsFatal` / `.outputPath` / `.name`
 *
 * @param memberStream - Reader exposing `data(handler)` and `on("end", handler)`.
 * @param config - Processor configuration (see the list above).
 * @param sparqlWriter - Optional writer exposing `push(string)` and `end()`.
 * @throws {Error} (inside the data handler) when no member IRI is found, when
 *   the change type is missing or unrecognized, or when a SPARQL request fails
 *   and failures are fatal.
 */
export async function sparqlIngest(memberStream, config, sparqlWriter) {
    const logger = getLoggerFor("sparqlIngest");
    // Members of the transaction currently being collected.
    let transactionMembers = [];
    // Request durations in ms; -1 marks a failed (non-fatal) request.
    const requestsPerformance = [];

    memberStream.data(async (rawQuads) => {
        logger.debug(`Raw member data received: \n${rawQuads}`);
        const quads = new Parser().parse(rawQuads);
        logger.verbose(`Parsed ${quads.length} quads from received member data`);
        const store = RdfStore.createDefault();
        quads.forEach(q => store.addQuad(q));

        const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
        logger.verbose(`Member IRI found: ${memberIRI ? memberIRI.value : "none"}`);
        if (!memberIRI) {
            throw new Error(`[sparqlIngest] No member IRI found in received RDF data: \n${rawQuads}`);
        }

        // Drop the SDS description graph; only the member payload is ingested.
        const sdsQuads = store.getQuads(null, null, null, SDS.terms.custom("DataDescription"));
        sdsQuads.forEach(q => store.removeQuad(q));

        if (config.transactionConfig) {
            const { transactionIdPath, transactionEndPath } = config.transactionConfig;
            const transactionId = getObjects(store, null, df.namedNode(transactionIdPath), null)[0];
            if (transactionId) {
                store.removeQuad(df.quad(memberIRI, df.namedNode(transactionIdPath), transactionId));
                const isLastOfTransaction = getObjects(store, null, df.namedNode(transactionEndPath), null)[0];
                const record = {
                    memberId: memberIRI.value,
                    transactionId: transactionId.value,
                    store,
                };

                if (isLastOfTransaction) {
                    logger.info(`Last member of ${transactionId.value} received!`);
                    verifyTransaction(transactionMembers.map(ts => ts.store), transactionIdPath, transactionId);
                    store.removeQuad(df.quad(memberIRI, df.namedNode(transactionEndPath), isLastOfTransaction));
                    transactionMembers.push(record);
                    // Fall through: the completed transaction is flushed below.
                } else {
                    if (transactionMembers.length > 0) {
                        verifyTransaction(transactionMembers.map(ts => ts.store), transactionIdPath, transactionId);
                    } else {
                        logger.info(`New transaction ${transactionId.value} started!`);
                    }
                    // Buffer the member until the end of the transaction arrives.
                    transactionMembers.push(record);
                    return;
                }
            }
        }

        let query;
        if (config.changeSemantics) {
            if (transactionMembers.length > 0) {
                query = [createTransactionQueries(transactionMembers, config)];
                transactionMembers = [];
            } else {
                query = createChangeQuery(store, memberIRI, config, logger);
            }
        } else if (transactionMembers.length > 0) {
            // Without change semantics a transaction becomes a single UPDATE
            // over the union of all its members.
            transactionMembers.forEach(ts => {
                ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
            });
            // Clear the buffer so later members are not merged with (and do not
            // re-send) the data of this already flushed transaction.
            transactionMembers = [];
            logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
            query = UPDATE(store, config.forVirtuoso, config.targetNamedGraph);
        } else {
            const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
            logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
            query = UPDATE(store, config.forVirtuoso, ng);
        }

        if (!query || query.length === 0) {
            logger.warn(`No query generated for member ${memberIRI.value}`);
            return;
        }

        logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
        if (config.graphStoreUrl) {
            try {
                const t0 = Date.now();
                await doSPARQLRequest(query, config);
                const reqTime = Date.now() - t0;
                if (config.measurePerformance) {
                    requestsPerformance.push(reqTime);
                }
                logger.info(`Executed query on remote SPARQL server ${config.graphStoreUrl} (took ${reqTime} ms)`);
            } catch (error) {
                if (!config.measurePerformance || config.measurePerformance.failureIsFatal) {
                    throw error;
                }
                // Non-fatal while measuring: record the failure instead of aborting.
                logger.warn(`Request to SPARQL server ${config.graphStoreUrl} failed (recorded as -1): ${error}`);
                requestsPerformance.push(-1);
            }
        }
        if (sparqlWriter) {
            await sparqlWriter.push(query.join("\n"));
        }
    });

    memberStream.on("end", async () => {
        if (sparqlWriter) {
            await sparqlWriter.end();
        }
        if (config.measurePerformance) {
            await writeFile(
                `${config.measurePerformance.outputPath}/${config.measurePerformance.name}.json`,
                JSON.stringify(requestsPerformance),
                "utf-8",
            );
        }
    });
}

/**
 * Removes and returns the first quad whose predicate is `changeTypePath`.
 *
 * @param store - Member store to read from and mutate.
 * @param changeTypePath - IRI of the change type predicate.
 * @param memberId - Member IRI, used only for the error message.
 * @throws {Error} when the store holds no quad with that predicate.
 */
function takeChangeTypeQuad(store, changeTypePath, memberId) {
    const ctv = store.getQuads(null, df.namedNode(changeTypePath))[0];
    if (!ctv) {
        throw new Error(`[sparqlIngest] No change type (${changeTypePath}) found for member ${memberId}`);
    }
    store.removeQuad(ctv);
    return ctv;
}

/**
 * Builds the query (array of strings) for a single, non-transactional member
 * according to its change type.
 *
 * @throws {Error} when the change type is missing or unrecognized.
 */
function createChangeQuery(store, memberIRI, config, logger) {
    const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
    const ctv = takeChangeTypeQuad(store, config.changeSemantics.changeTypePath, memberIRI.value);
    sanitizeQuads(store);

    const changeType = ctv.object.value;
    if (changeType === config.changeSemantics.createValue) {
        logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
        return CREATE(store, config.forVirtuoso, ng);
    }
    if (changeType === config.changeSemantics.updateValue) {
        logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
        return UPDATE(store, config.forVirtuoso, ng);
    }
    if (changeType === config.changeSemantics.deleteValue) {
        logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
        return [DELETE(store, [memberIRI.value], config.memberShapes, ng)];
    }
    throw new Error(`[sparqlIngest] Unrecognized change type value: ${changeType}`);
}

/**
 * Checks that every transaction ID still present in the given stores equals
 * `transactionId`.
 *
 * @throws {Error} on the first non-matching transaction ID.
 */
function verifyTransaction(stores, transactionIdPath, transactionId) {
    for (const store of stores) {
        const tIds = getObjects(store, null, df.namedNode(transactionIdPath), null);
        for (const tid of tIds) {
            if (!tid.equals(transactionId)) {
                throw new Error(`[sparqlIngest] Received non-matching transaction ID ${transactionId.value} ` +
                    `with previous transaction: ${tid.value}`);
            }
        }
    }
}

/**
 * Picks the named graph to write to: the member IRI when `memberIsGraph` is
 * truthy, otherwise `targetNamedGraph` when set, otherwise `undefined`.
 */
function getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
    if (memberIsGraph) {
        return memberIRI.value;
    }
    if (targetNamedGraph) {
        return targetNamedGraph;
    }
    return undefined;
}

/**
 * Groups the members of a transaction by change type and builds one
 * multi-operation query string, with operations joined by ";\n".
 *
 * @param transactionMembers - Records of shape `{ memberId, transactionId, store }`.
 * @param config - Processor configuration; `changeSemantics` must be set.
 * @returns The combined query text (empty string when no operation applies).
 * @throws {Error} when a member's change type is missing or unrecognized.
 */
function createTransactionQueries(transactionMembers, config) {
    const logger = getLoggerFor("createTransactionQueries");
    logger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}` +
        ` members of transaction ${transactionMembers[0].transactionId}`);

    const createStore = RdfStore.createDefault();
    const updateStore = RdfStore.createDefault();
    const deleteStore = RdfStore.createDefault();
    const deleteMembers = [];
    const transactionQueryBuilder = [];

    for (const tsm of transactionMembers) {
        const ctv = takeChangeTypeQuad(tsm.store, config.changeSemantics.changeTypePath, tsm.memberId);
        const changeType = ctv.object.value;

        let target;
        if (changeType === config.changeSemantics.createValue) {
            target = createStore;
        } else if (changeType === config.changeSemantics.updateValue) {
            target = updateStore;
        } else if (changeType === config.changeSemantics.deleteValue) {
            target = deleteStore;
            deleteMembers.push(tsm.memberId);
        } else {
            throw new Error(`[sparqlIngest] Unrecognized change type value: ${changeType}`);
        }
        tsm.store.getQuads(null, null, null, null).forEach(q => target.addQuad(q));
    }

    if (createStore.size > 0) {
        transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
    }
    if (updateStore.size > 0) {
        transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
    }
    // Guard on the delete members, not on the update store: a delete operation
    // is needed exactly when the transaction contains members to delete.
    if (deleteMembers.length > 0) {
        transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
    }
    return transactionQueryBuilder.join(";\n");
}