@rdfc/sparql-ingest-processor-ts
Version:
SPARQL Update function to be used within RDF-Connect pipelines
207 lines (206 loc) • 10.7 kB
JavaScript
import { SDS } from "@treecg/types";
import { DataFactory } from "rdf-data-factory";
import { RdfStore } from "rdf-stores";
import { Parser } from "n3";
import { writeFile } from "fs/promises";
import { CREATE, UPDATE, DELETE } from "./SPARQLQueries.js";
import { doSPARQLRequest, sanitizeQuads, getObjects } from "./Utils.js";
import { getLoggerFor } from "./LogUtil.js";
// Shared RDF/JS data factory used throughout this module to build named nodes and quads.
const df = new DataFactory();
/**
 * Registers handlers on `memberStream` that turn every received member into a
 * SPARQL UPDATE query, optionally execute it against a remote endpoint and/or
 * forward it to `sparqlWriter`.
 *
 * Per received member the handler:
 *  1. parses the raw RDF and looks up the member IRI through the SDS payload
 *     quad in the SDS "DataDescription" graph;
 *  2. drops all quads of that SDS graph;
 *  3. when `config.transactionConfig` is set and the member carries a
 *     transaction ID, buffers the member until the member flagged with
 *     `transactionEndPath` arrives;
 *  4. builds the query (based on `config.changeSemantics` when present,
 *     otherwise always through `UPDATE`);
 *  5. sends the query to `config.graphStoreUrl` (if set) and pushes it to
 *     `sparqlWriter` (if given).
 *
 * On stream end the writer is closed and, when `config.measurePerformance` is
 * set, the collected request times are written to
 * `<outputPath>/<name>.json`.
 *
 * @param memberStream - Stream exposing `data(handler)` and `on("end", handler)`.
 * @param config - Ingest configuration; the fields read here are
 *   `transactionConfig`, `changeSemantics`, `memberIsGraph`, `targetNamedGraph`,
 *   `forVirtuoso`, `memberShapes`, `graphStoreUrl` and `measurePerformance`.
 * @param sparqlWriter - Optional writer exposing `push(string)` and `end()`.
 * @throws {Error} (inside the data handler) when no member IRI is found, when a
 *   member of a different transaction arrives before the open one is
 *   finalized, when the change type is missing or unrecognized, or when a
 *   remote request fails and failures are fatal.
 */
export async function sparqlIngest(memberStream, config, sparqlWriter) {
    const logger = getLoggerFor("sparqlIngest");
    // Members of the transaction that is currently open (empty when none is).
    let transactionMembers = [];
    // Request durations in ms; -1 marks a failed (non-fatal) request.
    const requestsPerformance = [];

    memberStream.data(async (rawQuads) => {
        logger.debug(`Raw member data received: \n${rawQuads}`);
        const quads = new Parser().parse(rawQuads);
        logger.verbose(`Parsed ${quads.length} quads from received member data`);
        const store = RdfStore.createDefault();
        quads.forEach(q => store.addQuad(q));

        const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
        logger.verbose(`Member IRI found: ${memberIRI ? memberIRI.value : "none"}`);
        if (!memberIRI) {
            throw new Error(`[sparqlIngest] No member IRI found in received RDF data: \n${rawQuads}`);
        }

        // The SDS description quads are only needed to find the member IRI.
        const sdsQuads = store.getQuads(null, null, null, SDS.terms.custom("DataDescription"));
        sdsQuads.forEach(q => store.removeQuad(q));

        if (config.transactionConfig) {
            const transactionIdPath = df.namedNode(config.transactionConfig.transactionIdPath);
            const transactionId = getObjects(store, null, transactionIdPath, null)[0];
            if (transactionId) {
                // Transaction bookkeeping quads must not end up in the generated query.
                store.removeQuad(df.quad(memberIRI, transactionIdPath, transactionId));
                const transactionEndPath = df.namedNode(config.transactionConfig.transactionEndPath);
                const isLastOfTransaction = getObjects(store, null, transactionEndPath, null)[0];

                if (isLastOfTransaction) {
                    logger.info(`Last member of ${transactionId.value} received!`);
                }
                else if (transactionMembers.length === 0) {
                    logger.info(`New transaction ${transactionId.value} started!`);
                }

                if (transactionMembers.length > 0) {
                    verifyTransaction(transactionMembers.map(ts => ts.store), config.transactionConfig.transactionIdPath, transactionId);
                    // The ID quad was removed from the buffered stores above, so also
                    // compare against the transaction ID recorded alongside each store.
                    const pendingId = transactionMembers[0].transactionId;
                    if (pendingId !== transactionId.value) {
                        throw new Error(`Received new transaction ${transactionId.value}, `
                            + `but older transaction ${pendingId} hasn't been finalized `);
                    }
                }

                if (isLastOfTransaction) {
                    store.removeQuad(df.quad(memberIRI, transactionEndPath, isLastOfTransaction));
                }
                transactionMembers.push({
                    memberId: memberIRI.value,
                    transactionId: transactionId.value,
                    store
                });
                if (!isLastOfTransaction) {
                    // Keep buffering until the final member of the transaction arrives.
                    return;
                }
            }
        }

        let query;
        if (config.changeSemantics) {
            if (transactionMembers.length > 0) {
                query = [createTransactionQueries(transactionMembers, config)];
                transactionMembers = [];
            }
            else {
                const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
                const ctv = store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
                if (!ctv) {
                    throw new Error(`[sparqlIngest] No change type (${config.changeSemantics.changeTypePath}) `
                        + `found for member ${memberIRI.value}`);
                }
                store.removeQuad(ctv);
                sanitizeQuads(store);
                if (ctv.object.value === config.changeSemantics.createValue) {
                    logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
                    query = CREATE(store, config.forVirtuoso, ng);
                }
                else if (ctv.object.value === config.changeSemantics.updateValue) {
                    logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
                    query = UPDATE(store, config.forVirtuoso, ng);
                }
                else if (ctv.object.value === config.changeSemantics.deleteValue) {
                    logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
                    query = [DELETE(store, [memberIRI.value], config.memberShapes, ng)];
                }
                else {
                    throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
                }
            }
        }
        else if (transactionMembers.length > 0) {
            // Merge every buffered member into the current store and update them as one.
            transactionMembers.forEach(ts => {
                ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
            });
            // The transaction is complete: forget its members so they are not
            // merged again into members that arrive afterwards.
            transactionMembers = [];
            logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
            query = UPDATE(store, config.forVirtuoso, config.targetNamedGraph);
        }
        else {
            const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
            logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
            query = UPDATE(store, config.forVirtuoso, ng);
        }

        if (query && query.length > 0) {
            logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
            if (config.graphStoreUrl) {
                try {
                    const t0 = Date.now();
                    await doSPARQLRequest(query, config);
                    const reqTime = Date.now() - t0;
                    if (config.measurePerformance) {
                        requestsPerformance.push(reqTime);
                    }
                    logger.info(`Executed query on remote SPARQL server ${config.graphStoreUrl} (took ${reqTime} ms)`);
                }
                catch (error) {
                    if (!config.measurePerformance || config.measurePerformance.failureIsFatal) {
                        throw error;
                    }
                    // Non-fatal while measuring performance: record the failure, but do not hide it.
                    logger.warn(`Query on remote SPARQL server ${config.graphStoreUrl} failed `
                        + `(recorded as -1): ${error}`);
                    requestsPerformance.push(-1);
                }
            }
            if (sparqlWriter) {
                await sparqlWriter.push(query.join("\n"));
            }
        }
        else {
            logger.warn(`No query generated for member ${memberIRI.value}`);
        }
    });

    memberStream.on("end", async () => {
        if (sparqlWriter) {
            await sparqlWriter.end();
        }
        if (config.measurePerformance) {
            await writeFile(`${config.measurePerformance.outputPath}/${config.measurePerformance.name}.json`, JSON.stringify(requestsPerformance), "utf-8");
        }
    });
}
/**
 * Checks that every transaction ID found in the given stores equals the
 * transaction ID of the member that was just received.
 *
 * @param stores - Stores of the members collected so far.
 * @param transactionIdPath - IRI (string) of the predicate holding the transaction ID.
 * @param transactionId - Transaction ID term of the newly received member.
 * @throws {Error} On the first stored transaction ID that differs from `transactionId`.
 */
function verifyTransaction(stores, transactionIdPath, transactionId) {
    for (const memberStore of stores) {
        const knownIds = getObjects(memberStore, null, df.namedNode(transactionIdPath), null);
        for (const knownId of knownIds) {
            if (knownId.equals(transactionId)) {
                continue;
            }
            const message = `[sparqlIngest] Received non-matching transaction ID ${transactionId.value} `
                + `with previous transaction: ${knownId.value}`;
            throw new Error(message);
        }
    }
}
/**
 * Picks the named graph a member should be written to.
 *
 * @param memberIRI - Term of the member; its `value` is used when `memberIsGraph` is truthy.
 * @param memberIsGraph - When truthy, the member IRI itself is the named graph.
 * @param targetNamedGraph - Fallback named graph used when `memberIsGraph` is falsy.
 * @returns The named graph to use, or `undefined` when neither option applies.
 */
function getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
    if (memberIsGraph) {
        return memberIRI.value;
    }
    return targetNamedGraph ? targetNamedGraph : undefined;
}
/**
 * Builds one multi-operation SPARQL UPDATE string for all members of a transaction.
 *
 * Members are grouped by their change type (read from
 * `config.changeSemantics.changeTypePath`); the change type quad is removed
 * from each member store. One CREATE, one UPDATE and one DELETE operation are
 * then generated for the non-empty groups, in that order, and joined with ";\n".
 *
 * @param transactionMembers - Non-empty array of `{ memberId, transactionId, store }`.
 * @param config - Ingest configuration; reads `changeSemantics`, `forVirtuoso`,
 *   `targetNamedGraph` and `memberShapes`.
 * @returns The combined query string (empty when no operation was generated).
 * @throws {Error} When a member has no change type or an unrecognized one.
 */
function createTransactionQueries(transactionMembers, config) {
    const logger = getLoggerFor("createTransactionQueries");
    logger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
        + ` members of transaction ${transactionMembers[0].transactionId}`);
    const createStore = RdfStore.createDefault();
    const updateStore = RdfStore.createDefault();
    const deleteStore = RdfStore.createDefault();
    const deleteMembers = [];
    const transactionQueryBuilder = [];
    const changeTypePath = df.namedNode(config.changeSemantics.changeTypePath);

    for (const tsm of transactionMembers) {
        const ctv = tsm.store.getQuads(null, changeTypePath)[0];
        if (!ctv) {
            throw new Error(`[sparqlIngest] No change type (${config.changeSemantics.changeTypePath}) `
                + `found for transaction member ${tsm.memberId}`);
        }
        // The change type only steers query generation; it is not part of the member data.
        tsm.store.removeQuad(ctv);
        if (ctv.object.value === config.changeSemantics.createValue) {
            tsm.store.getQuads(null, null, null, null).forEach(q => createStore.addQuad(q));
        }
        else if (ctv.object.value === config.changeSemantics.updateValue) {
            tsm.store.getQuads(null, null, null, null).forEach(q => updateStore.addQuad(q));
        }
        else if (ctv.object.value === config.changeSemantics.deleteValue) {
            tsm.store.getQuads(null, null, null, null).forEach(q => deleteStore.addQuad(q));
            deleteMembers.push(tsm.memberId);
        }
        else {
            throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
        }
    }

    if (createStore.size > 0) {
        transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
    }
    if (updateStore.size > 0) {
        transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
    }
    // Gate on the collected member IRIs rather than on a store size: a deleted
    // member may carry no quads besides its change type, yet still has to be deleted.
    if (deleteMembers.length > 0) {
        transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
    }
    return transactionQueryBuilder.join(";\n");
}