@rdfc/sparql-ingest-processor-ts
Version:
SPARQL Update function to be used within RDF-Connect pipelines
106 lines (105 loc) • 4.27 kB
JavaScript
import { XSD } from "@treecg/types";
import { DataFactory } from "rdf-data-factory";
import { RdfStore } from "rdf-stores";
import { getLoggerFor } from "./LogUtil.js";
import { Agent } from "undici";
// Shared term factory; sanitizeQuads uses it to build replacement quads and literals.
const df = new DataFactory();
/**
 * Collects the subject term of every quad that matches the given pattern.
 *
 * @param {object} store - Object exposing `getQuads(subject, predicate, object, graph)`.
 * @param {*} predicate - Predicate filter, handed to `getQuads` unchanged.
 * @param {*} object - Object filter, handed to `getQuads` unchanged.
 * @param {*} graph - Graph filter, handed to `getQuads` unchanged.
 * @returns {Array} One subject per matching quad, in the order `getQuads` returned them.
 */
export function getSubjects(store, predicate, object, graph) {
    // The subject position is left open (null) so that any subject matches.
    const matches = store.getQuads(null, predicate, object, graph);
    return matches.map(({ subject }) => subject);
}
/**
 * Collects the object term of every quad that matches the given pattern.
 *
 * @param {object} store - Object exposing `getQuads(subject, predicate, object, graph)`.
 * @param {*} subject - Subject filter, handed to `getQuads` unchanged.
 * @param {*} predicate - Predicate filter, handed to `getQuads` unchanged.
 * @param {*} graph - Graph filter, handed to `getQuads` unchanged.
 * @returns {Array} One object per matching quad, in the order `getQuads` returned them.
 */
export function getObjects(store, subject, predicate, graph) {
    // The object position is left open (null) so that any object matches.
    const matches = store.getQuads(subject, predicate, null, graph);
    return matches.map(({ object }) => object);
}
/**
 * Describes an RDF term as a JSON-serialisable array that keeps term type,
 * value, datatype and language apart (recursing into quoted quads), so that
 * two different terms never produce the same description.
 */
function describeTerm(term) {
    if (term.termType === "Quad") {
        return [
            "Quad",
            describeTerm(term.subject),
            describeTerm(term.predicate),
            describeTerm(term.object),
            describeTerm(term.graph),
        ];
    }
    return [term.termType, term.value, term.datatype?.value ?? "", term.language ?? ""];
}

/** Builds an unambiguous string key that identifies a quad by its four terms. */
function quadKey(quad) {
    const terms = [quad.subject, quad.predicate, quad.object, quad.graph];
    return JSON.stringify(terms.map((term) => describeTerm(term)));
}

/**
 * Splits a store into sub-stores of roughly `threshold` quads while keeping
 * every quad that is connected through blank nodes in the same sub-store.
 *
 * When `store.size` is below `threshold` the input store itself is returned as
 * the only element. Otherwise new stores are created with
 * `RdfStore.createDefault()` and filled in the order of `store.getQuads()`.
 * A new sub-store is started only between groups, so a sub-store may exceed
 * `threshold` when a blank-node group is large.
 *
 * @param {object} store - Store exposing `size`, `getQuads(...)` and `addQuad(quad)`.
 * @param {number} threshold - Quad count at which a new sub-store is started.
 * @returns {Array} The resulting stores (always at least one).
 */
export function splitStore(store, threshold) {
    if (store.size < threshold) {
        return [store];
    }

    const stores = [];
    // Keys of quads that were already placed as part of a blank-node group.
    const placedQuadKeys = new Set();
    // Labels of blank nodes whose connected quads were already collected.
    const expandedBlankNodes = new Set();
    let subStore = RdfStore.createDefault();

    for (const quad of store.getQuads()) {
        if (placedQuadKeys.has(quadKey(quad))) {
            continue;
        }
        if (subStore.size >= threshold) {
            stores.push(subStore);
            subStore = RdfStore.createDefault();
        }

        // Follow blank nodes transitively: every quad reachable through a chain
        // of blank nodes must land in the same sub-store, otherwise the chain
        // would be cut in two and its linking quads duplicated.
        const pending = [quad.subject, quad.object].filter((term) => term.termType === "BlankNode");
        while (pending.length > 0) {
            const blankNode = pending.pop();
            if (expandedBlankNodes.has(blankNode.value)) {
                continue;
            }
            expandedBlankNodes.add(blankNode.value);

            const connected = [
                ...store.getQuads(blankNode),
                ...store.getQuads(null, null, blankNode),
            ];
            for (const linked of connected) {
                subStore.addQuad(linked);
                placedQuadKeys.add(quadKey(linked));
                for (const term of [linked.subject, linked.object]) {
                    if (term.termType === "BlankNode" && !expandedBlankNodes.has(term.value)) {
                        pending.push(term);
                    }
                }
            }
        }

        subStore.addQuad(quad);
    }

    stores.push(subStore);
    return stores;
}
/**
 * Rewrites, in place, every quad whose object is an `XSD.integer` literal with
 * a lexical value that starts with "+" and contains "+" followed by a digit:
 * the quad is removed and re-added with the leading "+" stripped.
 *
 * @param {object} store - Store exposing `getQuads()`, `removeQuad(quad)` and `addQuad(quad)`.
 * @returns {void}
 */
export function sanitizeQuads(store) {
    const plusThenDigits = /\+\d+/;
    // Iterate over the snapshot returned by getQuads() while mutating the store.
    for (const quad of store.getQuads()) {
        const target = quad.object;
        const isIntegerLiteral = target.termType === "Literal" && target.datatype.value === XSD.integer;
        if (!isIntegerLiteral) {
            continue;
        }
        const lexical = target.value;
        const hasLeadingPlus = plusThenDigits.test(lexical) && lexical.startsWith("+");
        if (!hasLeadingPlus) {
            continue;
        }
        store.removeQuad(quad);
        const unsigned = df.literal(lexical.substring(1), df.namedNode(XSD.integer));
        store.addQuad(df.quad(quad.subject, quad.predicate, unsigned, quad.graph));
    }
}
/**
 * Sends SPARQL Update operations to `config.graphStoreUrl` as form-encoded
 * HTTP POST requests.
 *
 * The entries of `query` are joined with newlines and sent as one request,
 * unless `config.forVirtuoso` is set and the joined text exceeds 1e6 bytes
 * (UTF-8); in that case every entry is sent as its own request, sequentially.
 *
 * @param {string[]} query - Update operations to execute.
 * @param {object} config - Request settings.
 * @param {string} config.graphStoreUrl - Endpoint that receives the POST requests.
 * @param {string} [config.accessToken] - Optional token, sent as the `access-token` form field.
 * @param {boolean} [config.forVirtuoso] - Enables splitting of oversized updates.
 * @param {object} [config.measurePerformance] - Optional; `queryTimeout` (seconds, default 1800)
 *   is used for both the headers and the body timeout of the HTTP dispatcher.
 * @returns {Promise<void>} Resolves once every request got an OK response.
 * @throws {Error} When a request fails or the endpoint answers with a non-OK status;
 *   the error is logged and rethrown unchanged.
 */
export async function doSPARQLRequest(query, config) {
    const logger = getLoggerFor("doSPARQLRequest");
    let dispatcher;
    try {
        const jointQuery = query.join("\n");
        const mustSplit = config.forVirtuoso && Buffer.byteLength(jointQuery, 'utf8') > 1e6;
        const queries = mustSplit ? query : [jointQuery];

        const timeout = config.measurePerformance?.queryTimeout || 1800;
        // One dispatcher for the whole call instead of a fresh connection pool per query.
        dispatcher = new Agent({
            headersTimeout: timeout * 1000,
            bodyTimeout: timeout * 1000,
        });
        // The token is a form field value and must be percent-encoded like the update itself,
        // otherwise characters such as "&", "=" or "+" corrupt the request body.
        const tokenField = config.accessToken
            ? `&access-token=${encodeURIComponent(config.accessToken)}`
            : '';

        for (const q of queries) {
            logger.debug(`Executing SPARQL query: \n${q}`);
            const res = await fetch(config.graphStoreUrl, {
                method: "POST",
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                },
                body: `update=${fixedEncodeURIComponent(q)}${tokenField}`,
                dispatcher,
            });
            if (!res.ok) {
                throw new Error(`HTTP request failed with code ${res.status} and message: \n${await res.text()}`);
            }
        }
    }
    catch (err) {
        logger.error(`Error while executing SPARQL request: ${err.message} - ${err.cause}`);
        throw err;
    }
    finally {
        if (dispatcher) {
            // Release the sockets held by the dispatcher; a failure here must not
            // hide the outcome of the request itself.
            await dispatcher.destroy().catch((releaseErr) => {
                logger.debug(`Failed to release HTTP dispatcher: ${releaseErr.message}`);
            });
        }
    }
}
/**
 * Percent-encodes a string with `encodeURIComponent` and additionally escapes
 * the characters `!`, `'`, `(`, `)` and `*` (as lowercase hex escapes).
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded text.
 */
function fixedEncodeURIComponent(str) {
    // Escapes for the five characters that are additionally replaced below.
    const extraEscapes = {
        "!": "%21",
        "'": "%27",
        "(": "%28",
        ")": "%29",
        "*": "%2a",
    };
    const encoded = encodeURIComponent(str);
    return encoded.replace(/[!'()*]/g, (ch) => extraEscapes[ch]);
}