UNPKG

@rdfc/sparql-ingest-processor-ts

Version:
106 lines (105 loc) 4.27 kB
import { XSD } from "@treecg/types";
import { DataFactory } from "rdf-data-factory";
import { RdfStore } from "rdf-stores";
import { getLoggerFor } from "./LogUtil.js";
import { Agent } from "undici";

const df = new DataFactory();

// Matches a lexical value that starts with "+" immediately followed by digits.
const PLUS_PREFIXED_DIGITS = /^\+\d+/;

/**
 * Returns the subject of every quad in `store` matching the given pattern.
 *
 * @param store - Object exposing `getQuads(subject, predicate, object, graph)`.
 * @param predicate - Predicate term to match (forwarded to `getQuads`).
 * @param object - Object term to match (forwarded to `getQuads`).
 * @param graph - Graph term to match (forwarded to `getQuads`).
 * @returns Array with one subject term per matching quad.
 */
export function getSubjects(store, predicate, object, graph) {
    return store.getQuads(null, predicate, object, graph).map((quad) => quad.subject);
}

/**
 * Returns the object of every quad in `store` matching the given pattern.
 *
 * @param store - Object exposing `getQuads(subject, predicate, object, graph)`.
 * @param subject - Subject term to match (forwarded to `getQuads`).
 * @param predicate - Predicate term to match (forwarded to `getQuads`).
 * @param graph - Graph term to match (forwarded to `getQuads`).
 * @returns Array with one object term per matching quad.
 */
export function getObjects(store, subject, predicate, graph) {
    return store.getQuads(subject, predicate, null, graph).map((quad) => quad.object);
}

/**
 * Describes a term as a nested array that keeps the term type (and, for
 * literals, language and datatype) so that different terms sharing the same
 * `value` string do not produce the same key.
 */
function termKey(term) {
    switch (term.termType) {
        case "Literal":
            return [term.termType, term.value, term.language, term.datatype?.value];
        case "Quad":
            // Quoted triples carry no distinguishing `value`; describe their components instead.
            return [
                term.termType,
                termKey(term.subject),
                termKey(term.predicate),
                termKey(term.object),
                termKey(term.graph),
            ];
        default:
            return [term.termType, term.value];
    }
}

/**
 * Builds an unambiguous string key for a quad. JSON encoding is used so that
 * component boundaries cannot be confused (plain concatenation would map
 * e.g. "ab"+"c" and "a"+"bc" to the same key).
 */
function quadKey(quad) {
    return JSON.stringify([
        termKey(quad.subject),
        termKey(quad.predicate),
        termKey(quad.object),
        termKey(quad.graph),
    ]);
}

/**
 * Splits `store` into several stores of roughly `threshold` quads each.
 *
 * When a quad has a blank node as subject or object, every quad of the
 * original store that has that blank node as subject or as object is placed
 * in the same sub-store. Only these direct neighbours are pulled in; quads
 * reachable through further blank nodes are not followed. Because of this
 * grouping a sub-store may exceed `threshold`.
 *
 * @param store - Source store (exposes `size` and `getQuads`).
 * @param threshold - Size at which a new sub-store is started.
 * @returns `[store]` itself when `store.size < threshold`, otherwise the list
 *          of newly created sub-stores.
 */
export function splitStore(store, threshold) {
    if (store.size < threshold) {
        return [store];
    }

    const stores = [];
    // Keys of quads that were already placed as part of a blank-node group.
    const placed = new Set();
    let subStore = RdfStore.createDefault();

    for (const quad of store.getQuads()) {
        if (placed.has(quadKey(quad))) {
            continue;
        }
        // The size check happens before the blank-node group is added so the
        // group is never split across two sub-stores.
        if (subStore.size >= threshold) {
            stores.push(subStore);
            subStore = RdfStore.createDefault();
        }
        for (const term of [quad.subject, quad.object]) {
            if (term.termType !== "BlankNode") {
                continue;
            }
            const related = [
                ...store.getQuads(term),
                ...store.getQuads(null, null, term),
            ];
            for (const q of related) {
                subStore.addQuad(q);
                placed.add(quadKey(q));
            }
        }
        subStore.addQuad(quad);
    }

    stores.push(subStore);
    return stores;
}

/**
 * Rewrites, in place, every literal typed as `XSD.integer` whose lexical value
 * starts with "+" followed by digits: the quad is removed and re-added with
 * the leading "+" stripped.
 *
 * @param store - Store to sanitize (mutated).
 */
export function sanitizeQuads(store) {
    for (const q of store.getQuads()) {
        const { object } = q;
        if (object.termType !== "Literal" || object.datatype.value !== XSD.integer) {
            continue;
        }
        if (!PLUS_PREFIXED_DIGITS.test(object.value)) {
            continue;
        }
        store.removeQuad(q);
        store.addQuad(df.quad(
            q.subject,
            q.predicate,
            df.literal(object.value.substring(1), df.namedNode(XSD.integer)),
            q.graph,
        ));
    }
}

/**
 * Sends SPARQL update queries to `config.graphStoreUrl` as form-encoded POST
 * requests (`update=<query>`), one after the other.
 *
 * The queries are joined with newlines and sent as a single request, unless
 * `config.forVirtuoso` is set and the joined query is larger than 1e6 bytes
 * (UTF-8), in which case every entry of `query` is sent as its own request.
 *
 * @param query - Array of SPARQL update strings.
 * @param config - Reads `graphStoreUrl`, `forVirtuoso`, `accessToken`
 *                 (appended as `access-token` form field when set) and
 *                 `measurePerformance.queryTimeout` (multiplied by 1000 for the
 *                 undici `headersTimeout`/`bodyTimeout`; falls back to 1800).
 * @throws Error when a response is not OK; any error is logged and rethrown.
 */
export async function doSPARQLRequest(query, config) {
    const logger = getLoggerFor("doSPARQLRequest");
    let dispatcher;
    try {
        const jointQuery = query.join("\n");
        const sendSeparately = config.forVirtuoso
            && Buffer.byteLength(jointQuery, "utf8") > 1e6;
        const queries = sendSeparately ? query : [jointQuery];

        const timeout = config.measurePerformance?.queryTimeout || 1800;
        // One agent for the whole call: connections are reused across the
        // queries and released in `finally` instead of piling up per request.
        dispatcher = new Agent({
            headersTimeout: timeout * 1000,
            bodyTimeout: timeout * 1000,
        });

        // The token is a form field value and must be percent-encoded like the query.
        const tokenField = config.accessToken
            ? `&access-token=${encodeURIComponent(config.accessToken)}`
            : "";

        for (const q of queries) {
            logger.debug(`Executing SPARQL query: \n${q}`);
            const res = await fetch(config.graphStoreUrl, {
                method: "POST",
                headers: {
                    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
                },
                body: `update=${fixedEncodeURIComponent(q)}${tokenField}`,
                dispatcher,
            });
            if (!res.ok) {
                throw new Error(`HTTP request failed with code ${res.status} and message: \n${await res.text()}`);
            }
        }
    } catch (err) {
        logger.error(`Error while executing SPARQL request: ${err.message} - ${err.cause}`);
        throw err;
    } finally {
        // destroy() rather than close(): successful response bodies are never
        // read here, and all requests of this call have already settled.
        await dispatcher?.destroy();
    }
}

/**
 * `encodeURIComponent` that additionally percent-encodes `!`, `'`, `(`, `)`
 * and `*`, using upper-case hex digits.
 *
 * @param str - String to encode.
 * @returns The encoded string.
 */
function fixedEncodeURIComponent(str) {
    return encodeURIComponent(str).replace(
        /[!'()*]/g,
        (c) => `%${c.charCodeAt(0).toString(16).toUpperCase()}`,
    );
}