UNPKG

hra-api

Version:

The Human Reference Atlas (HRA) API deployed to https://apps.humanatlas.io/api/

277 lines (267 loc) 13.8 kB
// src/server/create-dataset-graph.worker.js
import fetch2, { Headers as Headers2, Request, Response } from "node-fetch";
import { workerData } from "worker_threads";

// src/library/shared/utils/add-to-endpoint.js
import toNT from "@rdfjs/to-ntriples";
import stream from "stream-browserify";

/**
 * Serialize one quad as a single N-Triples statement (the graph term is ignored).
 *
 * The first "_:_:" in each serialized term is collapsed to "_:".
 * NOTE(review): presumably this repairs blank-node labels that already carry
 * a "_:" prefix before serialization -- confirm against the quad sources.
 *
 * @param {object} quad - Object with `subject`, `predicate` and `object` terms.
 * @returns {string} The statement terminated by " ." and a newline.
 */
function toTripleString(quad) {
  const subject = toNT(quad.subject).replace("_:_:", "_:");
  const predicate = toNT(quad.predicate).replace("_:_:", "_:");
  const object = toNT(quad.object).replace("_:_:", "_:");
  return `${subject} ${predicate} ${object} .\n`;
}

/**
 * Lazily produce the pieces of a SPARQL `INSERT DATA` update that writes
 * `quads` into the named graph `graph`.
 *
 * @param {string} graph - IRI of the target named graph.
 * @param {Iterable<object>} quads - Quads to insert.
 * @yields {string} Consecutive fragments of the update request body.
 */
function* sparqlUpdateIterator(graph, quads) {
  yield `
INSERT DATA {
GRAPH <${graph}> {
`;
  for (const quad of quads) {
    yield toTripleString(quad);
  }
  yield "}}\n";
}

/**
 * POST the quads to a SPARQL update endpoint as a streamed `INSERT DATA`.
 *
 * @param {string} graph - IRI of the target named graph.
 * @param {Iterable<object>} quads - Quads to insert.
 * @param {string} endpoint2 - SPARQL update endpoint URL.
 * @returns {Promise<Response>} The raw fetch response; callers must check `ok`.
 */
async function addToEndpoint(graph, quads, endpoint2) {
  return fetch(endpoint2, {
    method: "POST",
    headers: {
      "Content-Type": "application/sparql-update"
    },
    body: stream.Readable.from(sparqlUpdateIterator(graph, quads))
  });
}

// src/library/shared/utils/fetch-linked-data.js
import formats from "@rdfjs/formats-common";
import { isReadableStream } from "is-stream";
import jsonld from "jsonld";
import patchResponse from "nodeify-fetch/lib/patchResponse.browser.js";

/** Fallback media types keyed by file extension, used when the response content type is not parseable. */
const EXTENSION_MAPPING = {
  "json-ld": "application/ld+json",
  jsonld: "application/ld+json",
  json: "application/ld+json",
  nt: "application/n-triples",
  nq: "application/n-quads",
  n3: "text/n3",
  owl: "application/rdf+xml",
  rdf: "application/rdf+xml",
  xml: "application/rdf+xml",
  trig: "application/trig",
  turtle: "text/turtle",
  ttl: "text/turtle",
  html: "text/html",
  htm: "text/html"
};

/**
 * Load linked data and return it as an array of quads.
 *
 * `url` may be an http(s) URL to fetch, a JSON-LD document serialized as a
 * string, or an already-parsed JSON-LD value.
 *
 * @param {string|object} url - Remote URL or inline JSON-LD data.
 * @param {string} [preferredFormat="text/turtle"] - Media type listed first in the Accept header.
 * @returns {Promise<object[]>} The parsed quads.
 * @throws {Error} "unknown content type: ..." when the data cannot be parsed.
 */
async function getQuads(url, preferredFormat = "text/turtle") {
  if (typeof url !== "string" || !url.startsWith("http")) {
    // Inline data: either a JSON string or an already-parsed JSON-LD value.
    try {
      const json = typeof url === "string" ? JSON.parse(url) : url;
      return await jsonld.toRDF(json);
    } catch (err) {
      // There is no content type for inline data; report the JS type instead
      // (the original code referenced an undeclared `type` variable here).
      throw new Error(`unknown content type: ${typeof url}`, { cause: err });
    }
  }

  const parsers = formats.parsers;
  const otherFormats = Array.from(parsers.keys())
    .filter((k) => k !== preferredFormat)
    .sort()
    .reverse();
  const res = await fetch(url, {
    headers: new Headers({
      accept: [preferredFormat, ...otherFormats].join(", ")
    })
  });

  // A response may lack a Content-Type header; treat that as "no type"
  // rather than crashing on null.
  const type2 = (res.headers.get("content-type") ?? "").split(";")[0].trim();
  const extension = EXTENSION_MAPPING[url.split(".").slice(-1)[0]];
  const guessedType = parsers.has(type2) ? type2 : parsers.has(extension) ? extension : void 0;

  if (type2 === "application/json" || guessedType === "application/ld+json") {
    const json = await res.json();
    return jsonld.toRDF(json);
  }

  if (guessedType) {
    let body = res.body;
    if (!isReadableStream(body)) {
      body = patchResponse(res).body;
    }
    const quadStream = parsers.import(guessedType, body, { baseIRI: url });
    const quads = [];
    for await (const quad of quadStream) {
      quads.push(quad);
    }
    return quads;
  }

  // Last resort: try to interpret the body as JSON-LD.
  try {
    const json = JSON.parse(await res.text());
    return await jsonld.toRDF(json);
  } catch (err) {
    console.log(err);
    throw new Error(`unknown content type: ${type2}`, { cause: err });
  }
}

// src/library/shared/utils/sparql.js
import jsonld2 from "jsonld";
import Papa from "papaparse";

// Resolve remote JSON-LD documents (e.g. contexts) with the global fetch.
jsonld2.documentLoader = async (documentUrl) => {
  const document = await fetch(documentUrl).then((r) => r.json());
  return {
    contextUrl: null,
    document,
    documentUrl
  };
};

/**
 * POST a SPARQL query to an endpoint as a form-encoded body.
 *
 * @param {string} query - SPARQL query text.
 * @param {string} endpoint2 - SPARQL endpoint URL.
 * @param {string} mimetype - Value of the Accept header.
 * @returns {Promise<Response>} The raw fetch response.
 */
function fetchSparql(query, endpoint2, mimetype) {
  const body = new URLSearchParams({ query });
  return fetch(endpoint2, {
    method: "POST",
    headers: {
      Accept: mimetype,
      "Content-Type": "application/x-www-form-urlencoded",
      "Content-Length": body.toString().length.toString()
    },
    body
  });
}

/**
 * Run a SPARQL SELECT, requesting CSV, and return the parsed rows.
 *
 * @param {string} query - SPARQL SELECT query.
 * @param {string} endpoint2 - SPARQL endpoint URL.
 * @returns {Promise<object[]>} One object per row, keyed by CSV header names.
 */
async function select(query, endpoint2) {
  const resp = await fetchSparql(query, endpoint2, "text/csv");
  const text = await resp.text();
  const { data } = Papa.parse(text, {
    header: true,
    skipEmptyLines: true,
    dynamicTyping: true
  });
  return data || [];
}

/**
 * POST a SPARQL update request.
 *
 * @param {string} updateQuery - SPARQL update text.
 * @param {string} endpoint2 - SPARQL update endpoint URL.
 * @returns {Promise<Response>} The raw fetch response; callers must check `ok`.
 */
async function update(updateQuery, endpoint2) {
  return fetch(endpoint2, {
    method: "POST",
    headers: {
      "Content-Type": "application/sparql-update"
    },
    body: updateQuery
  });
}

// src/library/shared/utils/named-graphs.js
const QUERY = "SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } }";

/**
 * List the named graphs that currently contain at least one triple.
 *
 * @param {string} endpoint2 - SPARQL endpoint URL.
 * @returns {Promise<Set<string>>} The graph IRIs.
 */
async function namedGraphs(endpoint2) {
  const graphs = await select(QUERY, endpoint2);
  return new Set(graphs.map((graph) => graph.g));
}

// src/library/shared/utils/ensure-named-graphs.js
/**
 * Make sure every requested named graph exists, loading the missing ones.
 *
 * Each entry is either a bare graph IRI (which is also used as the load URL)
 * or "graphIRI@@sourceURL".
 *
 * @param {Iterable<string>} graphsToCheck - Graph specifications.
 * @param {string} endpoint2 - SPARQL endpoint URL.
 * @returns {Promise<Set<string>>} Graphs present before the call plus the ones queued for loading.
 */
async function ensureNamedGraphs(graphsToCheck, endpoint2) {
  const graphs = await namedGraphs(endpoint2);
  let updateQuery = "";
  for (const graphAndUrl of graphsToCheck) {
    const parts = graphAndUrl.split("@@");
    const graph = parts[0];
    const url = parts[parts.length - 1];
    if (!graphs.has(graph)) {
      console.log(new Date().toISOString(), "Adding named graph:", graph);
      updateQuery += `
CLEAR GRAPH <${graph}>;
LOAD <${url}> INTO GRAPH <${graph}>;
`;
      graphs.add(graph);
    }
  }

  // Nothing missing: skip the round trip instead of posting an empty update.
  if (updateQuery !== "") {
    const resp = await update(updateQuery, endpoint2);
    if (!resp.ok) {
      console.error(new Date().toISOString(), "Error adding named graphs, code:", resp.status);
    }
  }
  return graphs;
}

// src/library/v1/queries/ds-graph-enrichment.rq
const ds_graph_enrichment_default = `PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX ccf: <http://purl.org/ccf/>
PREFIX HRA: <https://purl.humanatlas.io/collection/hra-api>
PREFIX DSGraphs: <https://purl.humanatlas.io/collection/ds-graphs>
PREFIX DSGraphsExtra: <https://purl.humanatlas.io/graph/ds-graphs-enrichments>
PREFIX has_characterizing_biomarker_set: <http://purl.obolibrary.org/obo/RO_0015004>

INSERT {
  GRAPH DSGraphsExtra: {
    ?rui_location ccf:collides_with ?anatomical_structure ;
      ccf:collides_with ?as_parent ;
      ccf:collides_with_ct ?cell_type ;
      ccf:collides_with_bm ?biomarker .
  }
}
USING HRA:
USING DSGraphs:
USING NAMED DSGraphsExtra:
WHERE {
  {
    [] ccf:has_registration_location ?rui_location .
    ?rui_location rdf:type ccf:SpatialEntity .

    FILTER NOT EXISTS {
      GRAPH DSGraphsExtra: {
        ?rui_location ccf:collides_with [] .
      }
    }
  }

  {
    ?rui_location ccf:collides_with ?anatomical_structure .
  }
  UNION
  {
    [] rdf:type ccf:SpatialPlacement ;
      ccf:placement_relative_to ?refOrgan ;
      ccf:placement_for ?rui_location .

    {
      ?refOrgan ccf:representation_of ?anatomical_structure .
    }
    UNION
    {
      ?refOrgan owl:sameAs [
        ccf:representation_of ?anatomical_structure ;
      ] .
    }
  }

  # Manually add paired organ parents
  OPTIONAL {
    VALUES (?as_parent ?anatomical_structure) {
      # Lymph Node
      (<http://purl.obolibrary.org/obo/UBERON_0000029> <http://purl.obolibrary.org/obo/UBERON_0002509>)
      # Eye
      (<http://purl.obolibrary.org/obo/UBERON_0000970> <http://purl.obolibrary.org/obo/UBERON_0004548>)
      (<http://purl.obolibrary.org/obo/UBERON_0000970> <http://purl.obolibrary.org/obo/UBERON_0004549>)
      # Fallopian Tube
      (<http://purl.obolibrary.org/obo/UBERON_0003889> <http://purl.obolibrary.org/obo/UBERON_0001303>)
      (<http://purl.obolibrary.org/obo/UBERON_0003889> <http://purl.obolibrary.org/obo/UBERON_0001302>)
      # Kidney
      (<http://purl.obolibrary.org/obo/UBERON_0002113> <http://purl.obolibrary.org/obo/UBERON_0004538>)
      (<http://purl.obolibrary.org/obo/UBERON_0002113> <http://purl.obolibrary.org/obo/UBERON_0004539>)
      # Knee
      (<http://purl.obolibrary.org/obo/UBERON_0001465> <http://purl.org/sig/ont/fma/fma24978>)
      (<http://purl.obolibrary.org/obo/UBERON_0001465> <http://purl.org/sig/ont/fma/fma24977>)
      # Mammary Gland
      (<http://purl.obolibrary.org/obo/UBERON_0001911> <http://purl.org/sig/ont/fma/fma57991>)
      (<http://purl.obolibrary.org/obo/UBERON_0001911> <http://purl.org/sig/ont/fma/fma57987>)
      # Ovary
      (<http://purl.obolibrary.org/obo/UBERON_0000992> <http://purl.obolibrary.org/obo/UBERON_0002119>)
      (<http://purl.obolibrary.org/obo/UBERON_0000992> <http://purl.obolibrary.org/obo/UBERON_0002118>)
      # Palatine Tonsil
      (<http://purl.obolibrary.org/obo/UBERON_0002373> <http://purl.org/sig/ont/fma/fma54974>)
      (<http://purl.obolibrary.org/obo/UBERON_0002373> <http://purl.org/sig/ont/fma/fma54973>)
      # Renal Pelvis
      (<http://purl.obolibrary.org/obo/UBERON_0001224> <http://purl.obolibrary.org/obo/UBERON_0018116>)
      (<http://purl.obolibrary.org/obo/UBERON_0001224> <http://purl.obolibrary.org/obo/UBERON_0018115>)
      # Ureter
      (<http://purl.obolibrary.org/obo/UBERON_0000056> <http://purl.obolibrary.org/obo/UBERON_0001223>)
      (<http://purl.obolibrary.org/obo/UBERON_0000056> <http://purl.obolibrary.org/obo/UBERON_0001222>)
      # Lung (Edge case: we have reversed the relationship between lung and respiratory system for reasons)
      (<http://purl.obolibrary.org/obo/UBERON_0002048> <http://purl.obolibrary.org/obo/UBERON_0001004>)
      (<http://purl.obolibrary.org/obo/UBERON_0001004> <http://purl.obolibrary.org/obo/UBERON_0002048>)
    }

    OPTIONAL {
      ?parent_descriptor rdf:type ccf:CellMarkerDescriptor ;
        ccf:primary_anatomical_structure ?as_parent ;
        ccf:primary_cell_type ?cell_type .
      OPTIONAL {
        ?parent_descriptor ccf:biomarker ?biomarker .
      }
    }
  }

  OPTIONAL {
    ?descriptor rdf:type ccf:CellMarkerDescriptor ;
      ccf:primary_anatomical_structure ?anatomical_structure ;
      ccf:primary_cell_type ?cell_type .
    OPTIONAL {
      ?descriptor ccf:biomarker ?biomarker .
    }
  }
}
`;

// src/library/v1/queries/update-dataset-info.rq
const update_dataset_info_default = `PREFIX hraApi: <urn:hra-api#>
PREFIX schema: <http://schema.org/>
PREFIX DSGraphs: <urn:hra-api:TOKEN:ds-info>

WITH DSGraphs:
DELETE {
  DSGraphs: a hraApi:Dataset ;
    hraApi:status ?status ;
    hraApi:message ?message ;
    hraApi:updateTime ?updateTime .
}
INSERT {
  DSGraphs: a hraApi:Dataset ;
    hraApi:status ?newStatus ;
    hraApi:message ?newMessage ;
    hraApi:updateTime ?newUpdateTime .
}
WHERE {
  OPTIONAL {
    DSGraphs: a hraApi:Dataset ;
      hraApi:status ?status ;
      hraApi:message ?message ;
      hraApi:updateTime ?updateTime .
  }

  BIND("{{STATUS}}" as ?newStatus)
  BIND("{{MESSAGE}}" as ?newMessage)
  BIND(NOW() as ?newUpdateTime)
}
`;

// src/library/v1/utils/dataset-graph.js
/** Graphs that must be present before a dataset graph is built, as "graphIRI@@sourceURL". */
const DEFAULT_GRAPHS = [
  "https://purl.humanatlas.io/collection/hra-api@@https://cdn.humanatlas.io/digital-objects/collection/hra-api/latest/graph.ttl",
  "https://purl.humanatlas.io/graph/hra-ccf-patches@@https://cdn.humanatlas.io/digital-objects/graph/hra-ccf-patches/latest/graph.ttl",
  "https://purl.humanatlas.io/graph/hra-pop@@https://cdn.humanatlas.io/digital-objects/graph/hra-pop/latest/graph.ttl",
  "https://purl.humanatlas.io/collection/ds-graphs@@https://cdn.humanatlas.io/digital-objects/collection/ds-graphs/latest/graph.ttl",
  "https://purl.humanatlas.io/graph/ds-graphs-enrichments@@https://cdn.humanatlas.io/digital-objects/graph/ds-graphs-enrichments/latest/graph.ttl"
];

/**
 * Escape a value for use inside a double-quoted SPARQL string literal.
 *
 * @param {*} value - Value to embed; converted with String().
 * @returns {string} The escaped text, without surrounding quotes.
 */
function escapeSparqlString(value) {
  return String(value)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r");
}

/**
 * Record the processing status of a dataset in its `ds-info` graph.
 *
 * @param {string} status - Status label written as hraApi:status.
 * @param {string} message - Message written as hraApi:message.
 * @param {string} token2 - Dataset token used to build the graph IRI.
 * @param {string} endpoint2 - SPARQL update endpoint URL.
 * @returns {Promise<Response>} The raw fetch response of the update.
 */
async function updateDatasetInfo(status, message, token2, endpoint2) {
  console.log(new Date().toISOString(), token2, status, message);
  // Function replacers keep "$&", "$1", ... in the inserted values literal.
  const updateQuery = update_dataset_info_default
    .replace("urn:hra-api:TOKEN:ds-info", () => `urn:hra-api:${token2}:ds-info`)
    .replace("{{STATUS}}", () => escapeSparqlString(status))
    .replace("{{MESSAGE}}", () => escapeSparqlString(message));
  return update(updateQuery, endpoint2);
}

/**
 * Build the dataset graph for a token: ensure the default graphs exist, load
 * every data source of the request, enrich the graph, and track progress via
 * `updateDatasetInfo`. Failures are logged and recorded with status "Error".
 *
 * @param {string} token2 - Dataset token used to build the graph IRIs.
 * @param {object} request2 - Request with an iterable `dataSources` property.
 * @param {string} endpoint2 - SPARQL endpoint URL.
 * @returns {Promise<void>}
 */
async function createDatasetGraph(token2, request2, endpoint2) {
  try {
    const graphs = await ensureNamedGraphs(DEFAULT_GRAPHS, endpoint2);
    const dsGraph = `urn:hra-api:${token2}:ds-graph`;
    const dsGraphEnrichments = `urn:hra-api:${token2}:ds-graph-enrichments`;
    if (!graphs.has(dsGraph)) {
      for (const source of request2.dataSources) {
        await updateDatasetInfo("Loading", "Adding dataset", token2, endpoint2);
        const quads = await getQuads(source);
        const loaded = await addToEndpoint(dsGraph, quads, endpoint2);
        // A rejected upload must not end in a "Ready" dataset.
        if (!loaded.ok) {
          throw new Error(`Failed to load data into <${dsGraph}>, code: ${loaded.status}`);
        }
      }
      await updateDatasetInfo("Loading", "Enriching dataset", token2, endpoint2);
      // Enrichment stays best-effort: enrichDatasetGraph logs its own failures.
      await enrichDatasetGraph(dsGraph, dsGraphEnrichments, endpoint2);
    }
    await updateDatasetInfo("Ready", "Dataset ready", token2, endpoint2);
  } catch (err) {
    console.error("ERROR", token2, request2, endpoint2, err);
    await updateDatasetInfo("Error", "Error processing dataset", token2, endpoint2);
  }
}

/**
 * Run the enrichment update with the DSGraphs / DSGraphsExtra prefixes
 * pointed at the given graphs.
 *
 * @param {string} dsGraph - IRI of the dataset graph to read from.
 * @param {string} dsGraphEnrichments - IRI of the graph receiving the enrichments.
 * @param {string} endpoint2 - SPARQL update endpoint URL.
 * @returns {Promise<Response>} The raw fetch response; a non-OK result is logged, not thrown.
 */
async function enrichDatasetGraph(dsGraph, dsGraphEnrichments, endpoint2) {
  const updateQuery = ds_graph_enrichment_default
    .replace(
      "PREFIX DSGraphs: <https://purl.humanatlas.io/collection/ds-graphs>",
      () => `PREFIX DSGraphs: <${dsGraph}>`
    )
    .replace(
      "PREFIX DSGraphsExtra: <https://purl.humanatlas.io/graph/ds-graphs-enrichments>",
      () => `PREFIX DSGraphsExtra: <${dsGraphEnrichments}>`
    );
  const result = await update(updateQuery, endpoint2);
  if (!result.ok) {
    console.log("error enriching", dsGraph, "code:", result.status);
    console.error(await result.text());
  }
  return result;
}

// src/server/create-dataset-graph.worker.js
// Expose node-fetch as the global fetch API used by the library code above.
globalThis.fetch = fetch2;
globalThis.Headers = Headers2;
globalThis.Request = Request;
globalThis.Response = Response;

const { token, request, endpoint } = workerData;
await createDatasetGraph(token, request, endpoint);