hra-api
Version:
The Human Reference Atlas (HRA) API deployed to https://apps.humanatlas.io/api/
277 lines (267 loc) • 13.8 kB
JavaScript
// src/server/create-dataset-graph.worker.js
import fetch2, { Headers as Headers2, Request, Response } from "node-fetch";
import { workerData } from "worker_threads";
// src/library/shared/utils/add-to-endpoint.js
import toNT from "@rdfjs/to-ntriples";
import stream from "stream-browserify";
/**
 * Render one quad as a single N-Triples statement line.
 *
 * Only the subject, predicate and object terms are serialized (through `toNT`); the
 * quad's own graph is not written. In each serialized term the first doubled
 * blank-node prefix (`_:_:`) is collapsed to `_:`.
 *
 * @param {object} quad - Object exposing `subject`, `predicate` and `object` terms.
 * @returns {string} "<subject> <predicate> <object> ." terminated by a newline.
 */
function toTripleString(quad) {
  const serialized = [quad.subject, quad.predicate, quad.object].map(
    (term) => toNT(term).replace("_:_:", "_:")
  );
  // Joining with the terminator as a fourth element yields "s p o .\n".
  return [...serialized, ".\n"].join(" ");
}
/**
 * Lazily produce the text chunks of a SPARQL `INSERT DATA` update for one named graph.
 *
 * The first chunk opens the `INSERT DATA { GRAPH <graph> {` block, then one chunk is
 * yielded per quad (see `toTripleString`), and the last chunk closes both braces.
 *
 * @param {string} graph - IRI of the target named graph.
 * @param {Iterable<object>} quads - Quads to insert.
 * @yields {string} Consecutive fragments of the update request body.
 */
function* sparqlUpdateIterator(graph, quads) {
  const opening = ["", "INSERT DATA {", `GRAPH <${graph}> {`, ""].join("\n");
  yield opening;
  for (const entry of quads) {
    yield toTripleString(entry);
  }
  yield "}}\n";
}
/**
 * POST the given quads into a named graph of a SPARQL update endpoint.
 *
 * The request body is a readable stream fed by `sparqlUpdateIterator`, so the update
 * text is generated chunk by chunk rather than assembled into one string first.
 *
 * @param {string} graph - IRI of the named graph to insert into.
 * @param {Iterable<object>} quads - Quads to insert.
 * @param {string} endpoint2 - URL of the SPARQL update endpoint.
 * @returns {Promise<Response>} The endpoint's response; its status is not inspected here.
 */
async function addToEndpoint(graph, quads, endpoint2) {
  const updateChunks = sparqlUpdateIterator(graph, quads);
  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/sparql-update" },
    body: stream.Readable.from(updateChunks)
  };
  return fetch(endpoint2, requestInit);
}
// src/library/shared/utils/fetch-linked-data.js
import formats from "@rdfjs/formats-common";
import { isReadableStream } from "is-stream";
import jsonld from "jsonld";
import patchResponse from "nodeify-fetch/lib/patchResponse.browser.js";
// Lookup table from a URL's file extension to the media type it is taken to denote.
// Written grouped by media type; the resulting key order is the same as listing each
// extension individually.
var EXTENSION_MAPPING = Object.fromEntries(
  [
    ["application/ld+json", ["json-ld", "jsonld", "json"]],
    ["application/n-triples", ["nt"]],
    ["application/n-quads", ["nq"]],
    ["text/n3", ["n3"]],
    ["application/rdf+xml", ["owl", "rdf", "xml"]],
    ["application/trig", ["trig"]],
    ["text/turtle", ["turtle", "ttl"]],
    ["text/html", ["html", "htm"]]
  ].flatMap(([mediaType, extensions]) => extensions.map((extension) => [extension, mediaType]))
);
/**
 * Load RDF quads from a remote document or from inline JSON-LD.
 *
 * When `url` is a string starting with "http", the document is fetched with an Accept
 * header listing `preferredFormat` first, followed by every other format known to
 * `formats.parsers`. The response is parsed according to its Content-Type or, failing
 * that, the media type implied by the URL's file extension. Any other input (JSON text
 * or an already-parsed object) is handed to `jsonld.toRDF` as inline JSON-LD.
 *
 * @param {string|object} url - Document URL, JSON-LD text, or parsed JSON-LD.
 * @param {string} [preferredFormat="text/turtle"] - Media type to request first.
 * @returns {Promise<Array>} The parsed quads.
 * @throws {Error} When the server answers with a non-2xx status, or when the content
 *   cannot be recognized ("unknown content type: ..."); the underlying failure is
 *   preserved as the error's `cause`.
 */
async function getQuads(url, preferredFormat = "text/turtle") {
  if (typeof url !== "string" || !url.startsWith("http")) {
    // Inline data: JSON text or an already-parsed JSON-LD document.
    try {
      const json = typeof url === "string" ? JSON.parse(url) : url;
      return await jsonld.toRDF(json);
    } catch (err) {
      throw new Error(`unknown content type: inline ${typeof url} is not valid JSON-LD`, { cause: err });
    }
  }

  const parsers = formats.parsers;
  const otherFormats = Array.from(parsers.keys()).filter((k) => k !== preferredFormat).sort().reverse();
  const res = await fetch(url, {
    headers: new Headers({
      accept: [preferredFormat, ...otherFormats].join(", ")
    })
  });
  if (!res.ok) {
    // Do not try to parse an error page as if it were the requested data.
    throw new Error(`failed to fetch ${url}: HTTP ${res.status}`);
  }

  // The header may be absent; media types are case-insensitive and may carry parameters.
  const type2 = (res.headers.get("content-type") ?? "").split(";")[0].trim().toLowerCase();

  // Take the extension from the path only, so "?query" and "#fragment" do not hide it.
  let path = url;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not parseable as a URL: fall back to the raw string.
  }
  const extension = EXTENSION_MAPPING[path.split(".").pop().toLowerCase()];
  const guessedType = parsers.has(type2) ? type2 : parsers.has(extension) ? extension : void 0;

  if (type2 === "application/json" || guessedType === "application/ld+json") {
    const json = await res.json();
    return jsonld.toRDF(json);
  }

  if (guessedType) {
    let body = res.body;
    if (!isReadableStream(body)) {
      body = patchResponse(res).body;
    }
    const quads = [];
    for await (const quad of parsers.import(guessedType, body, { baseIRI: url })) {
      quads.push(quad);
    }
    return quads;
  }

  // Last resort: the payload may still be JSON-LD served under an unrecognized type.
  try {
    const json = JSON.parse(await res.text());
    return await jsonld.toRDF(json);
  } catch (err) {
    throw new Error(`unknown content type: ${type2 || "(none)"}`, { cause: err });
  }
}
// src/library/shared/utils/sparql.js
import jsonld2 from "jsonld";
import Papa from "papaparse";
// Replace the JSON-LD library's document loader with one that retrieves remote documents
// (such as contexts) through the global `fetch` and decodes the response body as JSON.
jsonld2.documentLoader = async (documentUrl) => {
  const response = await fetch(documentUrl);
  const document = await response.json();
  return { contextUrl: null, document, documentUrl };
};
/**
 * Send a SPARQL query to an endpoint as a form-encoded POST.
 *
 * @param {string} query - SPARQL query text, sent as the `query` form field.
 * @param {string} endpoint2 - URL of the SPARQL endpoint.
 * @param {string} mimetype - Media type requested through the Accept header.
 * @returns {Promise<Response>} The pending `fetch` call.
 */
function fetchSparql(query, endpoint2, mimetype) {
  const form = new URLSearchParams({ query });
  // Content-Length is taken from the length of the encoded form string.
  const encodedLength = String(form).length;
  const headers = {
    Accept: mimetype,
    "Content-Type": "application/x-www-form-urlencoded",
    "Content-Length": String(encodedLength)
  };
  return fetch(endpoint2, { method: "POST", headers, body: form });
}
/**
 * Run a SPARQL SELECT query and return its rows as plain objects.
 *
 * Results are requested as CSV and parsed with Papa Parse using the header row for
 * keys, skipping empty lines and applying dynamic typing.
 *
 * @param {string} query - SPARQL SELECT query.
 * @param {string} endpoint2 - URL of the SPARQL endpoint.
 * @returns {Promise<Array<object>>} Parsed rows, or an empty array when none were parsed.
 */
async function select(query, endpoint2) {
  const response = await fetchSparql(query, endpoint2, "text/csv");
  const csv = await response.text();
  const parsed = Papa.parse(csv, { header: true, skipEmptyLines: true, dynamicTyping: true });
  return parsed.data || [];
}
/**
 * POST a SPARQL update request to an endpoint.
 *
 * @param {string} updateQuery - SPARQL update text, sent as the request body.
 * @param {string} endpoint2 - URL of the SPARQL update endpoint.
 * @returns {Promise<Response>} The endpoint's response; its status is not inspected here.
 */
async function update(updateQuery, endpoint2) {
  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/sparql-update" },
    body: updateQuery
  };
  return fetch(endpoint2, requestInit);
}
// src/library/shared/utils/named-graphs.js
// Selects the name of every graph that holds at least one triple.
var QUERY = "SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } }";
/**
 * List the named graphs currently present in a SPARQL endpoint.
 *
 * @param {string} endpoint2 - URL of the SPARQL endpoint.
 * @returns {Promise<Set>} The `g` value of each result row.
 */
async function namedGraphs(endpoint2) {
  const rows = await select(QUERY, endpoint2);
  const names = new Set();
  for (const row of rows) {
    names.add(row.g);
  }
  return names;
}
// src/library/shared/utils/ensure-named-graphs.js
/**
 * Make sure a list of named graphs exists in the endpoint, loading any that are missing.
 *
 * Each entry has the form "<graph IRI>@@<source URL>"; an entry without "@@" uses the
 * same value for both. Missing graphs are cleared and loaded from their source URL in a
 * single update request. No request is sent when every graph is already present.
 *
 * @param {Iterable<string>} graphsToCheck - "graph@@url" entries to verify.
 * @param {string} endpoint2 - URL of the SPARQL endpoint.
 * @returns {Promise<Set<string>>} Graphs found in the endpoint plus the ones just
 *   requested. A failed load is logged, but the graph is still included.
 */
async function ensureNamedGraphs(graphsToCheck, endpoint2) {
  const graphs = new Set(await namedGraphs(endpoint2));
  const statements = [];
  for (const graphAndUrl of graphsToCheck) {
    const parts = graphAndUrl.split("@@");
    const graph = parts[0];
    const url = parts[parts.length - 1];
    if (!graphs.has(graph)) {
      console.log((/* @__PURE__ */ new Date()).toISOString(), "Adding named graph:", graph);
      statements.push(`
CLEAR GRAPH <${graph}>;
LOAD <${url}> INTO GRAPH <${graph}>;
`);
      graphs.add(graph);
    }
  }
  // Nothing missing: skip the round trip instead of posting an empty update.
  if (statements.length > 0) {
    const result = await update(statements.join(""), endpoint2);
    if (!result.ok) {
      // Report the failure the same way enrichDatasetGraph does; callers keep running.
      console.log("error loading named graphs", "code:", result.status);
      console.error(await result.text());
    }
  }
  return graphs;
}
// src/library/v1/queries/ds-graph-enrichment.rq
// SPARQL update text that inserts ccf:collides_with, ccf:collides_with_ct and
// ccf:collides_with_bm statements for registration locations into the graph named by the
// DSGraphsExtra prefix. enrichDatasetGraph rewrites the `PREFIX DSGraphs:` and
// `PREFIX DSGraphsExtra:` declarations before sending it, so those two declarations must
// keep their exact text. The literal is split with backslash line continuations.
var ds_graph_enrichment_default = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX rdfs: <http://www.w3\
.org/2000/01/rdf-schema#>\nPREFIX owl: <http://www.w3.org/2002/07/owl#>\nPREFIX ccf: <http://purl.org/ccf/>\nPREFIX HRA: <h\
ttps://purl.humanatlas.io/collection/hra-api>\nPREFIX DSGraphs: <https://purl.humanatlas.io/collection/ds-graphs>\nPREFIX \
DSGraphsExtra: <https://purl.humanatlas.io/graph/ds-graphs-enrichments>\nPREFIX has_characterizing_biomarker_set: <http:/\
/purl.obolibrary.org/obo/RO_0015004>\n\nINSERT {\n  GRAPH DSGraphsExtra: {\n    ?rui_location ccf:collides_with ?anatomical_\
structure ;\n      ccf:collides_with ?as_parent ;\n      ccf:collides_with_ct ?cell_type ;\n      ccf:collides_with_bm ?bio\
marker .\n  }\n}\nUSING HRA:\nUSING DSGraphs:\nUSING NAMED DSGraphsExtra:\nWHERE {\n  {\n    [] ccf:has_registration_location ?r\
ui_location .\n    ?rui_location rdf:type ccf:SpatialEntity .\n\n    FILTER NOT EXISTS {\n      GRAPH DSGraphsExtra: {\n     \
?rui_location ccf:collides_with [] .\n      }\n    }\n  }\n\n  {\n    ?rui_location ccf:collides_with ?anatomical_structure\
.\n  }\n  UNION\n  {\n    [] rdf:type ccf:SpatialPlacement ;\n        ccf:placement_relative_to ?refOrgan ;\n        ccf:plac\
ement_for ?rui_location .\n\n    {\n      ?refOrgan ccf:representation_of ?anatomical_structure .\n    }\n    UNION\n    {\n   \
?refOrgan owl:sameAs [\n        ccf:representation_of ?anatomical_structure ;\n      ] .\n    }\n  }\n\n  # Manually add pa\
ired organ parents\n  OPTIONAL {\n    VALUES (?as_parent ?anatomical_structure) {\n      # Lymph Node\n      (<http://purl.o\
bolibrary.org/obo/UBERON_0000029> <http://purl.obolibrary.org/obo/UBERON_0002509>)\n      # Eye\n      (<http://purl.oboli\
brary.org/obo/UBERON_0000970> <http://purl.obolibrary.org/obo/UBERON_0004548>)\n      (<http://purl.obolibrary.org/obo/UB\
ERON_0000970> <http://purl.obolibrary.org/obo/UBERON_0004549>)\n      # Fallopian Tube\n      (<http://purl.obolibrary.org\
/obo/UBERON_0003889> <http://purl.obolibrary.org/obo/UBERON_0001303>)\n      (<http://purl.obolibrary.org/obo/UBERON_0003\
889> <http://purl.obolibrary.org/obo/UBERON_0001302>)\n      # Kidney\n      (<http://purl.obolibrary.org/obo/UBERON_00021\
13> <http://purl.obolibrary.org/obo/UBERON_0004538>)\n      (<http://purl.obolibrary.org/obo/UBERON_0002113> <http://purl\
.obolibrary.org/obo/UBERON_0004539>)\n      # Knee\n      (<http://purl.obolibrary.org/obo/UBERON_0001465> <http://purl.or\
g/sig/ont/fma/fma24978>)\n      (<http://purl.obolibrary.org/obo/UBERON_0001465> <http://purl.org/sig/ont/fma/fma24977>)\n\
# Mammary Gland\n      (<http://purl.obolibrary.org/obo/UBERON_0001911> <http://purl.org/sig/ont/fma/fma57991>)\n   \
(<http://purl.obolibrary.org/obo/UBERON_0001911> <http://purl.org/sig/ont/fma/fma57987>)\n      # Ovary\n      (<http:/\
/purl.obolibrary.org/obo/UBERON_0000992> <http://purl.obolibrary.org/obo/UBERON_0002119>)\n      (<http://purl.obolibrary\
.org/obo/UBERON_0000992> <http://purl.obolibrary.org/obo/UBERON_0002118>)\n      # Palatine Tonsil\n      (<http://purl.ob\
olibrary.org/obo/UBERON_0002373> <http://purl.org/sig/ont/fma/fma54974>)\n      (<http://purl.obolibrary.org/obo/UBERON_0\
002373> <http://purl.org/sig/ont/fma/fma54973>)\n      # Renal Pelvis\n      (<http://purl.obolibrary.org/obo/UBERON_00012\
24> <http://purl.obolibrary.org/obo/UBERON_0018116>)\n      (<http://purl.obolibrary.org/obo/UBERON_0001224> <http://purl\
.obolibrary.org/obo/UBERON_0018115>)\n      # Ureter\n      (<http://purl.obolibrary.org/obo/UBERON_0000056> <http://purl.\
obolibrary.org/obo/UBERON_0001223>)\n      (<http://purl.obolibrary.org/obo/UBERON_0000056> <http://purl.obolibrary.org/o\
bo/UBERON_0001222>)\n      # Lung (Edge case: we have reversed the relationship between lung and respiratory system for r\
easons)\n      (<http://purl.obolibrary.org/obo/UBERON_0002048> <http://purl.obolibrary.org/obo/UBERON_0001004>)\n      (<\
http://purl.obolibrary.org/obo/UBERON_0001004> <http://purl.obolibrary.org/obo/UBERON_0002048>)\n    }\n    \n    OPTIONAL \
{\n      ?parent_descriptor rdf:type ccf:CellMarkerDescriptor ;\n                         ccf:primary_anatomical_structure\
?as_parent ;\n                         ccf:primary_cell_type ?cell_type .\n      OPTIONAL {\n        ?parent_descriptor cc\
f:biomarker ?biomarker .\n      }\n    }\n  }\n\n  OPTIONAL {\n    ?descriptor rdf:type ccf:CellMarkerDescriptor ;\n           \
ccf:primary_anatomical_structure ?anatomical_structure ;\n                ccf:primary_cell_type ?cell_type .\n    OPT\
IONAL {\n      ?descriptor ccf:biomarker ?biomarker .\n    }\n  }\n}\n";
// src/library/v1/queries/update-dataset-info.rq
// SPARQL update template that replaces the hraApi:status, hraApi:message and
// hraApi:updateTime recorded for a dataset with new values (the update time is NOW()).
// updateDatasetInfo fills it in by substituting the `urn:hra-api:TOKEN:ds-info` IRI and
// the {{STATUS}} and {{MESSAGE}} placeholders, so those markers must keep their exact
// text. The literal is split with backslash line continuations.
var update_dataset_info_default = 'PREFIX hraApi: <urn:hra-api#>\nPREFIX schema: <http://schema.org/>\nPREFIX DSGraphs: <u\
rn:hra-api:TOKEN:ds-info>\n\nWITH DSGraphs:\nDELETE {\n  DSGraphs: a hraApi:Dataset ;\n    hraApi:status ?status ;\n    hraApi\
:message ?message ;\n    hraApi:updateTime ?updateTime .\n}\nINSERT {\n  DSGraphs: a hraApi:Dataset ;\n    hraApi:status ?new\
Status ;\n    hraApi:message ?newMessage ;\n    hraApi:updateTime ?newUpdateTime .\n}\nWHERE {\n  OPTIONAL {\n    DSGraphs: a \
hraApi:Dataset ;\n      hraApi:status ?status ;\n      hraApi:message ?message ;\n      hraApi:updateTime ?updateTime .\n  }\
\n\n  BIND("{{STATUS}}" as ?newStatus)\n  BIND("{{MESSAGE}}" as ?newMessage)\n  BIND(NOW() as ?newUpdateTime)\n}\n';
// src/library/v1/utils/dataset-graph.js
// Graphs that createDatasetGraph asks ensureNamedGraphs to verify, as "graph@@url"
// entries. Every entry pairs a purl.humanatlas.io graph IRI with the matching
// "latest/graph.ttl" file on cdn.humanatlas.io, so both halves derive from one identifier.
var DEFAULT_GRAPHS = [
  "collection/hra-api",
  "graph/hra-ccf-patches",
  "graph/hra-pop",
  "collection/ds-graphs",
  "graph/ds-graphs-enrichments"
].map(
  (id) => `https://purl.humanatlas.io/${id}@@https://cdn.humanatlas.io/digital-objects/${id}/latest/graph.ttl`
);
/**
 * Record the processing status of a dataset in the endpoint.
 *
 * Fills in the `update_dataset_info_default` template and posts it. The status and
 * message are written inside double-quoted SPARQL string literals, so backslashes,
 * double quotes and line breaks in them are escaped first. Function replacers are used
 * so that `$`-sequences in the inputs are inserted literally instead of being treated
 * as replacement patterns.
 *
 * @param {string} status - Status value substituted for {{STATUS}}.
 * @param {string} message - Message substituted for {{MESSAGE}}.
 * @param {string} token2 - Dataset token placed in the `urn:hra-api:<token>:ds-info` IRI.
 * @param {string} endpoint2 - URL of the SPARQL update endpoint.
 * @returns {Promise<Response>} The endpoint's response.
 */
async function updateDatasetInfo(status, message, token2, endpoint2) {
  console.log((/* @__PURE__ */ new Date()).toISOString(), token2, status, message);
  const escapes = { "\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r" };
  const asLiteralContent = (value) => String(value).replace(/[\\"\n\r]/g, (ch) => escapes[ch]);
  const updateQuery = update_dataset_info_default
    .replace("urn:hra-api:TOKEN:ds-info", () => `urn:hra-api:${token2}:ds-info`)
    .replace("{{STATUS}}", () => asLiteralContent(status))
    .replace("{{MESSAGE}}", () => asLiteralContent(message));
  return update(updateQuery, endpoint2);
}
/**
 * Build the dataset graph for a token and track its progress in the endpoint.
 *
 * Ensures the default graphs are present, then - unless the token's dataset graph
 * already exists - loads every entry of `request2.dataSources` into it and runs the
 * enrichment update. Progress is recorded through `updateDatasetInfo` as "Loading",
 * then "Ready", or "Error" when any step throws.
 *
 * A data source whose insert is rejected by the endpoint (non-2xx response) is treated
 * as a failure, so the dataset is reported as "Error" rather than "Ready" with
 * missing data.
 *
 * @param {string} token2 - Dataset token used to derive the graph IRIs.
 * @param {object} request2 - Request carrying an iterable `dataSources`.
 * @param {string} endpoint2 - URL of the SPARQL endpoint.
 * @returns {Promise<void>}
 */
async function createDatasetGraph(token2, request2, endpoint2) {
  try {
    const graphs = await ensureNamedGraphs(DEFAULT_GRAPHS, endpoint2);
    const dsGraph = `urn:hra-api:${token2}:ds-graph`;
    const dsGraphEnrichments = `urn:hra-api:${token2}:ds-graph-enrichments`;
    if (!graphs.has(dsGraph)) {
      for (const source of request2.dataSources) {
        await updateDatasetInfo("Loading", `Adding dataset`, token2, endpoint2);
        const quads = await getQuads(source);
        const response = await addToEndpoint(dsGraph, quads, endpoint2);
        if (!response.ok) {
          // fetch resolves on HTTP errors too; surface them so the status becomes "Error".
          throw new Error(`failed to add data to ${dsGraph}: HTTP ${response.status}`);
        }
      }
      await updateDatasetInfo("Loading", `Enriching dataset`, token2, endpoint2);
      // enrichDatasetGraph logs its own failures; its result stays best-effort here.
      await enrichDatasetGraph(dsGraph, dsGraphEnrichments, endpoint2);
    }
    await updateDatasetInfo("Ready", `Dataset ready`, token2, endpoint2);
  } catch (err) {
    console.error("ERROR", token2, request2, endpoint2, err);
    await updateDatasetInfo("Error", `Error processing dataset`, token2, endpoint2);
  }
}
/**
 * Run the enrichment update for one dataset graph.
 *
 * The `DSGraphs` and `DSGraphsExtra` prefix declarations of the bundled enrichment
 * query are pointed at the given graphs before the update is sent. A non-ok response
 * is logged (status code, then response text) and still returned to the caller.
 *
 * @param {string} dsGraph - IRI of the dataset graph to read from.
 * @param {string} dsGraphEnrichments - IRI of the graph receiving the enrichments.
 * @param {string} endpoint2 - URL of the SPARQL update endpoint.
 * @returns {Promise<Response>} The endpoint's response.
 */
async function enrichDatasetGraph(dsGraph, dsGraphEnrichments, endpoint2) {
  const withDatasetGraph = ds_graph_enrichment_default.replace(
    "PREFIX DSGraphs: <https://purl.humanatlas.io/collection/ds-graphs>",
    `PREFIX DSGraphs: <${dsGraph}>`
  );
  const enrichmentQuery = withDatasetGraph.replace(
    "PREFIX DSGraphsExtra: <https://purl.humanatlas.io/graph/ds-graphs-enrichments>",
    `PREFIX DSGraphsExtra: <${dsGraphEnrichments}>`
  );
  const response = await update(enrichmentQuery, endpoint2);
  if (response.ok) {
    return response;
  }
  console.log("error enriching", dsGraph, "code:", response.status);
  console.error(await response.text());
  return response;
}
// src/server/create-dataset-graph.worker.js
// Install node-fetch's implementations as globals: the utilities bundled above call the
// bare `fetch` and `Headers` names.
Object.assign(globalThis, {
  fetch: fetch2,
  Headers: Headers2,
  Request,
  Response
});
// Run the job described by the worker's input data.
const { token, request, endpoint } = workerData;
await createDatasetGraph(token, request, endpoint);