/*
 * @mnakhla/rocketrml
 * Version:
 * This is a forked rocketrml mapper for the RDF mapping language
 * 649 lines (624 loc) • 18.2 kB
 * JavaScript
 */
const tags = require('language-tags');
const helper = require('./helper');
const { RR, RDF } = require('../helper/vocabulary');
const prefixhelper = require('../helper/prefixHelper');
const functionHelper = require('../function/function');
const XMLParser = require('./XMLParser');
const JSONParser = require('./JSONParser');
const CSVParser = require('./CSVParser');
const XMLParserCPP = require('./XmlParserCpp');
const FontoxpathParser = require('./FontoxpathParser');
const { getDataFromParser } = helper;
// Module-level counter used when building fallback subject ids of the form
// `<triplesMapId>_<count>`. parseFile resets it to 0 at the start of every call
// and iterateFile increments it while producing subjects.
// NOTE(review): this is shared module state, so overlapping (concurrent)
// parseFile calls would reset each other's counter — confirm callers run
// parseFile sequentially.
let count = 0;
/**
 * Creates the parser that matches the query language and runs the mapping of
 * one triples map against it.
 *
 * @param {Array} data - the whole mapping file as JSON (handed to iterateFile).
 * @param {Object} currObject - the triples map that is currently processed.
 * @param {Object} prefixes - all known prefixes.
 * @param {*} source - the input source, passed to the parser constructor.
 * @param {*} iterator - the iterator expression, passed to the parser constructor.
 * @param {Object} [options] - mapper options; `xmlPerformanceMode === true` or
 *   `xpathLib === 'pugixml'` selects XMLParserCPP, `xpathLib === 'fontoxpath'`
 *   selects FontoxpathParser, anything else falls back to XMLParser.
 * @param {string} ql - the query language: 'XPath', 'JSONPath' or 'CSV'.
 * @returns {Promise<*>} the value produced by iterateFile.
 * @throws {string} `Cannot process: <ql>` for any other query language. The
 *   thrown value stays a plain string so existing callers keep working.
 */
const parseFile = async (
  data,
  currObject,
  prefixes,
  source,
  iterator,
  options,
  ql,
) => {
  count = 0;
  let Parser;
  switch (ql) {
    case 'XPath': {
      const usePugixml =
        Boolean(options) &&
        (options.xmlPerformanceMode === true || options.xpathLib === 'pugixml');
      const useFontoxpath =
        Boolean(options) && options.xpathLib === 'fontoxpath';
      if (usePugixml) {
        Parser = new XMLParserCPP(source, iterator, options);
      } else if (useFontoxpath) {
        Parser = new FontoxpathParser(source, iterator, options);
      } else {
        Parser = new XMLParser(source, iterator, options);
      }
      break;
    }
    case 'JSONPath':
      Parser = new JSONParser(source, iterator, options);
      break;
    case 'CSV':
      Parser = new CSVParser(source, iterator, options);
      break;
    default:
      throw `Cannot process: ${ql}`;
  }
  try {
    return await iterateFile(Parser, data, currObject, prefixes, options);
  } finally {
    // Release the parser even when the mapping fails; only parsers that
    // expose a free() method need this.
    if (Parser.free) {
      Parser.free();
    }
  }
};
/*
Parameters shared by the functions in this file:
Parser: the parser object
data: the whole ttl mapfile in json
currObject: the current object from the mapfile that is parsed
prefixes: all prefixes
options: the options
ql: the query language (only taken by parseFile)
*/
/**
 * Stores, on `obj.$parentPaths`, the parser value of every parent path that
 * does not have a truthy entry there yet.
 *
 * @param {Object} Parser - the parser object handed to getDataFromParser.
 * @param {number} index - the iteration index to read from.
 * @param {Array} parents - the parent paths to look up.
 * @param {Object} obj - the object that receives the `$parentPaths` map.
 * @param {Object} options - the options handed to getDataFromParser.
 */
const writeParentPath = (Parser, index, parents, obj, options) => {
  if (!obj.$parentPaths) {
    // The container is only created when there is something to store.
    if (parents.length > 0) {
      obj.$parentPaths = {};
    }
  }
  for (const parentPath of parents) {
    if (obj.$parentPaths[parentPath]) {
      continue;
    }
    obj.$parentPaths[parentPath] = getDataFromParser(
      Parser,
      index,
      parentPath,
      options,
    );
  }
};
/**
 * Gathers the `parent` expressions of every join condition in the mapping
 * whose parentTriplesMap points at the given triples map.
 */
const collectJoinParents = (data, currObject) => {
  const parents = [];
  for (const d of data) {
    if (
      d.parentTriplesMap &&
      d.parentTriplesMap['@id'] === currObject['@id'] &&
      d.joinCondition
    ) {
      const parentPaths = helper
        .addArray(d.joinCondition)
        .map(({ parent }) => parent);
      parents.push(...parentPaths);
    }
  }
  return parents;
};

/**
 * Expands the class entry (or entries) of a subject map to full URLs.
 * Returns an array for an array of classes, a single value for a single
 * class and undefined when the subject map has no class.
 */
const resolveSubjectClass = (subjectMap, prefixes) => {
  if (!subjectMap.class) {
    return undefined;
  }
  if (Array.isArray(subjectMap.class)) {
    return subjectMap.class.map((sm) =>
      prefixhelper.replacePrefixWithURL(sm['@id'], prefixes),
    );
  }
  return prefixhelper.replacePrefixWithURL(subjectMap.class['@id'], prefixes);
};

/**
 * Runs one triples map over every iteration of the parser and builds the
 * resulting subject objects.
 *
 * The subject id comes from, in this order of precedence, the subject map's
 * reference, template, functionValue, or constant / blank-node term type.
 * Every generated subject gets its own result object (carrying `_index`, the
 * iteration it was produced from), so several nodes or template expansions of
 * one iteration no longer share - and overwrite - a single object.
 *
 * @param {Object} Parser - the parser object (must provide getCount()).
 * @param {Array} data - the whole mapping file as JSON.
 * @param {Object} currObject - the triples map that is processed.
 * @param {Object} prefixes - all known prefixes.
 * @param {Object} options - the mapper options.
 * @returns {Promise<*>} the subjects, passed through helper.cutArray.
 * @throws {string} when the triples map does not have exactly one subjectMap
 *   or a template subject uses an unknown term type.
 * @throws {Error} when the subject map matches none of the supported forms.
 */
const iterateFile = async (Parser, data, currObject, prefixes, options) => {
  const parents = collectJoinParents(data, currObject);
  // get subjectmapping
  const subjectMap = currObject.subjectMap;
  if (!subjectMap || Array.isArray(subjectMap)) {
    throw 'Error: exacltly one subjectMap needed!';
  }
  // get all possible things in subjectmap
  let type = resolveSubjectClass(subjectMap, prefixes);
  // A class entry with more than one key is handed to
  // helper.subjFunctionExecution to compute the type per iteration.
  const functionClassMap =
    subjectMap.class && Object.keys(subjectMap.class).length > 1
      ? subjectMap.class
      : undefined;
  const result = [];
  const iteratorNumber = Parser.getCount();

  if (subjectMap.reference) {
    for (let i = 0; i < iteratorNumber; i++) {
      if (functionClassMap) {
        type = await helper.subjFunctionExecution(
          Parser,
          functionClassMap,
          prefixes,
          data,
          i,
          options,
        );
      }
      count++;
      const nodes = helper.addArray(
        getDataFromParser(Parser, i, subjectMap.reference, options),
      );
      // Processed strictly in sequence because every step pushes to result.
      for (let temp of nodes) {
        temp = helper.isURL(temp) ? temp : helper.addBase(temp, prefixes);
        if (temp.indexOf(' ') !== -1) {
          // Values containing a space are skipped.
          continue;
        }
        // Fresh object per node so subjects of one iteration do not alias.
        let obj = { _index: i };
        if (type) {
          obj['@type'] = type;
        }
        obj['@id'] = temp;
        obj = await doObjectMappings(
          Parser,
          i,
          currObject,
          data,
          prefixes,
          obj,
          options,
        );
        if (!obj['@id']) {
          obj['@id'] = `${currObject['@id']}_${count}`;
        }
        writeParentPath(Parser, i, parents, obj, options);
        result.push(obj);
      }
    }
  } else if (subjectMap.template) {
    // NOTE(review): unlike the other branches the counter is bumped once per
    // call here, not once per iteration; kept as-is to preserve fallback ids.
    count++;
    for (let i = 0; i < iteratorNumber; i++) {
      if (functionClassMap) {
        type = await helper.subjFunctionExecution(
          Parser,
          functionClassMap,
          prefixes,
          data,
          i,
          options,
        );
      }
      const ids = calculateTemplate(
        Parser,
        i,
        subjectMap.template,
        prefixes,
        undefined,
        options,
      );
      for (let id of ids) {
        if (subjectMap.termType) {
          const termType = prefixhelper.replacePrefixWithURL(
            subjectMap.termType['@id'],
            prefixes,
          );
          switch (termType) {
            case RR.BlankNode:
              id = `_:${id}`;
              break;
            case RR.IRI:
              // rr:reference is known to be unset in this branch, so exactly
              // one of template/reference is in use.
              if (!helper.isURL(id)) {
                id = helper.addBase(id, prefixes);
              }
              break;
            case RR.Literal:
              break;
            default:
              throw `Don't know: ${subjectMap.termType['@id']}`;
          }
        }
        // Fresh object per expanded id so subjects of one iteration do not alias.
        let obj = { _index: i };
        obj['@id'] = id;
        if (type) {
          obj['@type'] = type;
        }
        obj = await doObjectMappings(
          Parser,
          i,
          currObject,
          data,
          prefixes,
          obj,
          options,
        );
        if (!obj['@id']) {
          obj['@id'] = `${currObject['@id']}_${count}`;
        }
        writeParentPath(Parser, i, parents, obj, options);
        result.push(obj);
      }
    }
  } else if (subjectMap.functionValue) {
    for (let i = 0; i < iteratorNumber; i++) {
      count++;
      let obj = { _index: i };
      obj['@id'] = await helper.subjFunctionExecution(
        Parser,
        subjectMap,
        prefixes,
        data,
        i,
        options,
      );
      if (type) {
        obj['@type'] = type;
      }
      obj = await doObjectMappings(
        Parser,
        i,
        currObject,
        data,
        prefixes,
        obj,
        options,
      );
      writeParentPath(Parser, i, parents, obj, options);
      result.push(obj);
    }
  } else if (
    subjectMap.constant ||
    (subjectMap.termType &&
      prefixhelper.replacePrefixWithURL(
        subjectMap.termType['@id'],
        prefixes,
      ) === RR.BlankNode)
  ) {
    // Constant subject, or BlankNode with no template or id
    for (let i = 0; i < iteratorNumber; i++) {
      if (functionClassMap) {
        type = await helper.subjFunctionExecution(
          Parser,
          functionClassMap,
          prefixes,
          data,
          i,
          options,
        );
      }
      count++;
      let obj = { _index: i };
      if (subjectMap.constant) {
        obj['@id'] = helper.getConstant(subjectMap.constant, prefixes);
      }
      if (type) {
        obj['@type'] = type;
      }
      obj = await doObjectMappings(
        Parser,
        i,
        currObject,
        data,
        prefixes,
        obj,
        options,
      );
      if (!obj['@id']) {
        // No id at all: generate a blank node id from the map id and counter.
        obj['@id'] = `_:${encodeURIComponent(`${currObject['@id']}_${count}`)}`;
      }
      writeParentPath(Parser, i, parents, obj, options);
      result.push(obj);
    }
  } else {
    throw new Error('Unsupported subjectmap');
  }
  return helper.cutArray(result);
};
/**
 * Applies every predicateObjectMap of the triples map to `obj`, one mapping
 * and one predicate at a time (in sequence).
 *
 * @param {Object} Parser - the parser object.
 * @param {number} index - the iteration index that is mapped.
 * @param {Object} currObject - the triples map that is processed.
 * @param {Array} data - the whole mapping file as JSON.
 * @param {Object} prefixes - all known prefixes.
 * @param {Object} obj - the subject object that receives the predicates.
 * @param {Object} options - the mapper options.
 * @returns {Promise<*>} `obj` passed through helper.cutArray.
 */
const doObjectMappings = async (
  Parser,
  index,
  currObject,
  data,
  prefixes,
  obj,
  options,
) => {
  if (!currObject.predicateObjectMap) {
    return helper.cutArray(obj);
  }
  const mappings = helper.addArray(currObject.predicateObjectMap);
  for (const mapping of mappings) {
    const predicate = helper.getPredicate(mapping, prefixes, data);
    // A mapping may carry one predicate or a list of them.
    const predicates = Array.isArray(predicate) ? predicate : [predicate];
    for (const singlePredicate of predicates) {
      await handleSingleMapping(
        Parser,
        index,
        obj,
        mapping,
        singlePredicate,
        prefixes,
        data,
        options,
      );
    }
  }
  return helper.cutArray(obj);
};
/**
 * Resolves the language given by a language term map.
 *
 * A constant is returned as-is; for a reference or a template the first
 * looked-up / expanded value is returned.
 *
 * @param {Object} Parser - the parser object.
 * @param {number} index - the iteration index to read from.
 * @param {Object} termMap - the language map (constant, reference or template).
 * @param {Object} prefixes - all known prefixes.
 * @param {Object} options - the mapper options.
 * @returns {*} the resolved language value.
 * @throws {Error} when the term map has none of the three forms.
 */
const useLanguageMap = (Parser, index, termMap, prefixes, options) => {
  const { constant, reference, template } = termMap;
  if (constant) {
    return constant;
  }
  if (!reference && !template) {
    throw new Error('TermMap has neither constant, reference or template');
  }
  // A reference takes precedence over a template.
  const candidates = reference
    ? getDataFromParser(Parser, index, reference, options)
    : calculateTemplate(Parser, index, template, prefixes, undefined, options);
  return helper.addArray(candidates)[0];
};
/**
 * Applies one predicate of one predicateObjectMap to `obj`.
 *
 * A fixed `mapping.object` is added directly. Otherwise every object map of
 * the mapping is evaluated (all started together via Promise.all) and the
 * value it yields - from a template, reference, constant, parent triples map
 * or function - is written onto `obj`.
 *
 * @param {Object} Parser - the parser object.
 * @param {number} index - the iteration index that is mapped.
 * @param {Object} obj - the subject object that receives the values.
 * @param {Object} mapping - the predicateObjectMap entry.
 * @param {string} predicate - the predicate (prefixed or full).
 * @param {Object} prefixes - all known prefixes.
 * @param {Array} data - the whole mapping file as JSON.
 * @param {Object} options - the mapper options.
 * @returns {Promise<void>}
 * @throws {string} for an invalid language tag or an unknown template term type.
 */
const handleSingleMapping = async (
  Parser,
  index,
  obj,
  mapping,
  predicate,
  prefixes,
  data,
  options,
) => {
  const targetPredicate = prefixhelper.replacePrefixWithURL(predicate, prefixes);

  // A fixed object wins over any object maps.
  if (mapping.object) {
    const fixedObject = {
      '@id': prefixhelper.replacePrefixWithURL(mapping.object['@id'], prefixes),
    };
    helper.addToObj(obj, targetPredicate, fixedObject);
    return;
  }

  let objectMaps = [];
  if (mapping.objectMap) {
    objectMaps = Array.isArray(mapping.objectMap)
      ? [...mapping.objectMap]
      : [mapping.objectMap];
  }

  // Wraps a value as an IRI node, adding the base when it is not a URL yet.
  const toIriNode = (value) => ({
    '@id': helper.isURL(value) ? value : helper.addBase(value, prefixes),
  });

  const applyObjectMap = async (objectmap) => {
    const { reference, template, functionValue } = objectmap;
    let { constant, language, termType: termtype } = objectmap;
    const datatype = helper.isURL(objectmap.datatype)
      ? objectmap.datatype
      : prefixhelper.replacePrefixWithURL(objectmap.datatype, prefixes);

    if (objectmap.languageMap) {
      language = useLanguageMap(
        Parser,
        index,
        objectmap.languageMap,
        prefixes,
        options,
      );
    }
    if (language && !tags(language).valid()) {
      throw `Language tag: ${language} invalid!`;
    }

    if (template) {
      // we have a template definition
      const expanded = calculateTemplate(
        Parser,
        index,
        template,
        prefixes,
        termtype,
        options,
      );
      expanded.forEach((filled) => {
        let value;
        if (!termtype) {
          value = { '@id': filled };
        } else {
          termtype = prefixhelper.replacePrefixWithURL(termtype, prefixes);
          if (termtype === RR.BlankNode) {
            value = { '@id': `_:${filled}` };
          } else if (termtype === RR.IRI) {
            value = toIriNode(filled);
          } else if (termtype === RR.Literal) {
            // Literals keep the plain expanded string.
            value = filled;
          } else {
            throw `Don't know: ${termtype['@id']}`;
          }
        }
        helper.setObjPredicate(
          obj,
          targetPredicate,
          helper.cutArray(value),
          language,
          datatype,
        );
      });
      return;
    }

    if (reference) {
      // we have a reference definition
      let values = [
        ...helper.addArray(getDataFromParser(Parser, index, reference, options)),
      ];
      if (prefixhelper.replacePrefixWithURL(termtype, prefixes) === RR.IRI) {
        values = values.map(toIriNode);
      }
      if (values.length > 0) {
        helper.setObjPredicate(
          obj,
          targetPredicate,
          helper.cutArray(values),
          language,
          datatype,
        );
      }
      return;
    }

    if (constant) {
      // we have a constant definition
      constant = helper.getConstant(helper.cutArray(constant), prefixes);
      const isTypePredicate =
        prefixhelper.replacePrefixWithURL(targetPredicate, prefixes) ===
        RDF.type;
      if (
        !isTypePredicate &&
        termtype &&
        prefixhelper.replacePrefixWithURL(termtype, prefixes) === RR.IRI
      ) {
        constant = toIriNode(constant);
      }
      helper.setObjPredicate(obj, targetPredicate, constant, language, datatype);
      return;
    }

    if (objectmap.parentTriplesMap && objectmap.parentTriplesMap['@id']) {
      // we have a parentTriplesmap: only record the link on obj here
      if (!obj.$parentTriplesMap) {
        obj.$parentTriplesMap = {};
      }
      const joinConditions = objectmap.joinCondition
        ? helper.addArray(objectmap.joinCondition)
        : undefined;
      if (!obj.$parentTriplesMap[targetPredicate]) {
        obj.$parentTriplesMap[targetPredicate] = [];
      }
      const link = objectmap.joinCondition
        ? {
            joinCondition: joinConditions.map((cond) => ({
              parentPath: cond.parent,
              child: getDataFromParser(Parser, index, cond.child, options),
            })),
            mapID: objectmap['@id'],
          }
        : { mapID: objectmap['@id'] };
      obj.$parentTriplesMap[targetPredicate].push(link);
      return;
    }

    if (functionValue) {
      const definition = functionHelper.findDefinition(
        data,
        functionValue.predicateObjectMap,
        prefixes,
      );
      const parameters = functionHelper.findParameters(
        data,
        functionValue.predicateObjectMap,
        prefixes,
      );
      const calcParameters = await helper.calculateParams(
        Parser,
        parameters,
        index,
        options,
        data,
        prefixes,
      );
      const computed = await functionHelper.executeFunction(
        definition,
        calcParameters,
        options,
      );
      helper.setObjPredicate(obj, targetPredicate, computed, language, datatype);
    }
  };

  await Promise.all(objectMaps.map((objectmap) => applyObjectMap(objectmap)));
};
/**
 * Expands a template such as `http://ex.org/{id}` for one iteration.
 *
 * Every `{expression}` is looked up through the parser; when an expression
 * yields several values, one result is produced per combination of values.
 * Values are passed through helper.toURIComponent unless the term type is
 * rr:Literal. Each finished string is then passed through
 * prefixhelper.replacePrefixWithURL and helper.replaceEscapedChar.
 *
 * Values are inserted verbatim: `$`-sequences in the data (`$&`, `$$`, ...)
 * are not interpreted as String.prototype.replace patterns.
 *
 * @param {Object} Parser - the parser object.
 * @param {number} index - the iteration index to read from.
 * @param {string} template - the template string.
 * @param {Object} prefixes - all known prefixes.
 * @param {*} termType - the term type of the term map (may be undefined).
 * @param {Object} options - the mapper options.
 * @returns {string[]} the expanded templates; `[template]` unchanged when it
 *   has no `{` or the numbers of `{` and `}` differ.
 */
const calculateTemplate = (
  Parser,
  index,
  template,
  prefixes,
  termType,
  options,
) => {
  if (termType) {
    termType = prefixhelper.replacePrefixWithURL(termType, prefixes);
  }
  const beg = helper.locations('{', template);
  const end = helper.locations('}', template);
  if (beg.length === 0 || beg.length !== end.length) {
    return [template];
  }

  // The expressions between each pair of braces, in template order.
  const words = [];
  for (let i = 0; i < beg.length; i += 1) {
    words.push(template.slice(beg[i] + 1, end[i]));
  }
  const toInsert = words.map((w) =>
    helper.addArray(getDataFromParser(Parser, index, w, options)),
  );

  const shouldEncode = !termType || termType !== RR.Literal;
  const allComb = helper.allPossibleCases(toInsert);
  const templates = [];
  for (let c = 0; c < allComb.length; c += 1) {
    const combination = allComb[c];
    let finTemp = template;
    for (let pos = 0; pos < combination.length; pos += 1) {
      const value = shouldEncode
        ? helper.toURIComponent(combination[pos])
        : combination[pos];
      // Replacer function: the value must not be parsed for `$` patterns.
      finTemp = finTemp.replace(`{${words[pos]}}`, () => value);
    }
    templates.push(
      helper.replaceEscapedChar(
        prefixhelper.replacePrefixWithURL(finTemp, prefixes),
      ),
    );
  }
  return templates;
};
// Expose parseFile on this module's exports.
module.exports.parseFile = parseFile;