rdf-string-ttl
Version:
Convenience functions for creating and serializing RDF terms and quads following Turtle/SPARQL syntax
246 lines • 10.5 kB
JavaScript
// Set the `__esModule` marker on the exports object (interop flag conventionally emitted by transpilers).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The function declarations below are hoisted,
// so they can be assigned before their definitions appear in the file.
exports.termToString = termToString;
exports.getLiteralValue = getLiteralValue;
exports.getLiteralType = getLiteralType;
exports.getLiteralLanguage = getLiteralLanguage;
exports.getLiteralDirection = getLiteralDirection;
exports.stringToTerm = stringToTerm;
exports.quadToStringQuad = quadToStringQuad;
exports.stringQuadToQuad = stringQuadToQuad;
const rdf_data_factory_1 = require("rdf-data-factory");
// Fallback factory used by stringToTerm and stringQuadToQuad when the caller does not pass one.
const FACTORY = new rdf_data_factory_1.DataFactory();
/**
* Utility methods for converting between string-based RDF representations and RDFJS objects.
*
* RDF Terms are represented as follows:
* * Blank nodes: '_:myBlankNode'
* * Variables: '?myVariable'
* * Literals: '"myString"', '"myLanguageString"@en-us', '"myLanguageString"@en-us--ltr',
* '"<p>e</p>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML>'
* * URIs: '<http://example.org>'
*
* Quads/triples are represented as hashes with 'subject', 'predicate', 'object' and 'graph' (optional)
* as keys, and string-based RDF terms as values.
*/
/**
 * Serialize an RDFJS term into its string-based (Turtle/SPARQL-like) form.
 *
 * Named nodes become '<iri>', blank nodes '_:label', variables '?name',
 * literals '"value"' followed by an optional '^^<datatype>', '@language' and '--direction',
 * and quads '<<s p o>>' (or '<<s p o g>>' when the graph is not the default graph).
 * The default graph is serialized as its own value.
 *
 * @param {RDF.Term} term An RDFJS term.
 * @return {string} The string-based representation, or undefined for a falsy term
 *                  or a term type that is not handled here.
 */
function termToString(term) {
  if (!term) {
    return undefined;
  }
  const kind = term.termType;
  if (kind === 'NamedNode') {
    return `<${escapeIRI(term.value)}>`;
  }
  if (kind === 'BlankNode') {
    return `_:${term.value}`;
  }
  if (kind === 'Literal') {
    const lexical = escapeStringRDF(term.value);
    const datatype = term.datatype;
    // The datatype is left implicit for plain strings and for (directional) language strings.
    const hasExplicitDatatype = Boolean(datatype) &&
      datatype.value !== 'http://www.w3.org/2001/XMLSchema#string' &&
      datatype.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' &&
      datatype.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#dirLangString';
    const datatypePart = hasExplicitDatatype ? `^^<${datatype.value}>` : '';
    const languagePart = term.language ? `@${term.language}` : '';
    const directionPart = term.direction ? `--${term.direction}` : '';
    return `"${lexical}"${datatypePart}${languagePart}${directionPart}`;
  }
  if (kind === 'Quad') {
    const subject = termToString(term.subject);
    const predicate = termToString(term.predicate);
    const object = termToString(term.object);
    // The graph component is only written when it is not the default graph.
    const graphPart = term.graph.termType === 'DefaultGraph' ? '' : ` ${termToString(term.graph)}`;
    return `<<${subject} ${predicate} ${object}${graphPart}>>`;
  }
  if (kind === 'Variable') {
    return `?${term.value}`;
  }
  if (kind === 'DefaultGraph') {
    return term.value;
  }
  return undefined;
}
// Single-character escape sequences (Turtle ECHAR) mapped to the character they denote.
// This is the inverse of the escaping applied when literals are serialized.
const literalUnescapes = new Map([
  ['t', '\t'],
  ['n', '\n'],
  ['r', '\r'],
  ['b', '\b'],
  ['f', '\f'],
  ['"', '"'],
  ['\'', '\''],
  ['\\', '\\'],
]);
/**
 * Get the string value of a literal.
 *
 * The text between the opening quote and the last quote that is followed by a
 * valid suffix (nothing, '@...' or '^^...') is extracted, after which Turtle escape
 * sequences are decoded: '\t', '\n', '\r', '\b', '\f', '\"', '\'', '\\',
 * '\uXXXX' and '\UXXXXXXXX'. Unrecognized backslash sequences are left untouched.
 *
 * @param {string} literalValue An RDF literal enclosed by '"'.
 * @return {string} The unescaped literal value inside the '"'.
 * @throws {Error} If the given string is not a literal.
 */
function getLiteralValue(literalValue) {
  const match = /^"([^]*)"((\^\^.*)|(@.*))?$/u.exec(literalValue);
  if (!match) {
    throw new Error(`${literalValue} is not a literal`);
  }
  // Decode all escapes in a single left-to-right pass, so that an escaped backslash
  // followed by another escapable character (e.g. '\\n' or '\\"') is not decoded twice.
  return match[1].replace(/\\(?:U([\dA-Fa-f]{8})|u([\dA-Fa-f]{4})|([tnrbf"'\\]))/ug, (sequence, longHex, shortHex, char) => {
    if (char !== undefined) {
      return literalUnescapes.get(char);
    }
    const codePoint = Number.parseInt(longHex || shortHex, 16);
    // '\U' escapes beyond the Unicode range cannot be decoded; keep them as written.
    return codePoint > 0x10FFFF ? sequence : String.fromCodePoint(codePoint);
  });
}
/**
 * Determine the datatype IRI of a string-based literal.
 *
 * An explicit '^^<iri>' suffix wins; otherwise a literal with an '@' suffix is
 * reported as rdf:langString and any other literal as xsd:string.
 *
 * @param {string} literalValue An RDF literal.
 * @return {string} The datatype IRI of the literal.
 * @throws {Error} If the given string is not a literal.
 */
function getLiteralType(literalValue) {
  const parsed = /^"[^]*"(?:\^\^<([^"]+)>|(@)[^@"]+)?$/u.exec(literalValue);
  if (parsed === null) {
    throw new Error(`${literalValue} is not a literal`);
  }
  const [, datatypeIri, languageMarker] = parsed;
  if (datatypeIri) {
    return datatypeIri;
  }
  if (languageMarker) {
    return 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString';
  }
  return 'http://www.w3.org/2001/XMLSchema#string';
}
/**
 * Extract the language tag of a string-based literal.
 *
 * The tag is lowercased, and a trailing '--<direction>' part is cut off.
 *
 * @param {string} literalValue An RDF literal.
 * @return {string} The lowercase language of the literal, or '' if it has none.
 * @throws {Error} If the given string is not a literal.
 */
function getLiteralLanguage(literalValue) {
  const parsed = /^"[^]*"(?:@([^@"]+)|\^\^[^"]+)?$/u.exec(literalValue);
  if (parsed === null) {
    throw new Error(`${literalValue} is not a literal`);
  }
  const rawTag = parsed[1];
  if (!rawTag) {
    return '';
  }
  // Anything from the first '--' onwards is the base direction, which is handled separately.
  const [language] = rawTag.toLowerCase().split('--');
  return language;
}
/**
 * Get the base direction of the given literal.
 *
 * The direction is the part after '--' in a language tag suffix, e.g. '"abc"@en--ltr'.
 * Only suffixes that start with '@' are inspected, so a '--' that occurs inside a
 * datatype IRI (e.g. '"abc"^^<http://example.org/my--type>') is not mistaken for a direction.
 *
 * @param {string} literalValue An RDF literal.
 * @return {string} The direction of the literal ('ltr' or 'rtl'), or '' if it has none.
 * @throws {Error} If the language tag carries a direction other than 'ltr' or 'rtl'.
 */
function getLiteralDirection(literalValue) {
  // Everything after the last quote is the literal's suffix (language tag or datatype).
  const suffixStart = literalValue.lastIndexOf('"') + 1;
  // Only a language tag can carry a base direction.
  if (literalValue[suffixStart] !== '@') {
    return '';
  }
  const doubleDashPos = literalValue.indexOf('--', suffixStart);
  if (doubleDashPos < 0) {
    return '';
  }
  const direction = literalValue.slice(doubleDashPos + 2);
  if (direction === 'ltr' || direction === 'rtl') {
    return direction;
  }
  throw new Error(`${literalValue} is not a literal with a valid direction`);
}
/**
 * Split a nested quad string ('<<...>>') into the strings of its component terms.
 *
 * Components are separated by single spaces. Spaces, '<' and '>' that occur inside
 * an IRI ('<...>'), inside a quoted literal ('"..."', honoring backslash escapes),
 * or inside a deeper nested quad ('<<...>>') do not act as separators or brackets.
 *
 * @param {string} value A string that starts with '<<' and ends with '>>'.
 * @return {string[]} The component term strings, in order of appearance.
 * @throws {Error} If the angle brackets are not balanced.
 */
function splitNestedQuadTerms(value) {
  const terms = value.slice(2, -2);
  const stringTerms = [];
  let openTags = 0;
  let inIri = false;
  let inLiteral = false;
  let lastIndex = 0;
  for (let i = 0; i < terms.length; i++) {
    const char = terms[i];
    if (inLiteral) {
      if (char === '\\') {
        // Skip the escaped character, so that an escaped quote does not end the literal
        i++;
      }
      else if (char === '"') {
        inLiteral = false;
      }
      continue;
    }
    if (inIri) {
      // Everything up to the closing '>' belongs to the IRI
      if (char === '>') {
        inIri = false;
        openTags--;
      }
      continue;
    }
    if (char === '"') {
      inLiteral = true;
    }
    else if (char === '<') {
      if (terms[i + 1] === '<') {
        // '<<' opens a nested quad, which is closed by two separate '>'
        openTags += 2;
        i++;
      }
      else {
        // A single '<' opens an IRI
        openTags++;
        inIri = true;
      }
    }
    else if (char === '>') {
      if (openTags === 0) {
        throw new Error(`Found closing tag without opening tag in ${value}`);
      }
      openTags--;
    }
    else if (char === ' ' && openTags === 0) {
      stringTerms.push(terms.slice(lastIndex, i));
      lastIndex = i + 1;
    }
  }
  if (openTags !== 0) {
    throw new Error(`Found opening tag without closing tag in ${value}`);
  }
  stringTerms.push(terms.slice(lastIndex));
  return stringTerms;
}
/**
 * Transform a string-based RDF term to an RDFJS term.
 *
 * The first character selects the term type: '_' for blank nodes ('_:label'),
 * '?' for variables, '"' for literals, and '<' for named nodes ('<iri>') or
 * nested quads ('<<s p o>>' / '<<s p o g>>'). A falsy or empty value yields the default graph.
 * All terms, including the components of nested quads, are created with the same data factory.
 *
 * @param {string} value A string-based RDF-term.
 * @param {RDF.DataFactory} dataFactory An optional datafactory to create terms with.
 * @return {RDF.Term} An RDF-JS term.
 * @throws {Error} If a variable is given but the factory has no 'variable()' method,
 *                 if a nested quad is malformed, or if the value is not a valid term string.
 */
function stringToTerm(value, dataFactory) {
  dataFactory = dataFactory || FACTORY;
  if (!value || (value.length === 0)) {
    return dataFactory.defaultGraph();
  }
  switch (value[0]) {
    case '_': return dataFactory.blankNode(value.slice(2));
    case '?':
      if (!dataFactory.variable) {
        throw new Error(`Missing 'variable()' method on the given DataFactory`);
      }
      return dataFactory.variable(value.slice(1));
    case '"': {
      const language = getLiteralLanguage(value);
      const direction = getLiteralDirection(value);
      const type = dataFactory.namedNode(getLiteralType(value));
      // Language-tagged literals take a language/direction descriptor, all others their datatype
      return dataFactory.literal(getLiteralValue(value), language ? { language, direction } : type);
    }
    case '<':
    default:
      if (value.startsWith('<<') && value.endsWith('>>')) {
        const stringTerms = splitNestedQuadTerms(value);
        // We require 3 or 4 components
        if (stringTerms.length !== 3 && stringTerms.length !== 4) {
          throw new Error(`Nested quad syntax error ${value}`);
        }
        // Forward the data factory, so nested terms are created by the same factory as the quad itself
        return dataFactory.quad(
          stringToTerm(stringTerms[0], dataFactory),
          stringToTerm(stringTerms[1], dataFactory),
          stringToTerm(stringTerms[2], dataFactory),
          stringTerms[3] ? stringToTerm(stringTerms[3], dataFactory) : undefined,
        );
      }
      if (!value.startsWith('<') || !value.endsWith('>')) {
        throw new Error(`Detected invalid iri for named node (must be wrapped in <>): ${value}`);
      }
      return dataFactory.namedNode(value.slice(1, -1));
  }
}
/**
 * Serialize each position of an RDFJS quad into its string-based term representation.
 * @param {Quad} quad An RDFJS quad.
 * @return {IStringQuad} A hash with the string-based 'subject', 'predicate', 'object' and 'graph' terms.
 * @template Q The type of quad, defaults to RDF.Quad.
 */
function quadToStringQuad(quad) {
  const { subject, predicate, object, graph } = quad;
  const stringQuad = {
    subject: termToString(subject),
    predicate: termToString(predicate),
    object: termToString(object),
    graph: termToString(graph),
  };
  return stringQuad;
}
/**
 * Parse a string-based quad into an RDFJS quad.
 * Every position is parsed with stringToTerm, using the same data factory that creates the quad.
 * @param {IStringQuad} stringQuad A hash with string-based quad terms.
 * @param {RDF.DataFactory} dataFactory An optional datafactory to create terms with.
 * @return {Q} An RDFJS quad.
 * @template Q The type of quad, defaults to RDF.Quad.
 */
function stringQuadToQuad(stringQuad, dataFactory) {
  const factory = dataFactory || FACTORY;
  const parse = (termString) => stringToTerm(termString, factory);
  const subject = parse(stringQuad.subject);
  const predicate = parse(stringQuad.predicate);
  const object = parse(stringQuad.object);
  const graph = parse(stringQuad.graph);
  return factory.quad(subject, predicate, object, graph);
}
/**
 * Escape the characters of an IRI value that are matched by escapePattern.
 * @param {string} iriValue The raw IRI value.
 * @return {string} The IRI value with every matched character replaced by its escape sequence.
 */
function escapeIRI(iriValue) {
  const escaped = iriValue.replace(escapePattern, replaceEscapedCharacter);
  return escaped;
}
/**
 * Escape the characters of a literal value that are matched by escapePattern.
 * Values without any such character are returned as-is.
 * @param {string} stringValue The raw literal value.
 * @return {string} The literal value with every matched character replaced by its escape sequence.
 */
function escapeStringRDF(stringValue) {
  const needsEscaping = escapePattern.test(stringValue);
  // String#replace with a global pattern restarts from index 0, regardless of the preceding test()
  return needsEscaping ? stringValue.replace(escapePattern, replaceEscapedCharacter) : stringValue;
}
// Characters in literals and IRIs that require escaping; used by escapeIRI and escapeStringRDF.
// Matches '"', '\', tab, newline, carriage return, backspace, form feed,
// the code units U+0000 through U+0019, and any surrogate pair (high surrogate followed by low surrogate).
// The pattern has no `u` flag, so a surrogate pair is matched as two consecutive UTF-16 code units.
// NOTE: because of the `g` flag, `.test()` and `.exec()` on this pattern are stateful through `lastIndex`.
/* eslint-disable require-unicode-regexp */ /* eslint-disable unicorn/better-regex */
const escapePattern = /["\\\t\n\r\b\f\u0000-\u0019]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
/* eslint-enable require-unicode-regexp */ /* eslint-enable unicorn/better-regex */
// Characters that have a dedicated single-character escape sequence.
const escapes = new Map([
  ['\\', '\\\\'],
  ['"', '\\"'],
  ['\t', '\\t'],
  ['\n', '\\n'],
  ['\r', '\\r'],
  ['\b', '\\b'],
  ['\f', '\\f'],
]);
/**
 * Produce the escape sequence for a single match of the escape pattern.
 *
 * Characters with a dedicated escape use it; any other single code unit becomes
 * '\uXXXX', and a surrogate pair becomes '\UXXXXXXXX' (lowercase hex, zero-padded).
 *
 * @param {string} character A single UTF-16 code unit, or a surrogate pair.
 * @return {string} The escape sequence for the given character.
 */
function replaceEscapedCharacter(character) {
  const dedicated = escapes.get(character);
  if (dedicated) {
    return dedicated;
  }
  if (character.length === 1) {
    // Single code unit, i.e. not a surrogate pair
    const hex = character.charCodeAt(0).toString(16);
    return `\\u${hex.padStart(4, '0')}`;
  }
  // Surrogate pair: combine both halves into the code point they encode.
  // 0x2400 equals 0x10000 - 0xDC00, i.e. the plane offset minus the low-surrogate base.
  const high = character.charCodeAt(0);
  const low = character.charCodeAt(1);
  const codePoint = (high - 0xD800) * 0x400 + low + 0x2400;
  return `\\U${codePoint.toString(16).padStart(8, '0')}`;
}
//# sourceMappingURL=TermUtil.js.map
;