UNPKG

rdf-string-ttl

Version:

Convenience functions for creating and serializing RDF terms and quads following Turtle/SPARQL syntax

246 lines 10.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.termToString = termToString; exports.getLiteralValue = getLiteralValue; exports.getLiteralType = getLiteralType; exports.getLiteralLanguage = getLiteralLanguage; exports.getLiteralDirection = getLiteralDirection; exports.stringToTerm = stringToTerm; exports.quadToStringQuad = quadToStringQuad; exports.stringQuadToQuad = stringQuadToQuad; const rdf_data_factory_1 = require("rdf-data-factory"); const FACTORY = new rdf_data_factory_1.DataFactory(); /** * Utility methods for converting between string-based RDF representations and RDFJS objects. * * RDF Terms are represented as follows: * * Blank nodes: '_:myBlankNode' * * Variables: '_myVariable' * * Literals: '"myString"', '"myLanguageString"@en-us', '"myLanguageString"@en-us--ltr', * '"<p>e</p>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML>' * * URIs: '<http://example.org>' * * Quads/triples are represented as hashes with 'subject', 'predicate', 'object' and 'graph' (optional) * as keys, and string-based RDF terms as values. */ /** * Convert an RDFJS term to a string-based representation. * @param {RDF.Term} term An RDFJS term. * @return {string} A string-based term representation. */ function termToString(term) { // TODO: remove nasty any casts when this TS bug has been fixed: https://github.com/microsoft/TypeScript/issues/26933 if (!term) { return undefined; } switch (term.termType) { case 'NamedNode': return (`<${escapeIRI(term.value)}>`); case 'BlankNode': return (`_:${term.value}`); case 'Literal': { const literalValue = term; return (`"${escapeStringRDF(literalValue.value)}"${literalValue.datatype && literalValue.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string' && literalValue.datatype.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' && literalValue.datatype.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#dirLangString' ? `^^<${literalValue.datatype.value}>` : ''}${literalValue.language ? `@${literalValue.language}` : ''}${literalValue.direction ? `--${literalValue.direction}` : ''}`); } case 'Quad': return (`<<${termToString(term.subject)} ${termToString(term.predicate)} ${termToString(term.object)}${term.graph.termType === 'DefaultGraph' ? '' : ` ${termToString(term.graph)}`}>>`); case 'Variable': return (`?${term.value}`); case 'DefaultGraph': return term.value; } } /** * Get the string value of a literal. * @param {string} literalValue An RDF literal enclosed by '"'. * @return {string} The literal value inside the '"'. */ function getLiteralValue(literalValue) { const match = /^"([^]*)"((\^\^.*)|(@.*))?$/u.exec(literalValue); if (!match) { throw new Error(`${literalValue} is not a literal`); } // eslint-disable-next-line unicorn/prefer-string-replace-all return match[1].replace(/\\"/ug, '"'); } /** * Get the datatype of the given literal. * @param {string} literalValue An RDF literal. * @return {string} The datatype of the literal. */ function getLiteralType(literalValue) { const match = /^"[^]*"(?:\^\^<([^"]+)>|(@)[^@"]+)?$/u.exec(literalValue); if (!match) { throw new Error(`${literalValue} is not a literal`); } return match[1] || (match[2] ? 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' : 'http://www.w3.org/2001/XMLSchema#string'); } /** * Get the language of the given literal. * @param {string} literalValue An RDF literal. * @return {string} The language of the literal. */ function getLiteralLanguage(literalValue) { const match = /^"[^]*"(?:@([^@"]+)|\^\^[^"]+)?$/u.exec(literalValue); if (!match) { throw new Error(`${literalValue} is not a literal`); } if (match[1]) { let ret = match[1].toLowerCase(); // Remove everything after --, since this indicates the base direction, which will be parsed later. const doubleDashPos = ret.indexOf('--'); if (doubleDashPos >= 0) { ret = ret.slice(0, doubleDashPos); } return ret; } return ''; } /** * Get the direction of the given literal. * @param {string} literalValue An RDF literal. * @return {string} The direction of the literal. */ function getLiteralDirection(literalValue) { const doubleDashPos = literalValue.indexOf('--', literalValue.lastIndexOf('"')); if (doubleDashPos >= 0) { const direction = literalValue.slice(doubleDashPos + 2, literalValue.length); if (direction === 'ltr' || direction === 'rtl') { return direction; } throw new Error(`${literalValue} is not a literal with a valid direction`); } return ''; } /** * Transform a string-based RDF term to an RDFJS term. * @param {string} value A string-based RDF-term. * @param {RDF.DataFactory} dataFactory An optional datafactory to create terms with. * @return {RDF.Term} An RDF-JS term. */ function stringToTerm(value, dataFactory) { dataFactory = dataFactory || FACTORY; if (!value || (value.length === 0)) { return dataFactory.defaultGraph(); } switch (value[0]) { case '_': return dataFactory.blankNode(value.slice(2)); case '?': if (!dataFactory.variable) { throw new Error(`Missing 'variable()' method on the given DataFactory`); } return dataFactory.variable(value.slice(1)); case '"': { const language = getLiteralLanguage(value); const direction = getLiteralDirection(value); const type = dataFactory.namedNode(getLiteralType(value)); return dataFactory.literal(getLiteralValue(value), language ? { language, direction } : type); } case '<': default: if (value.startsWith('<<') && value.endsWith('>>')) { // Iterate character-by-character to detect spaces that are *not* wrapped in <<>> const terms = value.slice(2, -2); const stringTerms = []; let ignoreTags = 0; let lastIndex = 0; for (let i = 0; i < terms.length; i++) { const char = terms[i]; if (char === '<') { ignoreTags++; } if (char === '>') { if (ignoreTags === 0) { throw new Error(`Found closing tag without opening tag in ${value}`); } else { ignoreTags--; } } if (char === ' ' && ignoreTags === 0) { stringTerms.push(terms.slice(lastIndex, i)); lastIndex = i + 1; } } if (ignoreTags !== 0) { throw new Error(`Found opening tag without closing tag in ${value}`); } stringTerms.push(terms.slice(lastIndex, terms.length)); // We require 3 or 4 components if (stringTerms.length !== 3 && stringTerms.length !== 4) { throw new Error(`Nested quad syntax error ${value}`); } return dataFactory.quad(stringToTerm(stringTerms[0]), stringToTerm(stringTerms[1]), stringToTerm(stringTerms[2]), stringTerms[3] ? stringToTerm(stringTerms[3]) : undefined); } if (!value.startsWith('<') || !value.endsWith('>')) { throw new Error(`Detected invalid iri for named node (must be wrapped in <>): ${value}`); } return dataFactory.namedNode(value.slice(1, -1)); } } /** * Convert an RDFJS quad to a string-based quad representation. * @param {Quad} q An RDFJS quad. * @return {IStringQuad} A hash with string-based quad terms. * @template Q The type of quad, defaults to RDF.Quad. */ function quadToStringQuad(quad) { return { subject: termToString(quad.subject), predicate: termToString(quad.predicate), object: termToString(quad.object), graph: termToString(quad.graph), }; } /** * Convert a string-based quad representation to an RDFJS quad. * @param {IStringQuad} stringQuad A hash with string-based quad terms. * @param {RDF.DataFactory} dataFactory An optional datafactory to create terms with. * @return {Q} An RDFJS quad. * @template Q The type of quad, defaults to RDF.Quad. */ function stringQuadToQuad(stringQuad, dataFactory) { dataFactory = dataFactory || FACTORY; return dataFactory.quad(stringToTerm(stringQuad.subject, dataFactory), stringToTerm(stringQuad.predicate, dataFactory), stringToTerm(stringQuad.object, dataFactory), stringToTerm(stringQuad.graph, dataFactory)); } function escapeIRI(iriValue) { return iriValue.replace(escapePattern, replaceEscapedCharacter); } function escapeStringRDF(stringValue) { if (escapePattern.test(stringValue)) { stringValue = stringValue.replace(escapePattern, replaceEscapedCharacter); } return stringValue; } // Characters in literals and IRIs that require escaping // Also containing potential surrogate pairs /* eslint-disable require-unicode-regexp */ /* eslint-disable unicorn/better-regex */ const escapePattern = /["\\\t\n\r\b\f\u0000-\u0019]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g; /* eslint-enable require-unicode-regexp */ /* eslint-enable unicorn/better-regex */ const escapes = new Map([ ['\\', '\\\\'], ['"', '\\"'], ['\t', '\\t'], ['\n', '\\n'], ['\r', '\\r'], ['\b', '\\b'], ['\f', '\\f'], ]); function replaceEscapedCharacter(character) { // Try simplest case first, get replacement for character const result = escapes.get(character); if (!result) { if (character.length === 1) { // Single unicode charachters, i.e. not a surrogate pair const code = character.charCodeAt(0).toString(16); return `${'\\u0000'.slice(0, -code.length)}${code}`; } // Surrogate pairs const code = ((character.charCodeAt(0) - 55296) * 1024 + character.charCodeAt(1) + 9216).toString(16); return `${'\\U00000000'.slice(0, -code.length)}${code}`; } return result; } //# sourceMappingURL=TermUtil.js.map