@shexjs/term
Version:
Shape Expressions triple expression evaluator - simple regexp returning 1 error.
238 lines (216 loc) • 8.27 kB
text/typescript
/**
* Terms used in ShEx.
*
* There are three representations of RDF terms used in ShEx NamedNode validation and applications:
* 1. LD (short for JSON-LD) @ids used in ShExJ.
* "http://a.example/some/Iri
* "_:someBlankNode
* { "value": "1.0", "datatype": "http://www.w3.org/2001/XMLSchema#float" }
* { "value": "chat", "language": "fr" }
* 2. RdfJs Terms [RdfJsTerm] specification used in validation
* { "termType": "NamedNode": "value": "http://a.example/some/Iri" }
* { "termType": "BlankNode": "value": "someBlankNode" }
* { "termType": "Literal": "value": "1.0", "datatype": "http://www.w3.org/2001/XMLSchema#float" }
* { "termType": "Literal": "value": "chat", "language": "fr" }
* 3. Turtle representation is used for human interfaces
* <http://a.example/some/Iri>, p:IRI, p:, :
* _:someBlankNode, []
* "1.0"^^<http://www.w3.org/2001/XMLSchema#float>, "1.0"^^xsd:float, 1.0
* "chat"@fr
* "1.0"^^http://www.w3.org/2001/XMLSchema#float
*
* [RdfJsTerm](https://rdf.js.org/data-model-spec/#term-interface)
*/
/*
RdfLangString,
XsdString,
Terminals,
rdfJsTerm2Turtle,
shExJsTerm2Turtle,
shExJsTerm2Ld,
ld2RdfJsTerm,
rdfJsTerm2Ld,
iri2Turtle,
*/
import * as ShExJ from 'shexj';
import {Term as RdfJsTerm} from 'rdf-js';
const RelativizeIri = require("relativize-url").relativize;
// import {relativize as RelativizeIri} from "relativize-url"; // someone should lecture the maintainer
import {DataFactory} from 'rdf-data-factory';
const RdfJsFactory = new DataFactory();
import {ObjectLiteral, objectValue} from "shexj";
export {};
export interface SchemaIndex {
shapeExprs: { [id: string]: ShExJ.ShapeDecl };
tripleExprs: { [id: string]: ShExJ.tripleExpr };
labelToTcs: { [id: string]: ShExJ.TripleConstraint[] }
}
export interface InternalSchema extends ShExJ.Schema {
_index?: SchemaIndex
}
export interface ShapeMapEntry {
node: string;
shape: string;
}
export type ShapeMap = ShapeMapEntry[];
interface PrefixMap {
[id: string]: string;
}
export interface Meta {
base: string;
prefixes: PrefixMap;
}
export const RdfLangString = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString";
export const XsdString = "http://www.w3.org/2001/XMLSchema#string";
const PN_CHARS_BASE = "A-Za-z\u{C0}-\u{D6}\u{D8}-\u{F6}\u{F8}-\u{2FF}\u{370}-\u{37D}\u{37F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}"; // escape anything outside BMP: \u{10000}-\u{EFFFF}
const PN_CHARS_U = PN_CHARS_BASE + "_";
const PN_CHARS_WO_HYPHEN = PN_CHARS_U + "0-9\u{B7}\u{300}-\u{36F}\u{203F}-\u{2040}";
const PN_PREFIX = [PN_CHARS_BASE, PN_CHARS_WO_HYPHEN + '.-', PN_CHARS_WO_HYPHEN + '-'];
const PN_LOCAL = [
PN_CHARS_U + ":0-9",
PN_CHARS_WO_HYPHEN + ".:-",
PN_CHARS_WO_HYPHEN + ":-"
];
export const Terminals: {
[key: string]: {
[key: string]: string | string[]
}
} = {
Turtle: {
PN_CHARS_BASE,
PN_CHARS_U,
PN_CHARS_WO_HYPHEN,
PN_PREFIX,
PN_LOCAL,
}
};
export function rdfJsTerm2Turtle (node: RdfJsTerm, meta?: Meta): string {
switch (node.termType) {
case ("NamedNode"):
return iri2Turtle(node.value, meta);
case ("BlankNode"):
return "_:" + node.value;
case ("Literal"):
return "\"" + node.value.replace(/"/g, '\\"') + "\"" + (
node.datatype.value === RdfLangString
? "@" + node.language
: node.datatype.value === XsdString
? ""
: "^^" + node.datatype.value
);
default: throw Error(`rdfJsTerm2Turtle: unknown RDFJS node type: ${JSON.stringify(node)}`)
}
}
export function shExJsTerm2Turtle (node: any, meta: Meta = {base: "", prefixes: {}}, aForType?: boolean): any {
if (typeof node === "string") {
if (node.startsWith("_:")) {
return node;
} else {
return iri2Turtle(node, meta, aForType);
}
} else if (typeof node === "object" && "value" in node) {
let value = node.value;
const type = node.type;
const language = node.language;
// Escape special characters
if (escape.test(value))
value = value.replace(escapeAll, characterReplacer);
// Write the literal, possibly with type or language
if (language)
return '"' + value + '"@' + language;
else if (type && type !== "http://www.w3.org/2001/XMLSchema#string")
return '"' + value + '"^^' + iri2Turtle(type, meta, false);
else
return '"' + value + '"';
} else {
throw Error("Unknown internal term type: " + JSON.stringify(node));
}
}
// Characters in literals that require escaping
const escape = /["\\\t\n\r\b\f\u0000-\u0019\ud800-\udbff]/;
const escapeAll = /["\\\t\n\r\b\f\u0000-\u0019]|[\ud800-\udbff][\udc00-\udfff]/g;
const escapeReplacements: {
[key: string]: string
} = {
'\\': '\\\\', '"': '\\"', '\t': '\\t',
'\n': '\\n', '\r': '\\r', '\b': '\\b', '\f': '\\f',
};
// Replaces a character by its escaped version
function characterReplacer (character: string): string {
// Replace a single character by its escaped version
let result = escapeReplacements[character]; // @@ const should be let
if (result === undefined) {
// Replace a single character with its 4-bit unicode escape sequence
if (character.length === 1) {
result = character.charCodeAt(0).toString(16);
result = '\\u0000'.substr(0, 6 - result.length) + result;
}
// Replace a surrogate pair with its 8-bit unicode escape sequence
else {
result = ((character.charCodeAt(0) - 0xD800) * 0x400 +
character.charCodeAt(1) + 0x2400).toString(16);
result = '\\U00000000'.substr(0, 10 - result.length) + result;
}
}
return result;
}
export function ld2RdfJsTerm (ld: objectValue): RdfJsTerm {
switch (typeof ld) {
case 'object':
const copy = JSON.parse(JSON.stringify(ld));
if (!copy.value)
throw Error(`JSON-LD-style object literal has no value: ${JSON.stringify(ld)}`)
const value = copy.value;
delete copy.value;
if (copy.language)
return RdfJsFactory.literal(value, copy.language);
if (copy.type)
return RdfJsFactory.literal(value, RdfJsFactory.namedNode(copy.type));
if (Object.keys(copy).length > 0)
throw Error(`Unrecognized attributes in JSON-LD-style object literal: ${JSON.stringify(Object.keys(copy))}`)
return RdfJsFactory.literal(value);
case 'string':
return ld.startsWith('_:')
? RdfJsFactory.blankNode(ld.substr(2))
: RdfJsFactory.namedNode(ld);
default: throw Error(`Unrecognized JSON-LD-style term: ${JSON.stringify(ld)}`)
}
}
export function rdfJsTerm2Ld (term: RdfJsTerm): objectValue {
switch (term.termType) {
case "NamedNode": return term.value;
case "BlankNode": return "_:" + term.value;
case "Literal":
const ret: ObjectLiteral = { value: term.value };
const dt = term.datatype.value;
const lang = term.language;
if (dt &&
dt !== "http://www.w3.org/2001/XMLSchema#string" &&
dt !== "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
ret.type = dt;
if (lang)
ret.language = lang;
return ret;
default:
throw Error(`Unrecognized termType ${term.termType} ${term.value}`);
}
}
function iri2Turtle (iri: string, meta: Meta = { base: "", prefixes: {}}, aForType: boolean = true) {
const {base, prefixes = {}} = meta;
if (aForType && iri === "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
return "a";
const rel = "<" + (base.length > 0 ? RelativizeIri(iri, base) : iri) + ">";
for (const prefix in prefixes) {
const ns = prefixes[prefix];
if (iri.startsWith(ns)) {
const localName = iri.substr(ns.length);
const first = localName.slice(0, 1).replaceAll(new RegExp("[^" + Terminals.Turtle.PN_LOCAL[0] + "]", "g"), s => '\\' + s);
const middle = localName.slice(1, localName.length - 1).replaceAll(new RegExp("[^" + Terminals.Turtle.PN_LOCAL[1] + "]", "g"), s => '\\' + s);
const last = localName.length > 1 ? localName.slice(localName.length - 1).replaceAll(new RegExp("[^" + Terminals.Turtle.PN_LOCAL[2] + "]", "g"), s => '\\' + s) : '';
const pName = prefix + ':' + first + middle + last;
if (pName.length < rel.length)
return pName;
}
}
return rel;
}