// rdfa-streaming-parser
// Version:
// A fast and lightweight streaming RDFa parser
// 227 lines • 8.84 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.Util = void 0;
const relative_to_absolute_iri_1 = require("relative-to-absolute-iri");
const RdfaProfile_1 = require("./RdfaProfile");
const rdf_data_factory_1 = require("rdf-data-factory");
/**
 * Look up `key` among the OWN properties of a plain-object mapping.
 * Inherited members (e.g. `constructor`, `toString`, `__proto__`) are never returned,
 * so keys taken from untrusted markup cannot resolve to Object.prototype members.
 * @param {{[p: string]: *}} mappings A plain-object dictionary.
 * @param {string} key The key to look up.
 * @return {*} The own value for the key, or undefined if there is none.
 */
function getOwnMapping(mappings, key) {
    return Object.prototype.hasOwnProperty.call(mappings, key) ? mappings[key] : undefined;
}
/**
 * A collection of utility functions.
 */
class Util {
    /**
     * @param dataFactory A data factory for creating terms; a new DataFactory is created when omitted.
     * @param {string} baseIRI The initial base IRI; defaults to the empty string.
     */
    constructor(dataFactory, baseIRI) {
        this.dataFactory = dataFactory || new rdf_data_factory_1.DataFactory();
        this.baseIRI = this.dataFactory.namedNode(baseIRI || '');
        // Kept separately because prefixed terms in vocab-mode are resolved against it (see createIri).
        this.baseIRIDocument = this.baseIRI;
    }
    /**
     * Retrieve the prefixes of the current tag's attributes.
     * @param {{[p: string]: string}} attributes A tag's attributes.
     * @param {{[p: string]: string}} parentPrefixes The prefixes from the parent tag.
     * @param {boolean} xmlnsPrefixMappings If prefixes should be extracted from xmlns:* attributes.
     * @return {{[p: string]: string}} The new prefixes,
     *                                 or parentPrefixes itself (same object) if this tag declares none.
     */
    static parsePrefixes(attributes, parentPrefixes, xmlnsPrefixMappings) {
        const additionalPrefixes = {};
        if (xmlnsPrefixMappings) {
            for (const attribute in attributes) {
                // Only 'xmlns:<prefix>' declares a prefix. A bare 'xmlns' (default namespace)
                // or an unrelated attribute that merely starts with 'xmlns' must be ignored.
                if (attribute.length > 6 && attribute.startsWith('xmlns:')) {
                    additionalPrefixes[attribute.slice(6)] = attributes[attribute];
                }
            }
        }
        if (!attributes.prefix && Object.keys(additionalPrefixes).length === 0) {
            return parentPrefixes;
        }
        const prefixes = Object.assign({}, parentPrefixes, additionalPrefixes);
        if (attributes.prefix) {
            // The regex is a shared global-flag instance; always start scanning from the beginning.
            Util.PREFIX_REGEX.lastIndex = 0;
            let prefixMatch;
            while ((prefixMatch = Util.PREFIX_REGEX.exec(attributes.prefix)) !== null) {
                // Skip entries without a prefix name, so no '' or 'undefined' key is registered.
                if (prefixMatch[1]) {
                    prefixes[prefixMatch[1]] = prefixMatch[2];
                }
            }
        }
        return prefixes;
    }
    /**
     * Expand the given term value based on the prefixes of the active tag.
     * @param {string} term A term value.
     * @param {IActiveTag} activeTag The active tag, whose `prefixesAll` holds the available prefixes.
     * @return {string} An expanded URL, or the term as-is.
     */
    static expandPrefixedTerm(term, activeTag) {
        // Check if the term is prefixed
        const colonIndex = term.indexOf(':');
        if (colonIndex >= 0) {
            const prefix = term.slice(0, colonIndex);
            const local = term.slice(colonIndex + 1);
            // Expand default namespace
            if (prefix === '') {
                return 'http://www.w3.org/1999/xhtml/vocab#' + local;
            }
            // Try to expand the prefix (own entries only, never inherited object members)
            const prefixElement = getOwnMapping(activeTag.prefixesAll, prefix);
            if (prefixElement) {
                return prefixElement + local;
            }
        }
        // Try to expand the term; matching is case-insensitive and must not depend on the host locale
        if (term) {
            const expandedTerm = getOwnMapping(activeTag.prefixesAll, term.toLowerCase());
            if (expandedTerm) {
                return expandedTerm;
            }
        }
        return term;
    }
    /**
     * Check if the given IRI is valid.
     * @param {string} iri A potential IRI.
     * @return {boolean} If the given IRI is valid.
     */
    static isValidIri(iri) {
        return Util.IRI_REGEX.test(iri);
    }
    /**
     * Determine the RDFa profile from the given content type.
     * Returns the empty-string profile for unknown content types.
     * @param {string} contentType A content type.
     * @returns {RdfaProfile} An RDFa profile.
     */
    static contentTypeToProfile(contentType) {
        return getOwnMapping(RdfaProfile_1.RDFA_CONTENTTYPES, contentType) || '';
    }
    /**
     * Get the base IRI.
     * Any fragment is stripped before the value is resolved against the current base IRI.
     * @param {string} baseIriValue A base IRI value.
     * @return A base IRI named node.
     */
    getBaseIRI(baseIriValue) {
        const fragmentIndex = baseIriValue.indexOf('#');
        const href = fragmentIndex >= 0 ? baseIriValue.slice(0, fragmentIndex) : baseIriValue;
        return this.dataFactory.namedNode((0, relative_to_absolute_iri_1.resolve)(href, this.baseIRI.value));
    }
    /**
     * If the term is the boolean `true`, return the baseIRI, otherwise return the term as-is.
     * @param {Term | boolean} term A term or boolean, where the boolean indicates the baseIRI.
     * @param {IActiveTag} activeTag An active tag.
     * @returns {Term} A term.
     */
    getResourceOrBaseIri(term, activeTag) {
        return term === true ? this.getBaseIriTerm(activeTag) : term;
    }
    /**
     * Get the active base IRI as an RDF term.
     * @param {IActiveTag} activeTag The active tag.
     * @return {NamedNode} The tag's local base IRI if it has one, otherwise this instance's base IRI.
     */
    getBaseIriTerm(activeTag) {
        return activeTag.localBaseIRI || this.baseIRI;
    }
    /**
     * Create terms for all whitespace-separated entries of the given value, in vocab-mode.
     * Entries that cannot be turned into a term are dropped.
     * @param {string} terms A whitespace-separated list of terms.
     * @param {IActiveTag} activeTag The current active tag.
     * @param {boolean} allowTerms If entries without a colon should be considered.
     * @param {boolean} allowBlankNode If blank nodes are allowed.
     * @return {Term[]} The created terms.
     */
    createVocabIris(terms, activeTag, allowTerms, allowBlankNode) {
        return terms.split(/\s+/)
            .filter((term) => term && (allowTerms || term.indexOf(':') >= 0))
            .map((property) => this.createIri(property, activeTag, true, true, allowBlankNode))
            .filter((term) => term != null);
    }
    /**
     * Create a new literal node.
     * If the tag has `interpretObjectAsTime` set and no datatype yet, the first matching entry of
     * Util.TIME_REGEXES determines an XSD datatype, which is also stored on `activeTag.datatype`.
     * @param {string} literal The literal value.
     * @param {IActiveTag} activeTag The current active tag (its datatype may be set by this call).
     * @return {Literal} A new literal node.
     */
    createLiteral(literal, activeTag) {
        if (activeTag.interpretObjectAsTime && !activeTag.datatype) {
            for (const entry of Util.TIME_REGEXES) {
                if (literal.match(entry.regex)) {
                    activeTag.datatype = this.dataFactory.namedNode(Util.XSD + entry.type);
                    break;
                }
            }
        }
        const language = activeTag.language == null ? undefined : activeTag.language.toLowerCase();
        return this.dataFactory.literal(literal, activeTag.datatype || language);
    }
    /**
     * Create a blank node.
     * Uses `this.blankNodeFactory` when one has been assigned, otherwise the data factory.
     * @returns {BlankNode} A new blank node.
     */
    createBlankNode() {
        if (this.blankNodeFactory) {
            return this.blankNodeFactory();
        }
        return this.dataFactory.blankNode();
    }
    /**
     * Create a named node (or blank node) for the given term.
     * @param {string} term A term, CURIE, safe CURIE or IRI; a falsy value is treated as ''.
     * @param {IActiveTag} activeTag The current active tag.
     * @param {boolean} vocab If true, terms are expanded in vocab-mode; otherwise they are resolved
     *                        against the active base IRI.
     * @param {boolean} allowSafeCurie If false, the term is only handled as a plain IRI.
     * @param {boolean} allowBlankNode If blank nodes ('_:' terms) are allowed.
     * @return {Term | null} The created term, or null if no valid term could be created.
     */
    createIri(term, activeTag, vocab, allowSafeCurie, allowBlankNode) {
        term = term || '';
        if (!allowSafeCurie) {
            if (!vocab) {
                term = (0, relative_to_absolute_iri_1.resolve)(term, this.getBaseIriTerm(activeTag).value);
            }
            if (!Util.isValidIri(term)) {
                return null;
            }
            return this.dataFactory.namedNode(term);
        }
        // Handle strict CURIEs
        if (term.length > 0 && term[0] === '[' && term[term.length - 1] === ']') {
            term = term.slice(1, -1);
            // Strict CURIEs MUST have a prefix separator
            if (term.indexOf(':') < 0) {
                return null;
            }
        }
        // Handle blank nodes
        if (term.startsWith('_:')) {
            return allowBlankNode ? this.dataFactory.blankNode(term.slice(2) || 'b_identity') : null;
        }
        // Handle vocab IRIs
        if (vocab) {
            if (activeTag.vocab && term.indexOf(':') < 0) {
                return this.dataFactory.namedNode(activeTag.vocab + term);
            }
        }
        // Handle prefixed IRIs
        let iri = Util.expandPrefixedTerm(term, activeTag);
        // Resolve against baseIRI if in base-mode, or if the term was a prefixed relative IRI
        if (!vocab) {
            iri = (0, relative_to_absolute_iri_1.resolve)(iri, this.getBaseIriTerm(activeTag).value);
        }
        else if (term !== iri) {
            iri = (0, relative_to_absolute_iri_1.resolve)(iri, this.baseIRIDocument.value);
        }
        if (!Util.isValidIri(iri)) {
            return null;
        }
        return this.dataFactory.namedNode(iri);
    }
}
exports.Util = Util;
Util.RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
Util.XSD = 'http://www.w3.org/2001/XMLSchema#';
Util.RDFA = 'http://www.w3.org/ns/rdfa#';
Util.PREFIX_REGEX = /\s*([^:\s]*)*:\s*([^\s]*)*\s*/g;
Util.TIME_REGEXES = [
{
regex: /^-?P([0-9]+Y)?([0-9]+M)?([0-9]+D)?(T([0-9]+H)?([0-9]+M)?([0-9]+(\.[0-9])?S)?)?$/,
type: 'duration',
},
{
regex: /^[0-9]+-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]:[0-9][0-9]:[0-9][0-9]((Z?)|([\+-][0-9][0-9]:[0-9][0-9]))$/,
type: 'dateTime',
},
{ regex: /^[0-9]+-[0-9][0-9]-[0-9][0-9]Z?$/, type: 'date' },
{ regex: /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]((Z?)|([\+-][0-9][0-9]:[0-9][0-9]))$/, type: 'time' },
{ regex: /^[0-9]+-[0-9][0-9]$/, type: 'gYearMonth' },
{ regex: /^[0-9]+$/, type: 'gYear' },
];
Util.IRI_REGEX = /^([A-Za-z][A-Za-z0-9+-.]*|_):[^ "<>{}|\\\[\]`]*$/;
//# sourceMappingURL=Util.js.map