rdflib
Version:
an RDF library for node.js. Suitable for client and server side.
458 lines (440 loc) • 18 kB
JavaScript
/**
* RDF/XML PARSER
*
* Parser believed to be in full positive RDF/XML parsing compliance
* with the possible exception of handling deprecated RDF attributes
* appropriately. Parser is believed to comply fully with other W3C
* and industry standards where appropriate (DOM, ECMAScript, &c.)
*
* Author: David Sheets <dsheets@mit.edu>
*
* W3C® SOFTWARE NOTICE AND LICENSE
* http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
* This work (and included software, documentation such as READMEs, or
* other related items) is being provided by the copyright holders under
* the following license. By obtaining, using and/or copying this work,
* you (the licensee) agree that you have read, understood, and will
* comply with the following terms and conditions.
*
* Permission to copy, modify, and distribute this software and its
* documentation, with or without modification, for any purpose and
* without fee or royalty is hereby granted, provided that you include
* the following on ALL copies of the software and documentation or
* portions thereof, including modifications:
*
* 1. The full text of this NOTICE in a location viewable to users of
* the redistributed or derivative work.
* 2. Any pre-existing intellectual property disclaimers, notices, or terms and
* conditions. If none exist, the W3C Software Short Notice should be
* included (hypertext is preferred, text is permitted) within the body
* of any redistributed or derivative code.
* 3. Notice of any changes or modifications to the files, including the
* date changes were made. (We recommend you provide URIs to the location
* from which the code is derived.)
*
* THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT
* HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS
* FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR
* DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS,
* TRADEMARKS OR OTHER RIGHTS.
*
* COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL
* OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR
* DOCUMENTATION.
*
* The name and trademarks of copyright holders may NOT be used in
* advertising or publicity pertaining to the software without specific,
* written prior permission. Title to copyright in this software and any
* associated documentation will at all times remain with copyright
* holders.
*/
/**
* @class RDFParser resource object tied to an RDFStore
*
* @author David Sheets <dsheets@mit.edu>
*
*/
import * as uriUtil from './uri'
export default class RDFParser {
  /**
   * @constructor
   * @param {RDFStore} store An RDFStore object
   */
  constructor (store) {
    /** Our triple store reference @private */
    this.store = store
    /** Our identified blank nodes (rdf:nodeID -> bnode) @private */
    this.bnodes = {}
    /** A context for context-aware stores @private */
    this.why = null
    /** Reification flag: when true, rdf:ID on property elements reifies the statement */
    this.reify = false
  }

  /** Standard namespaces that we know how to handle @final
   * @member RDFParser
   */
  static ns = {
    'RDF': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'RDFS': 'http://www.w3.org/2000/01/rdf-schema#'
  }

  /** DOM Level 2 node type magic numbers @final
   * @member RDFParser
   */
  static nodeType = {
    'ELEMENT': 1,
    'ATTRIBUTE': 2,
    'TEXT': 3,
    'CDATA_SECTION': 4,
    'ENTITY_REFERENCE': 5,
    'ENTITY': 6,
    'PROCESSING_INSTRUCTION': 7,
    'COMMENT': 8,
    'DOCUMENT': 9,
    'DOCUMENT_TYPE': 10,
    'DOCUMENT_FRAGMENT': 11,
    'NOTATION': 12
  }

  /**
   * Frame class for namespace and base URI lookups
   * Base lookups will always resolve because the parser knows
   * the default base.
   *
   * A frame is one level of the parse stack. Its `nodeType` is either
   * `NODE` (subject/object position) or `ARC` (predicate position); a triple
   * is emitted when a NODE frame sits under an ARC frame under a NODE frame.
   *
   * @private
   * @param {RDFParser} parser The owning parser (supplies store, bnodes, why)
   * @param {Object} [parent] The enclosing frame, if any
   * @param {Node} [element] The DOM node this frame is attached to, if any
   * @returns {Object} A fresh frame object
   */
  frameFactory (parser, parent, element) {
    return {
      'NODE': 1,
      'ARC': 2,
      'parent': parent,
      'parser': parser,
      'store': parser.store,
      'element': element,
      'lastChild': 0,
      'base': null,
      'lang': null,
      'node': null,
      'nodeType': null,
      'listIndex': 1,
      'rdfid': null,
      'datatype': null,
      'collection': false,

      /** Terminate the frame and notify the store that we're done */
      'terminateFrame': function () {
        if (this.collection) {
          this.node.close()
        }
      },

      /** Add a symbol of a certain type to this frame (URI is resolved against the frame base) */
      'addSymbol': function (type, uri) {
        uri = uriUtil.join(uri, this.base)
        this.node = this.store.sym(uri)
        this.nodeType = type
      },

      /** Load any constructed triples into the store */
      'loadTriple': function () {
        if (this.parent.parent.collection) {
          // the subject frame is a collection: append instead of adding a triple
          this.parent.parent.node.append(this.node)
        } else {
          this.store.add(this.parent.parent.node, this.parent.node, this.node, this.parser.why)
        }
        if (this.parent.rdfid != null) {
          // reify
          const triple = this.store.sym(uriUtil.join('#' + this.parent.rdfid, this.base))
          this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'type'), this.store.sym(RDFParser.ns.RDF + 'Statement'), this.parser.why)
          this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'subject'), this.parent.parent.node, this.parser.why)
          this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'predicate'), this.parent.node, this.parser.why)
          this.store.add(triple, this.store.sym(RDFParser.ns.RDF + 'object'), this.node, this.parser.why)
        }
      },

      /** Check if it's OK to load a triple: this frame is a NODE under an ARC under a NODE */
      'isTripleToLoad': function () {
        return (this.parent != null &&
          this.parent.parent != null &&
          this.nodeType === this.NODE &&
          this.parent.nodeType === this.ARC &&
          this.parent.parent.nodeType === this.NODE)
      },

      /** Add a symbolic node to this frame */
      'addNode': function (uri) {
        this.addSymbol(this.NODE, uri)
        if (this.isTripleToLoad()) {
          this.loadTriple()
        }
      },

      /** Add a collection node to this frame */
      'addCollection': function () {
        this.nodeType = this.NODE
        this.node = this.store.collection()
        this.collection = true
        if (this.isTripleToLoad()) {
          this.loadTriple()
        }
      },

      /** Add a collection arc to this frame */
      'addCollectionArc': function () {
        this.nodeType = this.ARC
      },

      /** Add a bnode to this frame; a non-null id is shared through parser.bnodes */
      'addBNode': function (id) {
        if (id != null) {
          if (this.parser.bnodes[id] != null) {
            this.node = this.parser.bnodes[id]
          } else {
            this.node = this.parser.bnodes[id] = this.store.bnode()
          }
        } else {
          this.node = this.store.bnode()
        }
        this.nodeType = this.NODE
        if (this.isTripleToLoad()) {
          this.loadTriple()
        }
      },

      /** Add an arc or property to this frame */
      'addArc': function (uri) {
        if (uri === RDFParser.ns.RDF + 'li') {
          // rdf:li expands to rdf:_1, rdf:_2, ... counted on the parent frame
          uri = RDFParser.ns.RDF + '_' + this.parent.listIndex
          this.parent.listIndex++
        }
        this.addSymbol(this.ARC, uri)
      },

      /** Add a literal to this frame (typed by the parent's datatype, else tagged with this frame's lang) */
      'addLiteral': function (value) {
        if (this.parent.datatype && this.parent.datatype !== RDFParser.ns.RDF + 'langString') {
          this.node = this.store.literal(value, this.store.sym(this.parent.datatype))
        } else {
          this.node = this.store.literal(value, this.lang)
        }
        this.nodeType = this.NODE
        if (this.isTripleToLoad()) {
          this.loadTriple()
        }
      }
    }
  }

  /**
   * Namespace-aware attribute lookup with a manual fallback for DOMs that
   * lack `getAttributeNodeNS`.
   * (from the OpenLayers source .. needed to get around IE problems.)
   *
   * @param {Element} node The element to inspect
   * @param {String} uri The attribute's namespace URI
   * @param {String} name The attribute's local name
   * @returns {Attr|null} The matching attribute node, or null if none was found
   */
  getAttributeNodeNS (node, uri, name) {
    if (node.getAttributeNodeNS) {
      return node.getAttributeNodeNS(uri, name)
    }
    const attributes = node.attributes
    for (let i = 0; i < attributes.length; ++i) {
      const potentialNode = attributes[i]
      if (potentialNode.namespaceURI === uri) {
        const fullName = (potentialNode.prefix) ? (potentialNode.prefix + ':' + name) : name
        if (fullName === potentialNode.nodeName) {
          return potentialNode
        }
      }
    }
    return null
  }

  /**
   * Build our initial scope frame and parse the DOM into triples
   * @param {HTMLDocument} document The DOM to parse (a document or an element)
   * @param {String} base The base URL to use
   * @param {Object} why The context to which this resource belongs
   * @returns {Boolean} true once parsing has completed
   * @throws {Error} If no root element can be found in `document`
   */
  parse (document, base, why) {
    const children = document.childNodes
    // clean up for the next run
    this.cleanParser()
    // figure out the root element
    let root
    if (document.nodeType === RDFParser.nodeType.DOCUMENT) {
      const count = children ? children.length : 0
      for (let c = 0; c < count; c++) {
        if (children[c].nodeType === RDFParser.nodeType.ELEMENT) {
          root = children[c]
          break
        }
      }
    } else if (document.nodeType === RDFParser.nodeType.ELEMENT) {
      root = document
    }
    if (!root) {
      // Covers unsupported node types and documents without any element
      // child; the latter used to surface later as an unrelated TypeError.
      throw new Error("RDFParser: can't find root in " + base + '. Halting. ')
    }
    this.why = why
    // our topmost frame
    const f = this.frameFactory(this)
    this.base = base
    f.base = base
    f.lang = null // was '' but can't have langs like that 2015 (!)
    this.parseDOM(this.buildFrame(f, root))
    return true
  }

  /**
   * Walk the DOM below the given frame iteratively, using the chain of
   * frames as an explicit stack, and emit triples into the store.
   *
   * @private
   * @param {Object} frame The frame wrapping the root element
   * @throws {Error} On an element/attribute without a namespace, or on a node
   *   element carrying both rdf:ID and rdf:about
   */
  parseDOM (frame) {
    // a DOM utility function used in parsing: namespace URI + local name
    const elementURI = (el) => {
      let result = ''
      if (el.namespaceURI == null) {
        throw new Error('RDF/XML syntax error: No namespace for ' + el.localName + ' in ' + this.base)
      }
      if (el.namespaceURI) {
        result = result + el.namespaceURI
      }
      if (el.localName) {
        result = result + el.localName
      } else if (el.nodeName) {
        if (el.nodeName.indexOf(':') >= 0) result = result + el.nodeName.split(':')[1]
        else result = result + el.nodeName
      }
      return result
    }
    let dig = true // if we'll dig down in the tree on the next iter
    while (frame.parent) {
      let dom = frame.element
      const attrs = dom.attributes
      if (dom.nodeType === RDFParser.nodeType.TEXT || dom.nodeType === RDFParser.nodeType.CDATA_SECTION) {
        // we have a literal
        if (frame.parent.nodeType === frame.NODE) {
          // must have had attributes, store as rdf:value
          frame.addArc(RDFParser.ns.RDF + 'value')
          frame = this.buildFrame(frame)
        }
        frame.addLiteral(dom.nodeValue)
      } else if (elementURI(dom) !== RDFParser.ns.RDF + 'RDF') {
        // not root
        if (frame.parent && frame.parent.collection) {
          // we're a collection element
          frame.addCollectionArc()
          frame = this.buildFrame(frame, frame.element)
          frame.parent.element = null
        }
        if (!frame.parent || !frame.parent.nodeType || frame.parent.nodeType === frame.ARC) {
          // we need a node
          const about = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'about')
          const rdfid = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'ID')
          if (about && rdfid) {
            throw new Error('RDFParser: ' + dom.nodeName + ' has both rdf:id and rdf:about.' +
              ' Halting. Only one of these' + ' properties may be specified on a' + ' node.')
          }
          if (!about && rdfid) {
            frame.addNode('#' + rdfid.nodeValue)
            dom.removeAttributeNode(rdfid)
          } else if (about == null && rdfid == null) {
            const bnid = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'nodeID')
            if (bnid) {
              frame.addBNode(bnid.nodeValue)
              dom.removeAttributeNode(bnid)
            } else {
              frame.addBNode()
            }
          } else {
            frame.addNode(about.nodeValue)
            dom.removeAttributeNode(about)
          }
          // Typed nodes: an element other than rdf:Description types the node
          // with the element's own URI.
          const rdftype = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'type')
          if (RDFParser.ns.RDF + 'Description' !== elementURI(dom)) {
            this.store.add(frame.node, this.store.sym(RDFParser.ns.RDF + 'type'),
              this.store.sym(uriUtil.join(elementURI(dom), frame.base)), this.why)
          }
          // An rdf:type attribute is an additional type whose value is an IRI.
          // It is handled independently of the element name and consumed here
          // so the property-attribute loop below cannot emit it as a literal.
          if (rdftype != null) {
            this.store.add(frame.node, this.store.sym(RDFParser.ns.RDF + 'type'),
              this.store.sym(uriUtil.join(rdftype.nodeValue, frame.base)), this.why)
            dom.removeAttributeNode(rdftype)
          }
          // Property Attributes (everything still left on the element)
          for (let x = attrs.length - 1; x >= 0; x--) {
            this.store.add(frame.node, this.store.sym(elementURI(attrs[x])),
              this.store.literal(attrs[x].nodeValue, frame.lang), this.why)
          }
        } else {
          // we should add an arc (or implicit bnode+arc)
          frame.addArc(elementURI(dom))
          // save the arc's rdf:ID if it has one
          if (this.reify) {
            const rdfid = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'ID')
            if (rdfid) {
              frame.rdfid = rdfid.nodeValue
              dom.removeAttributeNode(rdfid)
            }
          }
          const parsetype = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'parseType')
          const datatype = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'datatype')
          if (datatype) {
            frame.datatype = datatype.nodeValue
            dom.removeAttributeNode(datatype)
          }
          if (parsetype) {
            const nv = parsetype.nodeValue
            if (nv === 'Literal') {
              frame.datatype = RDFParser.ns.RDF + 'XMLLiteral'
              frame = this.buildFrame(frame)
              // Don't include the literal node, only its children
              // see https://github.com/linkeddata/rdflib.js/issues/75
              frame.addLiteral(dom.innerHTML || dom.childNodes)
              dig = false
            } else if (nv === 'Resource') {
              frame = this.buildFrame(frame, frame.element)
              frame.parent.element = null
              frame.addBNode()
            } else if (nv === 'Collection') {
              frame = this.buildFrame(frame, frame.element)
              frame.parent.element = null
              frame.addCollection()
            }
            dom.removeAttributeNode(parsetype)
          }
          if (attrs.length !== 0) {
            const resource = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'resource')
            const bnid2 = this.getAttributeNodeNS(dom, RDFParser.ns.RDF, 'nodeID')
            frame = this.buildFrame(frame)
            if (resource) {
              frame.addNode(resource.nodeValue)
              dom.removeAttributeNode(resource)
            } else if (bnid2) {
              frame.addBNode(bnid2.nodeValue)
              dom.removeAttributeNode(bnid2)
            } else {
              frame.addBNode()
            }
            // remaining attributes become properties of the object node
            for (let x1 = attrs.length - 1; x1 >= 0; x1--) {
              const f = this.buildFrame(frame)
              f.addArc(elementURI(attrs[x1]))
              if (elementURI(attrs[x1]) === RDFParser.ns.RDF + 'type') {
                this.buildFrame(f).addNode(attrs[x1].nodeValue)
              } else {
                this.buildFrame(f).addLiteral(attrs[x1].nodeValue)
              }
            }
          } else if (dom.childNodes.length === 0) {
            // empty property element: empty-string literal
            this.buildFrame(frame).addLiteral('')
          }
        }
      } // rdf:RDF
      // dig dug: find the next DOM node to visit, closing finished frames
      dom = frame.element
      while (frame.parent) {
        const pframe = frame
        // frames whose element was handed to a child frame have element == null
        while (dom == null) {
          frame = frame.parent
          dom = frame.element
        }
        const candidate = dom.childNodes && dom.childNodes[frame.lastChild]
        if (!candidate || !dig) {
          frame.terminateFrame()
          if (!(frame = frame.parent)) {
            break
          } // done
          dom = frame.element
          dig = true
        } else if ((candidate.nodeType !== RDFParser.nodeType.ELEMENT &&
          candidate.nodeType !== RDFParser.nodeType.TEXT &&
          candidate.nodeType !== RDFParser.nodeType.CDATA_SECTION) ||
          ((candidate.nodeType === RDFParser.nodeType.TEXT ||
            candidate.nodeType === RDFParser.nodeType.CDATA_SECTION) &&
            dom.childNodes.length !== 1)) {
          // skip comments/PIs, and text that has sibling nodes
          frame.lastChild++
        } else {
          // not a leaf
          frame.lastChild++
          frame = this.buildFrame(pframe, dom.childNodes[frame.lastChild - 1])
          break
        }
      }
    } // while
  }

  /**
   * Cleans out state from a previous parse run
   * @private
   */
  cleanParser () {
    this.bnodes = {}
    this.why = null
  }

  /**
   * Builds scope frame: inherits base/lang from the parent, applies
   * xml:base / xml:lang found on the element, registers xmlns:* prefixes
   * with the store and strips every xml* attribute from the element.
   *
   * @private
   * @param {Object} parent The enclosing frame
   * @param {Node} [element] The DOM node for the new frame, if any
   * @returns {Object} The new frame
   */
  buildFrame (parent, element) {
    const frame = this.frameFactory(this, parent, element)
    if (parent) {
      frame.base = parent.base
      frame.lang = parent.lang
    }
    if (!element || element.nodeType === RDFParser.nodeType.TEXT ||
      element.nodeType === RDFParser.nodeType.CDATA_SECTION) {
      return frame
    }
    const attrs = element.attributes
    const base = element.getAttributeNode('xml:base')
    if (base != null) {
      frame.base = base.nodeValue
      element.removeAttribute('xml:base')
    }
    const lang = element.getAttributeNode('xml:lang')
    if (lang != null) {
      frame.lang = lang.nodeValue
      element.removeAttribute('xml:lang')
    }
    // remove all extraneous xml and xmlns attributes
    // (iterate backwards because attributes are removed while looping)
    for (let x = attrs.length - 1; x >= 0; x--) {
      const attr = attrs[x]
      if (attr.nodeName.startsWith('xml')) {
        if (attr.name.startsWith('xmlns:')) {
          let uri = attr.nodeValue
          if (this.base) uri = uriUtil.join(uri, this.base)
          this.store.setPrefixForURI(attr.name.slice(6), uri)
        }
        element.removeAttributeNode(attr)
      }
    }
    return frame
  }
}