rdfxml-streaming-parser
Version:
Streaming RDF/XML parser
194 lines (193 loc) • 7.43 kB
TypeScript
import * as RDF from "@rdfjs/types";
import { SaxesTagNS } from "@rubensworks/saxes";
import { Transform } from "readable-stream";
import { IriValidationStrategy } from "validate-iri";
import EventEmitter = NodeJS.EventEmitter;
/**
* Streaming RDF/XML parser, declared as a `Transform` stream that also
* implements the RDF/JS `Sink` interface (text stream in, quad stream out).
*
* Only the type surface is declared here; implementation details mentioned
* below are hedged wherever they cannot be read from the signatures.
*/
export declare class RdfXmlParser extends Transform implements RDF.Sink<EventEmitter, RDF.Stream> {
/** MIME type string for RDF/XML. */
static readonly MIME_TYPE = "application/rdf+xml";
/** IRI of the RDF syntax namespace. */
static readonly RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
/** IRI of the XML namespace. */
static readonly XML = "http://www.w3.org/XML/1998/namespace";
/** IRI of the ITS namespace. */
static readonly ITS = "http://www.w3.org/2005/11/its";
/** Names that may presumably not be used as node elements (values not visible here). */
static readonly FORBIDDEN_NODE_ELEMENTS: string[];
/** Names that may presumably not be used as property elements (values not visible here). */
static readonly FORBIDDEN_PROPERTY_ELEMENTS: string[];
/** Regular expression presumably used by `validateNcname` (pattern not visible here). */
static readonly NCNAME_MATCHER: RegExp;
/** Mirrors `IRdfXmlParserArgs.trackPosition`, judging by the shared name. */
readonly trackPosition?: boolean;
// The private members below are declared without types; the notes are
// inferred from their names and the same-named constructor options.
// NOTE(review): confirm against the implementation.
private readonly options;
private readonly dataFactory;
private readonly baseIRI;
private readonly defaultGraph?;
private readonly allowDuplicateRdfIds?;
private readonly saxParser;
private readonly validateUri;
private readonly iriValidationStrategy;
// Presumably the stack of IActiveTag entries for the currently open tags.
private readonly activeTagStack;
// Presumably the registry behind `claimNodeId`.
private readonly nodeIds;
/**
* Create a new parser.
* @param {IRdfXmlParserArgs} args Optional parser options.
*/
constructor(args?: IRdfXmlParserArgs);
/**
* Parses the given text stream into a quad stream.
* @param {NodeJS.EventEmitter} stream A text stream.
* @return {RDF.Stream} A quad stream.
*/
import(stream: EventEmitter): RDF.Stream;
/**
* `Transform` stream hook that receives one chunk of input.
* @param chunk The incoming chunk.
* @param {BufferEncoding} encoding The encoding of the chunk.
* @param callback Callback taking an optional error and optional data.
*/
_transform(chunk: any, encoding: BufferEncoding, callback: (error?: Error | null, data?: any) => void): void;
/**
* Create a new parse error instance.
* @param {string} message An error message.
* @return {Error} An error instance.
*/
newParseError(message: string): Error;
/**
* Convert the given value to an IRI by taking into account the baseIRI.
*
* This will follow the RDF/XML spec for converting values with baseIRIs to an IRI.
*
* @param {string} value The value to convert to an IRI.
* @param {IActiveTag} activeTag The active tag.
* @return {NamedNode} an IRI.
*/
valueToUri(value: string, activeTag: IActiveTag): RDF.NamedNode;
/**
* Convert the given URI string to a named node.
*
* This throws an error if the URI is invalid.
*
* @param {string} uri A URI string.
* @return {NamedNode} a named node.
*/
uriToNamedNode(uri: string): RDF.NamedNode;
/**
* Validate the given value as an NCName: https://www.w3.org/TR/xml-names/#NT-NCName
* If it is invalid, an error will be thrown or emitted.
* @param {string} value A value.
*/
validateNcname(value: string): void;
/**
* Create a new literal term.
* @param {string} value The literal value.
* @param {IActiveTag} activeTag The active tag.
* @return {Literal} a literal term.
*/
createLiteral(value: string, activeTag: IActiveTag): RDF.Literal;
/**
* Presumably wires the SAX parser's events to the `on*` handlers below;
* the body is not visible here.
*/
protected attachSaxListeners(): void;
/**
* Handle the given tag.
* @param {SaxesTagNS} tag A SAX tag.
*/
protected onTag(tag: SaxesTagNS): void;
/**
* Handle the given node element in resource-mode.
* @param {SaxesTagNS} tag A SAX tag.
* @param {IActiveTag} activeTag The currently active tag.
* @param {IActiveTag} parentTag The parent tag or null.
* @param {boolean} rootTag If we are currently processing the root tag.
*/
protected onTagResource(tag: SaxesTagNS, activeTag: IActiveTag, parentTag: IActiveTag, rootTag: boolean): void;
/**
* Handle the given property element in property-mode.
* @param {SaxesTagNS} tag A SAX tag.
* @param {IActiveTag} activeTag The currently active tag.
* @param {IActiveTag} parentTag The parent tag or null.
*/
protected onTagProperty(tag: SaxesTagNS, activeTag: IActiveTag, parentTag: IActiveTag): void;
/**
* Emit the given triple to the stream.
* @param {Term} subject A subject term.
* @param {Term} predicate A predicate term.
* @param {Term} object An object term.
* @param {Term} statementId An optional resource that identifies the triple.
* If truthy, then the given triple will also be emitted reified.
* @param childrenTripleTerms An optional array to push quads into instead of emitting them.
* @param reifier The reifier to emit this triple under.
*/
protected emitTriple(subject: RDF.Quad_Subject, predicate: RDF.Quad_Predicate, object: RDF.Quad_Object, statementId?: RDF.NamedNode, childrenTripleTerms?: RDF.Quad[], reifier?: RDF.NamedNode | RDF.BlankNode): void;
/**
* Register the given term as a node ID.
* If one was already registered, this will emit an error.
*
* This is used to check duplicate occurrences of rdf:ID in scope of the baseIRI.
* @param {Term} term An RDF term.
*/
protected claimNodeId(term: RDF.Term): void;
/**
* Handle the given text string.
* @param {string} text A parsed text string.
*/
protected onText(text: string): void;
/**
* Handle the closing of the last tag.
*/
protected onCloseTag(): void;
/**
* Fetch local DOCTYPE ENTITY declarations and make the parser recognise them.
* @param {string} doctype The read doctype.
*/
protected onDoctype(doctype: string): void;
// Signatures of the two private helpers below are not exposed in this declaration.
private setDirection;
private setVersion;
}
/**
* Options accepted by the `RdfXmlParser` constructor. All fields are optional.
*/
export interface IRdfXmlParserArgs {
/**
* A custom RDFJS DataFactory to construct terms and triples.
*/
dataFactory?: RDF.DataFactory;
/**
* An initial default base IRI.
*/
baseIRI?: string;
/**
* The default graph for constructing quads.
*/
defaultGraph?: RDF.Term;
/**
* If the internal position (line, column) should be tracked and emitted in error messages.
*/
trackPosition?: boolean;
/**
* By default multiple occurrences of the same `rdf:ID` value are not allowed.
* By setting this option to `true`, this uniqueness check can be disabled.
*/
allowDuplicateRdfIds?: boolean;
/**
* Enables validation of all URIs. Will throw an Error in case of an invalid URI.
* By default, it is equal to true.
*/
validateUri?: boolean;
/**
* Allows customizing the IRI validation strategy using the `IriValidationStrategy` enumeration.
* By default, the "pragmatic" strategy is used.
*/
iriValidationStrategy?: IriValidationStrategy;
}
/**
* State record for a tag, passed to the parser's tag handlers and to
* `valueToUri` / `createLiteral` as the "active tag". All fields are optional.
*
* NOTE(review): the field notes below are inferred from names, types and the
* parser's method signatures only; confirm against the implementation.
*/
export interface IActiveTag {
subject?: RDF.NamedNode | RDF.BlankNode;
predicate?: RDF.NamedNode;
predicateEmitted?: boolean;
predicateSubPredicates?: RDF.NamedNode[];
predicateSubObjects?: (RDF.NamedNode | RDF.BlankNode | RDF.Literal)[];
hadChildren?: boolean;
text?: string;
// language / direction / datatype: presumably consumed when creating literals.
language?: string;
direction?: 'ltr' | 'rtl';
datatype?: RDF.NamedNode;
nodeId?: RDF.BlankNode;
// Parse mode for child tags; see the ParseType enum.
childrenParseType?: ParseType;
// Presumably the base IRI in scope for this tag, as used by IRI resolution.
baseIRI?: string;
listItemCounter?: number;
reifiedStatementId?: RDF.NamedNode;
childrenTagsToString?: boolean;
childrenStringTags?: string[];
childrenStringEmitClosingTag?: string;
childrenCollectionSubject?: RDF.NamedNode | RDF.BlankNode;
childrenCollectionPredicate?: RDF.NamedNode;
childrenTagsToTripleTerms?: boolean;
// Presumably the array quads are pushed into instead of being emitted.
childrenTripleTerms?: RDF.Quad[];
reifier?: RDF.NamedNode | RDF.BlankNode;
rdfVersion?: string;
// List of key/value string pairs; presumably namespace declarations in scope.
namespaces?: {
key: string;
value: string;
}[];
}
/**
* Parse mode, used as the type of `IActiveTag.childrenParseType`.
*
* NOTE(review): the members presumably correspond to the parser's
* resource-mode and property-mode tag handling; confirm against the
* implementation.
*/
export declare enum ParseType {
RESOURCE = 0,
PROPERTY = 1
}