rdfa-streaming-parser
Version:
A fast and lightweight streaming RDFa parser
121 lines (120 loc) • 4.78 kB
TypeScript
import EventEmitter = NodeJS.EventEmitter;
import { Parser as HtmlParser } from "htmlparser2";
import * as RDF from "@rdfjs/types";
import { Transform } from "readable-stream";
import { IActiveTag } from "./IActiveTag";
import { IHtmlParseListener } from "./IHtmlParseListener";
import { IRdfaPattern } from "./IRdfaPattern";
import { IRdfaFeatures, RdfaProfile } from "./RdfaProfile";
/**
* A stream transformer that parses RDFa (text) streams to an {@link RDF.Stream}.
*
* The class extends a readable-stream {@link Transform} and implements {@link RDF.Sink},
* so it can be used either by piping text into it or by calling {@link RdfaParser.import}.
*
* This is a type declaration only; the implementation is not part of this file.
*/
export declare class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.Stream> {
// Internal state. The declaration omits the types of private members,
// so their exact shapes must be looked up in the implementation.
private readonly options;
private readonly util;
private readonly defaultGraph?;
private readonly parser;
private readonly features;
private readonly htmlParseListener?;
private readonly rdfaPatterns;
private readonly pendingRdfaPatternCopies;
private readonly activeTagStack;
/**
* Create a new RDFa parser.
* @param {IRdfaParserOptions} options Optional parser settings, see {@link IRdfaParserOptions}.
*/
constructor(options?: IRdfaParserOptions);
/**
* Parses the given text stream into a quad stream.
* @param {NodeJS.EventEmitter} stream A text stream.
* @return {RDF.Stream} A quad stream.
*/
import(stream: EventEmitter): RDF.Stream;
/**
* Transform stream hook that receives one chunk of input.
* NOTE(review): presumably forwards the chunk to the internal HTML parser - confirm in the implementation.
* @param chunk The incoming chunk.
* @param {string} encoding The encoding of the chunk.
* @param callback To be called, optionally with an error, once the chunk has been handled.
*/
_transform(chunk: any, encoding: string, callback: (error?: Error | null, data?: any) => void): void;
/**
* Transform stream hook that is called when no more input will arrive.
* @param callback To be called, optionally with an error, once flushing is done.
*/
_flush(callback: (error?: Error | null, data?: any) => void): void;
/**
* Handler for an opening tag.
* NOTE(review): presumably driven by the internal HTML parser's open-tag event - confirm in the implementation.
* @param {string} name The tag name.
* @param attributes A mapping from attribute names to attribute values.
*/
onTagOpen(name: string, attributes: {
[s: string]: string;
}): void;
/**
* Handler for text content.
* @param {string} data The text that was encountered.
*/
onText(data: string): void;
/**
* Handler for a closing tag.
*/
onTagClose(): void;
/**
* Handler for the end of the document.
*/
onEnd(): void;
/**
* Check whether the new subject can be inherited from the parent object
* when the resource defines no new subject.
* @param {string} name The current tag name.
* @returns {boolean} If the subject can be inherited.
*/
protected isInheritSubjectInHeadBody(name: string): boolean;
/**
* Add a list mapping for the given predicate and object in the active tag.
* @param {IActiveTag} activeTag The active tag.
* @param {Term | boolean} subject A subject term, this will only be used to create a separate list
* if activeTag.explicitNewSubject is true.
* @param {Term} predicate A predicate term.
* @param {Term | boolean} currentObjectResource The current object resource.
*/
protected addListMapping(activeTag: IActiveTag, subject: RDF.Quad_Subject | boolean, predicate: RDF.Quad_Predicate, currentObjectResource: RDF.Quad_Object | boolean): void;
/**
* Emit the given triple to the stream.
* @param {Term} subject A subject term.
* @param {Term} predicate A predicate term.
* @param {Term} object An object term.
*/
protected emitTriple(subject: RDF.Quad_Subject, predicate: RDF.Quad_Predicate, object: RDF.Quad_Object): void;
/**
* Emit an instantiation of the given pattern with the given parent tag.
* @param {IActiveTag} parentTag The parent tag to instantiate in.
* @param {IRdfaPattern} pattern The pattern to instantiate.
* @param {string} rootPatternId The pattern id.
*/
protected emitPatternCopy(parentTag: IActiveTag, pattern: IRdfaPattern, rootPatternId: string): void;
/**
* Emit an instantiation of the given pattern, without taking a parent tag as argument.
*
* This should probably not be called directly,
* call {@link emitPatternCopy} instead.
*
* @param {IRdfaPattern} pattern The pattern to instantiate.
* @param {boolean} root If this is the root call for the given pattern.
* @param {string} rootPatternId The pattern id.
*/
protected emitPatternCopyAbsolute(pattern: IRdfaPattern, root: boolean, rootPatternId: string): void;
/**
* Initialize the underlying htmlparser2 parser.
* @param {boolean} xmlMode If the parser should run in XML mode.
* NOTE(review): presumably maps to htmlparser2's `xmlMode` option - confirm in the implementation.
* @return {HtmlParser} An htmlparser2 parser instance.
*/
protected initializeParser(xmlMode: boolean): HtmlParser;
}
/**
* Options that can be passed to the {@link RdfaParser} constructor.
* All fields are optional.
*/
export interface IRdfaParserOptions {
/**
* A custom RDFJS DataFactory to construct terms and triples.
*/
dataFactory?: RDF.DataFactory;
/**
* An initial default base IRI.
*/
baseIRI?: string;
/**
* A default language for string literals.
*/
language?: string;
/**
* The initial vocabulary.
*/
vocab?: string;
/**
* The default graph for constructing quads.
*/
defaultGraph?: RDF.Quad_Graph;
/**
* A hash of features that should be enabled.
* Defaults to the features defined by the profile.
*/
features?: IRdfaFeatures;
/**
* The RDFa profile to use.
* Defaults to a profile with all possible features enabled.
*/
profile?: RdfaProfile;
/**
* The content type of the document that should be parsed.
* This can be used as an alternative to the 'profile' option.
*/
contentType?: string;
/**
* An optional listener for the internal HTML parse events.
*/
htmlParseListener?: IHtmlParseListener;
}