microdata-rdf-streaming-parser
Version:
A fast and lightweight streaming Microdata to RDF parser
111 lines (110 loc) • 4.52 kB
TypeScript
import type * as RDF from '@rdfjs/types';
import { Parser as HtmlParser } from 'htmlparser2';
import { Transform } from 'readable-stream';
import type { IHtmlParseListener } from './IHtmlParseListener';
import type { IItemScope } from './IItemScope';
import type { IVocabRegistry } from './IVocabRegistry';
import EventEmitter = NodeJS.EventEmitter;
/**
* A stream transformer that parses Microdata (text) streams to an {@link RDF.Stream}.
*
* The class extends `Transform`, so it exposes the regular transform-stream API,
* and it additionally implements the RDF/JS `Sink` contract via the `import` method.
*
* This is a type declaration only: method bodies are not part of this file,
* and private members are listed by name without their types.
*/
export declare class MicrodataRdfParser extends Transform implements RDF.Sink<EventEmitter, RDF.Stream> {
/*
* Private state.
* Declaration output omits the types of private members, so only their names are visible here;
* consult the implementation for their shapes and semantics.
* NOTE(review): `isEmittingReferences`, the `pendingItemRefs*` members and
* `emittingReferencesItemScopeIdGenerator` appear to support deferred itemref handling
* (see `tryToEmitReferences`) — confirm in the implementation.
*/
private static readonly ITEM_PROPERTY_HANDLERS;
private readonly options;
private readonly util;
private readonly defaultGraph?;
private readonly parser;
private readonly htmlParseListener?;
private readonly vocabRegistry;
private itemScopeStack;
private textBufferStack;
private isEmittingReferences;
private readonly pendingItemRefsDomain;
private readonly pendingItemRefsRangeFinalized;
private readonly pendingItemRefsRangeCollecting;
private emittingReferencesItemScopeIdGenerator;
/**
* Create a new parser.
* @param options Optional parser configuration, see {@link IMicrodataRdfParserOptions}.
*/
constructor(options?: IMicrodataRdfParserOptions);
/**
* Parses the given text stream into a quad stream.
* @param {NodeJS.EventEmitter} stream A text stream.
* @return {RDF.Stream} A quad stream.
*/
import(stream: EventEmitter): RDF.Stream;
/**
* Transform-stream hook that receives one chunk of the incoming stream.
* NOTE(review): presumably hands the chunk to the internal HTML parser — confirm in the implementation.
* @param chunk The incoming chunk.
* @param encoding The encoding of the chunk when it is a string.
* @param callback Must be called, optionally with an error, once the chunk has been handled.
*/
_transform(chunk: any, encoding: string, callback: (error?: Error | null, data?: any) => void): void;
/**
* Transform-stream hook that is invoked when there is no more input to consume, before the stream ends.
* NOTE(review): presumably finalizes the internal HTML parser — confirm in the implementation.
* @param callback Must be called, optionally with an error, once flushing is complete.
*/
_flush(callback: (error?: Error | null, data?: any) => void): void;
/**
* Get the current item scope for the current depth.
* This will skip all undefined item scopes.
* @param parent If we should start looking one level higher in the stack.
* @return The item scope that was found, if any.
*/
protected getItemScope(parent?: boolean): IItemScope | undefined;
/**
* Get the current stack depth.
*/
protected getDepth(): number;
/**
* Handle an opening tag.
* NOTE(review): presumably called by the internal HTML parser for every opening tag — confirm in the implementation.
* @param name The tag name.
* @param attributes The attributes of the tag, keyed by attribute name.
*/
onTagOpen(name: string, attributes: Record<string, string>): void;
/**
* Handle a piece of text content.
* NOTE(review): presumably called by the internal HTML parser for text nodes — confirm in the implementation.
* @param data The text.
*/
onText(data: string): void;
/**
* Handle a closing tag.
* NOTE(review): presumably called by the internal HTML parser for every closing tag — confirm in the implementation.
*/
onTagClose(): void;
/**
* Handle the end of the document.
* NOTE(review): presumably called by the internal HTML parser once all input was processed — confirm in the implementation.
*/
onEnd(): void;
/**
* Initialize a new HtmlParser.
* @param xmlMode If the parser should be setup in strict mode.
* @return The newly created HtmlParser.
*/
protected initializeParser(xmlMode: boolean): HtmlParser;
/**
* Handle the given item properties.
* @param itempropValue The value of itemprop or itemprop-reverse.
* @param reverse If the item properties are reversed (itemprop-reverse).
* @param itemScope The current item scope.
* @param tagName The current tag name.
* @param tagAttributes The current tag attributes.
*/
protected handleItemProperties(itempropValue: string, reverse: boolean, itemScope: IItemScope | undefined, tagName: string, tagAttributes: Record<string, string>): void;
/**
* Emit the given object for the given predicates.
* @param itemScope The current item scope.
* @param predicates An array of predicates.
* @param object An object.
* @param reverse If the triples should be reversed.
*/
protected emitPredicateTriples(itemScope: IItemScope, predicates: RDF.NamedNode[], object: RDF.Quad_Object, reverse: boolean): void;
/**
* Emit the given triple to the stream.
* @param {Quad_Subject} subject A subject term.
* @param {Quad_Predicate} predicate A predicate term.
* @param {Quad_Object} object An object term.
*/
protected emitTriple(subject: RDF.Quad_Subject, predicate: RDF.Quad_Predicate, object: RDF.Quad_Object): void;
/**
* Attempt to emit all pending itemrefs for the given reference.
* @param reference An item reference id.
* @param itemScopeDomain An optional item scope. If defined, only refs from this scope will be emitted.
*/
protected tryToEmitReferences(reference: string, itemScopeDomain?: IItemScope): void;
}
/**
* Options accepted by the {@link MicrodataRdfParser} constructor.
* Every option is optional.
*/
export interface IMicrodataRdfParserOptions {
/**
* A custom RDFJS DataFactory to construct terms and triples.
*/
dataFactory?: RDF.DataFactory;
/**
* An initial default base IRI.
*/
baseIRI?: string;
/**
* The default graph for constructing quads.
*/
defaultGraph?: RDF.Quad_Graph;
/**
* An optional listener for the internal HTML parse events.
*/
htmlParseListener?: IHtmlParseListener;
/**
* If the parser should assume strict X(HT)ML documents.
*/
xmlMode?: boolean;
/**
* A vocabulary registry to define specific behaviour for given URI prefixes.
*/
vocabRegistry?: IVocabRegistry;
}