UNPKG

microdata-rdf-streaming-parser

Version:
111 lines (110 loc) 4.52 kB
import type * as RDF from '@rdfjs/types';
import { Parser as HtmlParser } from 'htmlparser2';
import { Transform } from 'readable-stream';
import type { IHtmlParseListener } from './IHtmlParseListener';
import type { IItemScope } from './IItemScope';
import type { IVocabRegistry } from './IVocabRegistry';
import EventEmitter = NodeJS.EventEmitter;

/**
 * Transform stream that consumes Microdata (text) input and produces an {@link RDF.Stream}.
 *
 * Besides being usable as a regular `Transform`, it fulfils the RDF/JS `Sink` contract
 * through {@link MicrodataRdfParser.import}.
 */
export declare class MicrodataRdfParser extends Transform implements RDF.Sink<EventEmitter, RDF.Stream> {
    // Internal state. The declaration file erases the types of private members,
    // so only their names are visible here.
    private static readonly ITEM_PROPERTY_HANDLERS;
    private readonly options;
    private readonly util;
    private readonly defaultGraph?;
    private readonly parser;
    private readonly htmlParseListener?;
    private readonly vocabRegistry;
    private itemScopeStack;
    private textBufferStack;
    private isEmittingReferences;
    private readonly pendingItemRefsDomain;
    private readonly pendingItemRefsRangeFinalized;
    private readonly pendingItemRefsRangeCollecting;
    private emittingReferencesItemScopeIdGenerator;

    /**
     * @param options Optional parser settings; see {@link IMicrodataRdfParserOptions}.
     */
    constructor(options?: IMicrodataRdfParserOptions);

    /**
     * Converts a text stream into a stream of quads.
     *
     * @param stream The text stream to read from.
     * @returns The resulting quad stream.
     */
    import(stream: EventEmitter): RDF.Stream;

    /**
     * `Transform` hook that receives one chunk of input.
     *
     * @param chunk The incoming chunk.
     * @param encoding The encoding reported for the chunk.
     * @param callback Completion callback, optionally given an error and output data.
     */
    _transform(chunk: any, encoding: string, callback: (error?: Error | null, data?: any) => void): void;

    /**
     * `Transform` hook for flushing at the end of the input.
     *
     * @param callback Completion callback, optionally given an error and output data.
     */
    _flush(callback: (error?: Error | null, data?: any) => void): void;

    /**
     * Looks up the item scope that applies at the current depth,
     * skipping over every undefined item scope along the way.
     *
     * @param parent Whether the lookup should begin one level higher in the stack.
     * @returns The applicable item scope, or `undefined`.
     */
    protected getItemScope(parent?: boolean): IItemScope | undefined;

    /**
     * @returns The current depth of the stack.
     */
    protected getDepth(): number;

    /**
     * Handler for an opening tag.
     * NOTE(review): presumably driven by the internal HTML parser — confirm in the implementation.
     *
     * @param name The tag name.
     * @param attributes The attributes of the tag.
     */
    onTagOpen(name: string, attributes: Record<string, string>): void;

    /**
     * Handler for text content.
     *
     * @param data The text that was encountered.
     */
    onText(data: string): void;

    /**
     * Handler for a closing tag.
     */
    onTagClose(): void;

    /**
     * Handler for the end of the input.
     */
    onEnd(): void;

    /**
     * Creates a fresh HtmlParser.
     *
     * @param xmlMode Whether the parser should be configured in strict mode.
     * @returns The newly created parser.
     */
    protected initializeParser(xmlMode: boolean): HtmlParser;

    /**
     * Processes the item properties found on a tag.
     *
     * @param itempropValue The value of `itemprop` or `itemprop-reverse`.
     * @param reverse Whether the item properties are reversed (`itemprop-reverse`).
     * @param itemScope The item scope that is currently active.
     * @param tagName The name of the current tag.
     * @param tagAttributes The attributes of the current tag.
     */
    protected handleItemProperties(
        itempropValue: string,
        reverse: boolean,
        itemScope: IItemScope | undefined,
        tagName: string,
        tagAttributes: Record<string, string>,
    ): void;

    /**
     * Emits the given object once for each of the given predicates.
     *
     * @param itemScope The item scope that is currently active.
     * @param predicates The predicates to emit for.
     * @param object The object term.
     * @param reverse Whether the triples should be reversed.
     */
    protected emitPredicateTriples(
        itemScope: IItemScope,
        predicates: RDF.NamedNode[],
        object: RDF.Quad_Object,
        reverse: boolean,
    ): void;

    /**
     * Pushes a single triple onto the stream.
     *
     * @param subject The subject term.
     * @param predicate The predicate term.
     * @param object The object term.
     */
    protected emitTriple(subject: RDF.Quad_Subject, predicate: RDF.Quad_Predicate, object: RDF.Quad_Object): void;

    /**
     * Tries to emit every pending itemref that belongs to the given reference.
     *
     * @param reference The id of an item reference.
     * @param itemScopeDomain Optional item scope; when defined, only refs from this scope are emitted.
     */
    protected tryToEmitReferences(reference: string, itemScopeDomain?: IItemScope): void;
}

/**
 * Settings accepted by the {@link MicrodataRdfParser} constructor. Every field is optional.
 */
export interface IMicrodataRdfParserOptions {
    /**
     * Custom RDFJS DataFactory used to construct terms and triples.
     */
    dataFactory?: RDF.DataFactory;
    /**
     * Initial default base IRI.
     */
    baseIRI?: string;
    /**
     * Default graph used when constructing quads.
     */
    defaultGraph?: RDF.Quad_Graph;
    /**
     * Optional listener for the internal HTML parse events.
     */
    htmlParseListener?: IHtmlParseListener;
    /**
     * Whether the parser should assume strict X(HT)ML documents.
     */
    xmlMode?: boolean;
    /**
     * Vocabulary registry that defines specific behaviour for given URI prefixes.
     */
    vocabRegistry?: IVocabRegistry;
}