UNPKG

@giancosta86/wiki-transform

Version:

Stream transforming raw XML into wiki pages

91 lines 2.93 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.WikiTransform = void 0;
const node_stream_1 = require("node:stream");
const sax_1 = require("sax");
const format_error_1 = require("@giancosta86/format-error");
const core_1 = require("./core");

/**
 * Object-mode Transform stream that feeds incoming XML chunks to a sax
 * stream and pushes one `{ title, text }` object for every closed page
 * element whose title and text are both non-empty.
 *
 * Recognized options (all optional): `logger`, `pageTag`, `highWaterMark`
 * and `signal`. When `pageTag` is missing, `DEFAULT_PAGE_TAG` from `./core`
 * is used.
 */
class WikiTransform extends node_stream_1.Transform {
    logger;
    pageTag;
    // Fragments of character data collected for the element being read.
    characterBuffer = [];
    // True only while inside a <title> or <text> element.
    bufferingCharacters = false;
    currentTitle;
    currentText;
    // First error reported by the sax stream, if any.
    saxError;
    // `true` is sax's "strict" flag; every listener delegates to a private
    // method so that the parsing rules can be read one at a time below.
    saxStream = (0, sax_1.createStream)(true)
        .on("opentag", tag => this.#handleOpenTag(tag.name))
        .on("text", characters => this.#collectCharacters(characters))
        .on("cdata", characters => this.#collectCharacters(characters))
        .on("closetag", tagName => this.#handleCloseTag(tagName))
        .on("error", err => this.#handleSaxError(err));

    /**
     * @param {object} [options] Optional settings: `logger`, `pageTag`,
     * `highWaterMark`, `signal`.
     */
    constructor(options) {
        const { highWaterMark, signal, logger, pageTag } = options ?? {};
        super({ objectMode: true, highWaterMark, signal });
        this.logger = logger;
        this.pageTag = pageTag ?? core_1.DEFAULT_PAGE_TAG;
    }

    /**
     * Forwards the chunk to the sax stream, then reports the first recorded
     * parsing error (or nothing) to the stream machinery.
     */
    _transform(chunk, encoding, callback) {
        this.saxStream.write(chunk, encoding);
        callback(this.saxError);
    }

    /**
     * Ends the sax stream and reports the first recorded parsing error, if any.
     */
    _flush(callback) {
        this.saxStream.end();
        callback(this.saxError);
    }

    /**
     * Reacts to an opening tag: a page tag clears the page state, while
     * "title" and "text" start the buffering of character data.
     * The page tag is checked first, so it wins if it is named like one
     * of the other two.
     */
    #handleOpenTag(tagName) {
        if (tagName === this.pageTag) {
            this.currentTitle = undefined;
            this.currentText = undefined;
        }
        else if (tagName === "title" || tagName === "text") {
            this.bufferingCharacters = true;
        }
    }

    /**
     * Stores text/CDATA fragments, but only while buffering is active.
     */
    #collectCharacters(characters) {
        if (!this.bufferingCharacters) {
            return;
        }
        this.characterBuffer.push(characters);
    }

    /**
     * Reacts to a closing tag: "title" and "text" stop the buffering and
     * store the collected characters; the page tag triggers the emission.
     * "title" and "text" are checked before the page tag.
     */
    #handleCloseTag(tagName) {
        if (tagName === "title") {
            this.bufferingCharacters = false;
            this.currentTitle = this.#drainCharacterBuffer();
        }
        else if (tagName === "text") {
            this.bufferingCharacters = false;
            this.currentText = this.#drainCharacterBuffer();
        }
        else if (tagName === this.pageTag) {
            this.#emitCurrentPage();
        }
    }

    /**
     * Joins the buffered fragments into one string and empties the buffer
     * in place.
     */
    #drainCharacterBuffer() {
        const content = this.characterBuffer.join("");
        this.characterBuffer.length = 0;
        return content;
    }

    /**
     * Pushes the current page downstream; a page with a missing or empty
     * title or text is only logged (when a logger is set) and then skipped.
     */
    #emitCurrentPage() {
        const { currentTitle: title, currentText: text } = this;
        if (!title) {
            this.logger?.info("Page without title!");
            return;
        }
        if (!text) {
            this.logger?.info(`Page '${title}' has no text!`);
            return;
        }
        this.push({ title, text });
    }

    /**
     * Logs every sax error, but remembers only the first one.
     */
    #handleSaxError(err) {
        this.logger?.error(`Error while parsing page: ${(0, format_error_1.formatError)(err)}`);
        this.saxError ||= err;
    }
}
exports.WikiTransform = WikiTransform;
//# sourceMappingURL=transform.js.map