@giancosta86/wiki-transform
Version:
Stream transforming raw XML into wiki pages
91 lines • 2.93 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.WikiTransform = void 0;
const node_stream_1 = require("node:stream");
const sax_1 = require("sax");
const format_error_1 = require("@giancosta86/format-error");
const core_1 = require("./core");
/**
 * Object-mode Transform stream that forwards every incoming chunk to an
 * internal SAX stream and pushes one `{ title, text }` object each time the
 * configured page element closes with both a non-empty title and non-empty
 * text collected.
 *
 * Parsing state lives on the instance:
 * - `characterBuffer` / `bufferingCharacters` collect the character data seen
 *   between the opening and closing of a `title` or `text` element;
 * - `currentTitle` / `currentText` hold the values gathered for the page
 *   currently being parsed; both are reset when a page element opens;
 * - `saxError` remembers the first error emitted by the SAX stream and is
 *   handed to every subsequent `_transform` / `_flush` callback.
 */
class WikiTransform extends node_stream_1.Transform {
    logger;
    pageTag;
    characterBuffer = [];
    bufferingCharacters = false;
    currentTitle;
    currentText;
    saxError;
    saxStream = this.#createSaxStream();

    /**
     * @param {object} [options]
     * @param {number} [options.highWaterMark] Forwarded to the Transform constructor.
     * @param {AbortSignal} [options.signal] Forwarded to the Transform constructor.
     * @param {object} [options.logger] Optional sink exposing `info(message)` and `error(message)`.
     * @param {string} [options.pageTag] Name of the element delimiting a page;
     *   falls back to `DEFAULT_PAGE_TAG` when null or undefined.
     */
    constructor(options) {
        const streamOptions = {
            objectMode: true,
            highWaterMark: options?.highWaterMark,
            signal: options?.signal
        };
        super(streamOptions);
        this.logger = options?.logger;
        this.pageTag = options?.pageTag ?? core_1.DEFAULT_PAGE_TAG;
    }

    /**
     * Feeds one chunk to the SAX stream, then reports the first recorded SAX
     * error (or `undefined` when there is none) through `callback`.
     */
    _transform(chunk, encoding, callback) {
        const { saxStream } = this;
        saxStream.write(chunk, encoding);
        // Read only after the write: the handlers may have just recorded an error.
        callback(this.saxError);
    }

    /**
     * Ends the SAX stream, then reports the first recorded SAX error (if any).
     */
    _flush(callback) {
        const { saxStream } = this;
        saxStream.end();
        callback(this.saxError);
    }

    /**
     * Builds the SAX stream and wires its events to the private handlers.
     * The `true` argument is presumably sax's strict-mode flag (confirm
     * against the sax documentation).
     */
    #createSaxStream() {
        // Detached on purpose: the factory is invoked without a `this` binding.
        const { createStream } = sax_1;
        const collectCharacters = characters => this.#onCharacters(characters);
        return createStream(true)
            .on("opentag", node => this.#onOpenTag(node))
            .on("text", collectCharacters)
            .on("cdata", collectCharacters)
            .on("closetag", tagName => this.#onCloseTag(tagName))
            .on("error", err => this.#onSaxError(err));
    }

    /** Resets the page state on a page element; starts buffering on `title`/`text`. */
    #onOpenTag({ name }) {
        // The page tag is checked first, so it wins if it is named "title" or "text".
        if (name === this.pageTag) {
            this.currentTitle = undefined;
            this.currentText = undefined;
            return;
        }
        if (name === "title" || name === "text") {
            this.bufferingCharacters = true;
        }
    }

    /** Appends character data (plain text or CDATA) while buffering is active. */
    #onCharacters(characters) {
        if (!this.bufferingCharacters) {
            return;
        }
        this.characterBuffer.push(characters);
    }

    /** Stores the buffered title/text, or emits the page when the page element closes. */
    #onCloseTag(tagName) {
        // "title" and "text" are checked before the page tag, mirroring the
        // priority used when the names collide.
        if (tagName === "title") {
            this.currentTitle = this.#drainCharacters();
        }
        else if (tagName === "text") {
            this.currentText = this.#drainCharacters();
        }
        else if (tagName === this.pageTag) {
            this.#emitCurrentPage();
        }
    }

    /** Stops buffering, empties the buffer and returns its joined content. */
    #drainCharacters() {
        this.bufferingCharacters = false;
        const joined = this.characterBuffer.join("");
        this.characterBuffer.length = 0;
        return joined;
    }

    /** Pushes the current page downstream, or logs why it is skipped. */
    #emitCurrentPage() {
        const { currentTitle: title, currentText: text } = this;
        if (!title) {
            this.logger?.info("Page without title!");
            return;
        }
        if (!text) {
            this.logger?.info(`Page '${title}' has no text!`);
            return;
        }
        this.push({ title, text });
    }

    /** Logs every SAX error and remembers only the first one. */
    #onSaxError(err) {
        const { logger } = this;
        if (logger != null) {
            // The message is formatted only when a logger is actually present.
            logger.error(`Error while parsing page: ${(0, format_error_1.formatError)(err)}`);
        }
        this.saxError ||= err;
    }
}
exports.WikiTransform = WikiTransform;
//# sourceMappingURL=transform.js.map