@xcrap/parser
Version:
Xcrap Parser is a package of the Xcrap framework. It was developed to handle data extraction from text files (currently supporting only HTML and JSON) using declarative models.
71 lines (70 loc) • 2.66 kB
JavaScript
;
// Interop shim for default imports: reuses an `__importDefault` already present
// on `this` when there is one, otherwise falls back to the function below.
// The fallback returns modules flagged with `__esModule` unchanged and wraps
// anything else as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (imported) {
    if (imported && imported.__esModule) {
        return imported;
    }
    return { "default": imported };
};
// Flag the exports object with `__esModule: true`.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the export; the class is assigned to it after its definition.
exports.HtmlParsingModel = void 0;
// HTML parser; wrapped by __importDefault so it is reached through `.default`.
const node_html_parser_1 = __importDefault(require("node-html-parser"));
// Provides MultipleQueryError and HTMLElementNotFoundError.
const errors_1 = require("../errors");
// Provides nodeHtmlParserOptions, the default `options` of HtmlParsingModel.parse.
const parser_1 = require("./parser");
/**
 * Declarative model that extracts structured data from HTML.
 *
 * `shape` maps each output key to a value descriptor. A descriptor that has a
 * `model` property is "nested": the matched markup is handed to that model's
 * `parse`. Any other descriptor is a "base" value: its `extractor` is called
 * on the matched element(s).
 *
 * Descriptor fields read by this class: `query`, `multiple`, `limit`,
 * `extractor`, `default` (base values only) and `model` (nested values only).
 */
class HtmlParsingModel {
    /**
     * @param {Object} shape - Map of output key to value descriptor.
     */
    constructor(shape) {
        this.shape = shape;
    }
    /**
     * Parses `source` and resolves every descriptor of the shape.
     * Descriptors are resolved one after another, not concurrently.
     *
     * @param {string} source - HTML text to parse.
     * @param {Object} [options] - Options forwarded to node-html-parser's
     *   `parse`; defaults to `parser_1.nodeHtmlParserOptions`.
     * @returns {Promise<Object>} Object with one entry per own key of the shape.
     */
    async parse(source, options = parser_1.nodeHtmlParserOptions) {
        const root = node_html_parser_1.default.parse(source, options);
        const data = {};
        for (const key in this.shape) {
            // Skip enumerable keys inherited through the prototype chain: only
            // the shape's own keys are value descriptors.
            if (!Object.prototype.hasOwnProperty.call(this.shape, key)) {
                continue;
            }
            const value = this.shape[key];
            const isNestedValue = "model" in value;
            data[key] = isNestedValue
                ? await this.parseNestedValue(value, root)
                : await this.parseBaseValue(value, root);
        }
        return data;
    }
    /**
     * Returns the elements matching `value.query`, truncated to `value.limit`
     * when a limit is set. The array returned by `querySelectorAll` is never
     * modified.
     *
     * @param {Object} value - Descriptor with `multiple` set.
     * @param {Object} root - Element to search in.
     * @returns {Array} Matched elements.
     * @throws {MultipleQueryError} When the descriptor has no `query`.
     */
    selectElements(value, root) {
        if (!value.query) {
            throw new errors_1.MultipleQueryError();
        }
        const matches = root.querySelectorAll(value.query);
        // slice(0, limit) keeps the same elements splice(limit) would leave
        // behind, without mutating `matches`.
        return value.limit !== undefined ? matches.slice(0, value.limit) : matches;
    }
    /**
     * Resolves a base descriptor by running its `extractor`.
     *
     * With `multiple`, the extractor runs on every matched element and the
     * results are returned as an array. Otherwise it runs on the first match
     * of `query`, or on `root` itself when no `query` is given.
     *
     * @param {Object} value - Base value descriptor.
     * @param {Object} root - Element to search in.
     * @returns {Promise<*>} Extracted value(s), or `value.default` when the
     *   single element is not found and a default is defined.
     * @throws {MultipleQueryError} When `multiple` is set without a `query`.
     * @throws {HTMLElementNotFoundError} When the single element is not found
     *   and `value.default` is undefined.
     */
    async parseBaseValue(value, root) {
        if (value.multiple) {
            const elements = this.selectElements(value, root);
            return await Promise.all(elements.map(element => value.extractor(element)));
        }
        const element = value.query ? root.querySelector(value.query) : root;
        if (!element) {
            if (value.default === undefined) {
                throw new errors_1.HTMLElementNotFoundError(value.query);
            }
            return value.default;
        }
        return await value.extractor(element);
    }
    /**
     * Resolves a nested descriptor by feeding each matched element to
     * `value.model.parse`.
     *
     * The text given to the nested model is the result of `value.extractor`
     * when one is provided, otherwise the element's `outerHTML`. This holds
     * for both the single and the `multiple` case.
     *
     * @param {Object} value - Nested value descriptor (has a `model`).
     * @param {Object} root - Element to search in.
     * @returns {Promise<*>} Result of the nested model, or an array of results
     *   when `multiple` is set.
     * @throws {MultipleQueryError} When `multiple` is set without a `query`.
     * @throws {HTMLElementNotFoundError} When the single element is not found.
     */
    async parseNestedValue(value, root) {
        const readSource = async (element) => (value.extractor ? await value.extractor(element) : element.outerHTML);
        if (value.multiple) {
            const elements = this.selectElements(value, root);
            return await Promise.all(elements.map(async (element) => value.model.parse(await readSource(element))));
        }
        const element = root.querySelector(value.query);
        if (!element) {
            throw new errors_1.HTMLElementNotFoundError(value.query);
        }
        return await value.model.parse(await readSource(element));
    }
}
// Publish the class on the CommonJS exports object.
exports.HtmlParsingModel = HtmlParsingModel;