// @alline/hook-wiki
// Version:
// Wikipedia hook for Alline.
// 119 lines • 4.67 kB
// JavaScript
;
/**
 * CommonJS/ES-module interop helper.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param {*} mod - The value returned by `require`.
 * @returns {*} `mod` itself when it is truthy and flagged `__esModule`,
 *   otherwise a wrapper object exposing `mod` under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this module with `__esModule` so interop helpers (such as
// `__importDefault` above, which checks that property) return it unwrapped.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the public exports as undefined; each one is assigned its real
// value further down in this file.
exports.validateText = exports.transformUrlHook = exports.transformEpisodeHook = void 0;
const cheerio_table_parser_1 = require("@joshuaavalon/cheerio-table-parser");
const lodash_1 = __importDefault(require("lodash"));
const moment_1 = __importDefault(require("moment"));
const cheerio_1 = __importDefault(require("cheerio"));
/**
 * Groups table row indices into logical entries.
 *
 * When `multipleRow` is undefined every row forms its own single-element
 * group. Otherwise consecutive rows whose cell in column `multipleRow`
 * compares equal (lodash `eq`) to the previous row's cell are placed in the
 * same group; row 0 always goes into the first group.
 *
 * @param {Array<Array<*>>} table - Parsed table, indexed as table[row][col].
 * @param {number|undefined} multipleRow - Column index used to detect rows
 *   that belong together, or undefined to disable grouping.
 * @returns {Array<Array<number>>} Groups of row indices, in table order.
 */
const mapMultipleRows = (table, multipleRow) => {
    if (lodash_1.default.isUndefined(multipleRow)) {
        const rowIndices = lodash_1.default.range(table.length);
        return rowIndices.map(rowIndex => [rowIndex]);
    }
    const groups = [];
    let previousCell = undefined;
    table.forEach((cells, rowIndex) => {
        // The very first visited row needs a group to land in.
        if (groups.length === 0) {
            groups.push([]);
        }
        const cell = cells[multipleRow];
        const joinsCurrentGroup = lodash_1.default.eq(cell, previousCell) || rowIndex === 0;
        if (joinsCurrentGroup) {
            groups[groups.length - 1].push(rowIndex);
        }
        else {
            // A different cell starts a new logical entry.
            groups.push([rowIndex]);
        }
        previousCell = cell;
    });
    return groups;
};
/**
 * Rewrites an element in place so that its text content reads cleanly:
 *  - removes `<sup>` and `<style>` descendants,
 *  - wraps the content of `<rt>` in parentheses for every `<ruby>` that has
 *    no `<rp>` descendant,
 *  - replaces `<br>` and `<hr>` with a newline,
 *  - appends a newline to every `<li>`.
 *
 * The rewrite is applied at most once per element object. The same cell
 * object can appear several times in a row group (rows are grouped by cell
 * identity), and a second pass would add another pair of parentheses around
 * `<rt>` and another newline after each `<li>`.
 *
 * @param {*} element - Context passed as the second argument to cheerio.
 * @returns {void}
 */
const normalizeElement = (() => {
    // Remembers element objects that were already rewritten; weak so that
    // parsed documents can still be garbage-collected.
    const normalized = new WeakSet();
    return (element) => {
        // Only objects/functions can be WeakSet members; anything else is
        // processed on every call, exactly as before.
        const trackable = (typeof element === "object" && element !== null)
            || typeof element === "function";
        if (trackable) {
            if (normalized.has(element)) {
                return;
            }
            normalized.add(element);
        }
        cheerio_1.default("sup", element).remove();
        cheerio_1.default("style", element).remove();
        cheerio_1.default("ruby", element).each((_, ruby) => {
            // Skip ruby annotations that already carry <rp> elements.
            if (cheerio_1.default("rp", ruby).length > 0) {
                return;
            }
            cheerio_1.default("rt", ruby).prepend("(").append(")");
        });
        cheerio_1.default("br", element).replaceWith("\n");
        cheerio_1.default("hr", element).replaceWith("\n");
        cheerio_1.default("li", element).append("\n");
    };
})();
/**
 * Default title parser: normalizes each element in place, then collects its
 * text with every run of newlines collapsed to a single space.
 *
 * @param {Array<*>} elements - Cells to read text from.
 * @returns {Array<string>} The cleaned text of each element, in order.
 */
const parseSimpleString = (elements) => {
    const toSingleLine = (element) => {
        // Normalization must happen before the text is read.
        normalizeElement(element);
        const text = cheerio_1.default(element).text();
        return text.replace(/\n+/g, " ");
    };
    return lodash_1.default.flatMap(elements, element => toSingleLine(element));
};
/**
 * Default air-date parser: reads the text of the first cell, hands it to
 * moment and formats the result as "YYYY-MM-DD".
 *
 * NOTE(review): the text is passed to moment without a format string, so
 * this presumably relies on the page showing dates in a form moment can
 * parse on its own — confirm for the target wiki language.
 *
 * @param {Array<*>} values - Cells of the date column; only the first is used.
 * @returns {string} The formatted date string produced by moment.
 */
const parseSimpleDate = (values) => {
    const rawText = cheerio_1.default(values[0]).text();
    const parsed = moment_1.default(rawText);
    return parsed.format("YYYY-MM-DD");
};
/**
 * Default directors/writers parser: splits the text of every element on
 * whitespace, drops empty pieces and returns the distinct tokens.
 *
 * NOTE(review): unlike parseSimpleString this does not call
 * normalizeElement first, so `<br>` separators and `<sup>` footnotes are
 * presumably left as-is in the text that gets split — confirm this is
 * intended.
 *
 * @param {Array<*>} elements - Cells to read names from.
 * @returns {Array<string>} Unique whitespace-separated tokens.
 */
const parseSpaceString = (elements) => {
    const tokens = lodash_1.default.flatMap(elements, (element) => {
        const text = cheerio_1.default(element).text();
        const pieces = text.split(/\s+/);
        // Leading/trailing whitespace yields empty strings; discard them.
        return pieces.filter(piece => piece);
    });
    return lodash_1.default.uniq(tokens);
};
/**
 * Fills `data` with the fields extracted from one group of table rows.
 *
 * The cells of every row in `rowMap` are gathered per column; each field
 * whose mapping entry is a number is then set to the result of its parser
 * applied to that column's cells. Fields without a numeric mapping are left
 * untouched.
 *
 * @param {Object} data - Object to populate; mutated and returned.
 * @param {Object} option - Hook options. `option.mapping` maps
 *   title/aired/directors/writers to column indices; `option.parsers` may
 *   override the default parser of each field.
 * @param {Array<Array<*>>} table - Parsed table, indexed as table[row][col].
 * @param {Array<number>} rowMap - Indices of the rows that make up the entry.
 * @returns {Object} The same `data` object.
 */
const createData = (data, option, table, rowMap) => {
    const { mapping = {}, parsers = {} } = option;
    const {
        title: parseTitle = parseSimpleString,
        aired: parseAired = parseSimpleDate,
        directors: parseDirectors = parseSpaceString,
        writers: parseWriters = parseSpaceString
    } = parsers;
    const {
        title: titleColumn,
        aired: airedColumn,
        directors: directorsColumn,
        writers: writersColumn
    } = mapping;
    // columns[c] holds the cells of column c across all rows of the group.
    const columns = [];
    rowMap.forEach(rowIndex => {
        table[rowIndex].forEach((cell, columnIndex) => {
            if (!lodash_1.default.isArray(columns[columnIndex])) {
                columns[columnIndex] = [];
            }
            columns[columnIndex].push(cell);
        });
    });
    const fields = [
        ["title", titleColumn, parseTitle],
        ["aired", airedColumn, parseAired],
        ["directors", directorsColumn, parseDirectors],
        ["writers", writersColumn, parseWriters]
    ];
    fields.forEach(([key, columnIndex, parse]) => {
        // Only fields with a numeric column mapping are extracted.
        if (lodash_1.default.isNumber(columnIndex)) {
            data[key] = parse(columns[columnIndex]);
        }
    });
    return data;
};
exports.transformEpisodeHook = (option) => (data, ctx) => {
const { logger, value: dom } = ctx;
const { id, offset = 0, multipleRow, validate } = option;
const tables = dom(`#${id}`).parent().nextAll("table.wikitable").toArray();
if (tables.length <= offset) {
logger.error("Offset greater than table count.", {
id,
offset,
tableCount: tables.length
});
throw new Error(`There are only ${tables.length} table(s).`);
}
const tableNode = tables[offset];
const table = cheerio_table_parser_1.parseTable(tableNode, { parser: e => e });
validate === null || validate === void 0 ? void 0 : validate(table);
const rowNum = ctx.episode;
const rowMap = mapMultipleRows(table, multipleRow)[rowNum];
return createData(data, option, table, rowMap);
};
exports.transformUrlHook = (language, topic) => (_, ctx) => {
const { logger } = ctx;
const url = `https://${language}.wikipedia.org/wiki/${topic}`;
logger.debug("wikiTransformUrlHook", { url });
return url;
};
exports.validateText = (cells) => (table) => {
cells.forEach(([row, col, except]) => {
const value = cheerio_1.default(table[row][col]).text();
if (value !== except) {
throw new Error(`Except "${value}" to be "${except}"`);
}
});
};
//# sourceMappingURL=index.js.map