UNPKG

@alline/hook-wiki

Version:

Wikipedia hook for Alline.

119 lines 4.67 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.validateText = exports.transformUrlHook = exports.transformEpisodeHook = void 0; const cheerio_table_parser_1 = require("@joshuaavalon/cheerio-table-parser"); const lodash_1 = __importDefault(require("lodash")); const moment_1 = __importDefault(require("moment")); const cheerio_1 = __importDefault(require("cheerio")); const mapMultipleRows = (table, multipleRow) => { if (lodash_1.default.isUndefined(multipleRow)) { return lodash_1.default.range(table.length).map(i => [i]); } const rowMap = []; let index = 0; let prev = undefined; table.forEach((cols, row) => { if (!lodash_1.default.isArray(rowMap[index])) { rowMap[index] = []; } const current = cols[multipleRow]; if (lodash_1.default.eq(current, prev) || row === 0) { rowMap[index].push(row); } else { index++; rowMap[index] = [row]; } prev = current; }); return rowMap; }; const normalizeElement = (element) => { cheerio_1.default("sup", element).remove(); cheerio_1.default("style", element).remove(); cheerio_1.default("ruby", element).each((_, ruby) => { if (cheerio_1.default("rp", ruby).length > 0) { return; } cheerio_1.default("rt", ruby).prepend("(").append(")"); }); cheerio_1.default("br", element).replaceWith("\n"); cheerio_1.default("hr", element).replaceWith("\n"); cheerio_1.default("li", element).append("\n"); }; const parseSimpleString = (elements) => lodash_1.default.flatMap(elements, e => { normalizeElement(e); return cheerio_1.default(e).text().replace(/\n+/g, " "); }); const parseSimpleDate = (values) => moment_1.default(cheerio_1.default(values[0]).text()).format("YYYY-MM-DD"); const parseSpaceString = (elements) => { const result = lodash_1.default.flatMap(elements, e => cheerio_1.default(e) .text() .split(/\s+/) .filter(v => v)); return lodash_1.default.uniq(result); }; const createData = (data, option, table, rowMap) => { const { mapping = {}, parsers = {} } = option; const { title: parseTitle = parseSimpleString, aired: parseAired = parseSimpleDate, directors: parseDirectors = parseSpaceString, writers: parseWriters = parseSpaceString } = parsers; const { title, aired, directors, writers } = mapping; const values = []; rowMap.forEach(row => { table[row].forEach((col, i) => { if (!lodash_1.default.isArray(values[i])) { values[i] = []; } values[i] = [...values[i], col]; }); }); if (lodash_1.default.isNumber(title)) { data.title = parseTitle(values[title]); } if (lodash_1.default.isNumber(aired)) { data.aired = parseAired(values[aired]); } if (lodash_1.default.isNumber(directors)) { data.directors = parseDirectors(values[directors]); } if (lodash_1.default.isNumber(writers)) { data.writers = parseWriters(values[writers]); } return data; }; exports.transformEpisodeHook = (option) => (data, ctx) => { const { logger, value: dom } = ctx; const { id, offset = 0, multipleRow, validate } = option; const tables = dom(`#${id}`).parent().nextAll("table.wikitable").toArray(); if (tables.length <= offset) { logger.error("Offset greater than table count.", { id, offset, tableCount: tables.length }); throw new Error(`There are only ${tables.length} table(s).`); } const tableNode = tables[offset]; const table = cheerio_table_parser_1.parseTable(tableNode, { parser: e => e }); validate === null || validate === void 0 ? void 0 : validate(table); const rowNum = ctx.episode; const rowMap = mapMultipleRows(table, multipleRow)[rowNum]; return createData(data, option, table, rowMap); }; exports.transformUrlHook = (language, topic) => (_, ctx) => { const { logger } = ctx; const url = `https://${language}.wikipedia.org/wiki/${topic}`; logger.debug("wikiTransformUrlHook", { url }); return url; }; exports.validateText = (cells) => (table) => { cells.forEach(([row, col, except]) => { const value = cheerio_1.default(table[row][col]).text(); if (value !== except) { throw new Error(`Except "${value}" to be "${except}"`); } }); }; //# sourceMappingURL=index.js.map