UNPKG

@fcc-cdc/it-events

Version:
99 lines (98 loc) 6.15 kB
"use strict";
// ---------------------------------------------------------------------------
// Compiler-emitted runtime helpers that emulate `async function*`, `await`
// and `yield*` on top of a plain generator (they have the shape of the
// TypeScript/tslib helpers of the same names). Their logic is intentionally
// left untouched; only line breaks and statement terminators were restored so
// the file parses without relying on automatic semicolon insertion.
// ---------------------------------------------------------------------------
var __await = (this && this.__await) || function (v) {
    return this instanceof __await ? (this.v = v, this) : new __await(v);
};
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
    var m = o[Symbol.asyncIterator], i;
    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
};
var __asyncDelegator = (this && this.__asyncDelegator) || function (o) {
    var i, p;
    return i = {}, verb("next"), verb("throw", function (e) { throw e; }), verb("return"), i[Symbol.iterator] = function () { return this; }, i;
    function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: false } : f ? f(v) : v; } : f; }
};
var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
    var g = generator.apply(thisArg, _arguments || []), i, q = [];
    return i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i;
    function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }
    function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
    function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
    function fulfill(value) { resume("next", value); }
    function reject(value) { resume("throw", value); }
    function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.CommonAgendaCrawler = exports.TimePattern = void 0;
const jsdom_1 = require("jsdom");
const web_utility_1 = require("web-utility");
const utility_1 = require("../utility");
const core_1 = require("./core");

/**
 * Matches a clock time such as `9:00` or `09:30`; the separator may be an
 * ASCII colon or a full-width colon, optionally surrounded by whitespace.
 */
exports.TimePattern = /\d{1,2}\s*[::]\s*\d{2}/;

/** Elements whose text is treated as "heading" text inside an agenda item. */
const HeadingSelector = `h1, h2, h3, h4, h5, h6, strong, b`;

/** Comparator ordering strings from shortest to longest by byte length. */
const byByteLength = (a, b) => (0, web_utility_1.byteLength)(a) - (0, web_utility_1.byteLength)(b);

/**
 * Heuristic agenda crawler: locates the repeated "time" elements of a page,
 * takes their shared container as the agenda box and turns each of its
 * children into agenda records.
 */
class CommonAgendaCrawler extends core_1.AgendaCrawler {
    /**
     * Loads `URI` with JSDOM and asynchronously yields one agenda record per
     * detected agenda entry.
     *
     * While iteration is in progress the parsed document is kept on
     * `this.document` (read by `getItem`); it is cleared again when the
     * generator finishes, fails, or is closed early by the consumer.
     *
     * @param {string} URI - address of the page to crawl
     * @returns {AsyncGenerator<object>} agenda records shaped like
     *   `{ mentor: { name, position, avatar }, title, summary, startTime, endTime }`
     * @throws {TypeError} when the page contains no text matching `TimePattern`
     */
    getList(URI) {
        return __asyncGenerator(this, arguments, function* getList_1() {
            const { window: { document } } = yield __await(jsdom_1.JSDOM.fromURL(URI));
            this.document = document;
            try {
                // Collect a CSS selector for the parent of every text node
                // (nodeType 3) whose content looks like a clock time.
                const timeBoxes = Array.from((0, web_utility_1.walkDOM)(document.body, 3), ({ nodeValue, parentElement }) => exports.TimePattern.test(nodeValue) && {
                    selector: (0, utility_1.getCSSSelector)(parentElement, document.body, utility_1.CSSSelectorPrecision.Medium)
                }).filter(Boolean);
                const timeBoxCount = (0, web_utility_1.countBy)(timeBoxes, ({ selector }) => selector);
                // The selector shared by the most time nodes is taken to be
                // the agenda's time column.
                const [mostFrequent] = Object.entries(timeBoxCount).sort(([, a], [, b]) => b - a);
                if (!mostFrequent)
                    // Same exception type as the former destructuring failure,
                    // but with a message that explains what went wrong.
                    throw new TypeError(`No time-like text found in ${URI}`);
                const [agendaTimeSelector] = mostFrequent;
                const [first, second] = document.querySelectorAll(agendaTimeSelector);
                // NOTE(review): sameParentOf() is defined in ../utility;
                // presumably it returns the common container of both nodes.
                const agendaBox = (0, utility_1.sameParentOf)(first, second);
                const agendaBoxSelector = (0, utility_1.getCSSSelector)(agendaBox, document.body, utility_1.CSSSelectorPrecision.High);
                // Table bodies are handled row by row, anything else child by child.
                const isTable = agendaBox.tagName.toLowerCase() === 'tbody';
                for (let i = 0; i < agendaBox.childElementCount; i++) {
                    if (isTable)
                        yield __await(yield* __asyncDelegator(__asyncValues(this.getItems(agendaBox.children[i]))));
                    else
                        yield yield __await(yield __await(this.getItem(`${agendaBoxSelector} > :nth-child(${i + 1})`)));
                }
            }
            finally {
                // Never leave a stale document on the instance, even on
                // errors or when the consumer stops iterating early.
                this.document = undefined;
            }
        });
    }

    /**
     * Builds one agenda record from the element matched by `selector` in the
     * document currently held on `this.document`.
     *
     * Text nodes are split into a time string, heading texts (inside
     * `HeadingSelector`) and body texts. Within each group the shortest text
     * becomes the first field and the next one the second field:
     * headings -> `name`, `title`; body -> `position`, `summary`.
     *
     * @param {string} selector - CSS selector of the agenda item
     * @returns {Promise<object>} the record, or `{}` when there is no document
     *   or nothing matches `selector`
     */
    async getItem(selector) {
        const { document } = this;
        const agendaItem = document == null ? undefined : document.querySelector(selector);
        if (!agendaItem)
            return {};
        let time = '';
        const head = [];
        const body = [];
        for (const { parentElement, nodeValue } of (0, web_utility_1.walkDOM)(agendaItem, 3)) {
            const text = nodeValue.trim();
            // Whitespace-only text nodes (markup indentation) carry no
            // information and would otherwise sort first as empty strings.
            if (!text)
                continue;
            if (exports.TimePattern.test(text)) {
                time = text;
                continue;
            }
            // closest() also tests the element itself, so no separate matches().
            const isHeading = parentElement.closest(HeadingSelector) !== null;
            (isHeading ? head : body).push(text);
        }
        const [startTime, endTime] = time.split(/[^\d::]+/);
        const [name, title] = head.sort(byByteLength);
        const [position, summary] = body.sort(byByteLength);
        const image = agendaItem.querySelector('img[src]');
        const avatar = image == null ? undefined : image.src;
        return {
            mentor: { name, position, avatar },
            title,
            summary,
            startTime,
            endTime
        };
    }

    /**
     * Builds agenda records from one table row: the first child supplies the
     * time range, every following child is one agenda entry.
     *
     * The non-blank text lines of each entry are ordered from shortest to
     * longest and assigned to `name`, `position`, `title`, `summary`.
     *
     * @param {{ children: Iterable<Element> }} row - element whose children
     *   are the cells of the row
     * @returns {object[]} one record per cell after the first; empty when the
     *   row has no cells
     */
    getItems({ children }) {
        const [time, ...agendas] = [...children];
        if (!time)
            return [];
        const [startTime, endTime] = time.textContent.trim().split(/[^\d::]+/);
        return agendas.map(agendaItem => {
            const [name, position, title, summary] = agendaItem.textContent
                .split('\n')
                .map(line => line.trim())
                // Drop blank lines so they cannot take the "shortest" slots.
                .filter(Boolean)
                .sort(byByteLength);
            const image = agendaItem.querySelector('img[src]');
            const avatar = image == null ? undefined : image.src;
            return {
                mentor: { name, position, avatar },
                title,
                summary,
                startTime,
                endTime
            };
        });
    }
}
exports.CommonAgendaCrawler = CommonAgendaCrawler;