UNPKG

@hardbulls/wbsc-crawler

Version:

Tool to crawl events, leagues and statistics from WBSC-based websites.

133 lines (132 loc) 5.74 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.StatisticsCrawler = void 0; const Selector_1 = require("./Parser/Selector"); const fetch_1 = require("./fetch"); exports.StatisticsCrawler = { crawl: (options) => __awaiter(void 0, void 0, void 0, function* () { let battingStatistics = {}; let pitchingStatistics = {}; let fieldingStatistics = {}; if (options.batting) { battingStatistics = yield crawlUrl(options.batting); } if (options.pitching) { pitchingStatistics = yield crawlUrl(options.pitching); } if (options.fielding) { fieldingStatistics = yield crawlUrl(options.fielding); } const names = [ ...new Set([ ...Object.keys(battingStatistics), ...Object.keys(pitchingStatistics), ...Object.keys(fieldingStatistics), ]), ]; const result = []; for (const name of names) { const playerStatistics = { name, statistics: { batting: (battingStatistics[name] && battingStatistics[name].statistics) || {}, pitching: (pitchingStatistics[name] && pitchingStatistics[name].statistics) || {}, fielding: (fieldingStatistics[name] && fieldingStatistics[name].statistics) || {}, }, }; result.push(playerStatistics); } return result; }), }; const crawlUrl = (url) => __awaiter(void 0, void 0, void 0, function* () { const { JSDOM } = yield Promise.resolve().then(() => __importStar(require("jsdom"))); const html = yield (yield (0, fetch_1.fetchUrl)(url, { method: "GET" })).text(); const dom = new JSDOM(html); const table = (0, Selector_1.querySelectorOrThrow)(dom.window.document, "table.table-condensed"); const headers = table.querySelectorAll("thead > tr > th"); const rows = table.querySelectorAll("tbody > tr"); const headerMapping = Array.from(headers).reduce((result, header, index) => { const text = header.textContent; if (text) { result[index] = text.toLowerCase(); } return result; }, {}); const NAME_CLASS = "player"; 
const TEAM_CLASS = "team"; const results = {}; for (const row of rows.values()) { const rowResult = { name: "", statistics: {}, }; for (const [index, column] of row.querySelectorAll("td").entries()) { if (headerMapping[index]) { if (column.classList.contains(NAME_CLASS)) { rowResult.name = parseNameColumn(column); } else if (!column.classList.contains(TEAM_CLASS)) { const value = column.textContent; if (value) { let parsedValue = 0; if (value.startsWith(".")) { parsedValue = Number.parseFloat(`0${value}`); } else { parsedValue = Number.parseFloat(value); } rowResult.statistics[headerMapping[index]] = parsedValue; } } } } results[rowResult.name] = rowResult; } return results; }); const parseNameColumn = (column) => { var _a; let lastName = (_a = column.querySelector("strong")) === null || _a === void 0 ? void 0 : _a.textContent; let firstName = column.textContent || undefined; if (lastName) { firstName = firstName === null || firstName === void 0 ? void 0 : firstName.substring(lastName.length).trim(); lastName = `${lastName.charAt(0)}${lastName.slice(1).toLowerCase()}`; } return [firstName, lastName].filter(Boolean).join(" "); };