UNPKG

@hardbulls/wbsc-crawler

Version:

Tool to crawl events, leagues and statistics from WBSC based websites.

216 lines (215 loc) 7.33 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.JsonStatisticsCrawler = void 0; const jsdom_1 = require("jsdom"); const fetch_1 = require("./fetch"); const pitchingKeyMap = { pitch_win: "w", pitch_loss: "l", pitch_appear: "g", pitch_gs: "gs", pitch_cg: "cg", pitch_sho: "sho", pitch_save: "sv", pitch_ip: "ip", pitch_r: "r", pitch_er: "er", pitch_h: "h", pitch_bb: "bb", pitch_so: "so", pitch_double: "2b", pitch_triple: "3b", pitch_hr: "hr", pitch_ab: "ab", bavg: "bavg", pitch_wp: "wp", pitch_hbp: "hbp", pitch_bk: "bk", pitch_sfa: "sf", pitch_sha: "sh", pitch_whip: "whip", era: "era", }; const battingKeyMap = { g: "g", gs: "gs", // optional, not in original but you might want to keep ab: "ab", r: "r", h: "h", double: "2b", triple: "3b", hr: "hr", rbi: "rbi", tb: "tb", // optional, not in original avg: "avg", slg: "slg", obp: "obp", ops: "ops", bb: "bb", hbp: "hp", // renamed from hbp to hp so: "so", gdp: "gdp", sf: "sf", sh: "sh", sb: "sb", cs: "cs", // "pa" and "ibb" are not present in new format — keep that in mind }; const fieldingKeyMap = { field_g: "g", field_c: "c", field_po: "po", field_a: "a", field_e: "e", fldp: "fldp", field_dp: "dp", field_sba: "sba", field_csb: "csb", sbap: "sbap", field_pb: "pb", field_ci: "ci", }; exports.JsonStatisticsCrawler = { crawl: (options) => __awaiter(void 0, void 0, void 0, function* () { let battingStatistics = {}; let pitchingStatistics = {}; let fieldingStatistics = {}; if (options.batting) { battingStatistics = yield crawlUrl(options.batting); } if (options.pitching) { pitchingStatistics = yield crawlUrl(options.pitching); } if (options.fielding) { fieldingStatistics = yield crawlUrl(options.fielding); } const names = [ ...new Set([ ...Object.keys(battingStatistics), ...Object.keys(pitchingStatistics), ...Object.keys(fieldingStatistics), ]), ]; const result = []; for (const name of names) { const playerStatistics = { name, statistics: { batting: (battingStatistics[name] && battingStatistics[name].statistics && normalizeBattingStats(battingStatistics[name].statistics)) || {}, pitching: (pitchingStatistics[name] && pitchingStatistics[name].statistics && normalizePitchingStats(pitchingStatistics[name].statistics)) || {}, fielding: (fieldingStatistics[name] && fieldingStatistics[name].statistics && normalizeFieldingStats(fieldingStatistics[name].statistics)) || {}, }, }; result.push(playerStatistics); } return result; }), }; const crawlUrl = (url) => __awaiter(void 0, void 0, void 0, function* () { const response = yield (0, fetch_1.fetchUrl)(url, { method: "GET" }); const json = yield response.json(); const params = new URLSearchParams(url.split("?")[1]); const teamId = params.has("team") ? parseInt(params.get("team"), 10) : null; const results = {}; for (const playerData of json.data) { if (teamId && playerData.teamid !== teamId) { continue; } const name = parseName(playerData.name); const statistics = {}; for (const [key, value] of Object.entries(playerData)) { if (key !== "name" && key !== "link" && key !== "teamid" && key !== "teamcode" && typeof value === "number") { statistics[key] = key === "avg" || key === "slg" || key === "obp" || key === "ops" || key === "fldp" || key === "bavg" ? value / 1000 // normalize from 500 => 0.500 : value; } } results[name] = { name, statistics, }; } return results; }); const parseName = (html) => { var _a, _b, _c, _d; const dom = new jsdom_1.JSDOM(html); const last = (_b = (_a = dom.window.document .querySelector(".lastname")) === null || _a === void 0 ? void 0 : _a.textContent) === null || _b === void 0 ? void 0 : _b.trim(); const first = (_d = (_c = dom.window.document .querySelector(".firstname")) === null || _c === void 0 ? void 0 : _c.textContent) === null || _d === void 0 ? void 0 : _d.trim(); const capitalizeWords = (s) => s ? s .split(/\s+/) .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) .join(" ") : ""; return [capitalizeWords(first), capitalizeWords(last)] .filter(Boolean) .join(" "); }; const normalizePitchingStats = (rawStats) => { const normalized = {}; for (const [newKey, originalKey] of Object.entries(pitchingKeyMap)) { if (rawStats[newKey] != null) { const value = rawStats[newKey]; normalized[originalKey] = typeof value === "string" ? parseFloat(value) : value; } } return normalized; }; const normalizeBattingStats = (rawStats) => { const normalized = {}; for (const [newKey, originalKey] of Object.entries(battingKeyMap)) { if (rawStats[newKey] != null) { const value = rawStats[newKey]; normalized[originalKey] = typeof value === "string" ? Number.parseFloat(value) : value; } } return normalized; }; const normalizeFieldingStats = (rawStats) => { const normalized = {}; for (const [newKey, originalKey] of Object.entries(fieldingKeyMap)) { if (rawStats[newKey] != null) { const value = rawStats[newKey]; normalized[originalKey] = typeof value === "string" ? Number.parseFloat(value) : value; } else { // If missing, but required field normalized[originalKey] = 0; } } return normalized; };