UNPKG

@hardbulls/wbsc-crawler

Version:

Tool to crawl events, leagues and statistics from WBSC based websites.

144 lines (143 loc) 7.07 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.GameCrawler = void 0; const jsdom_1 = require("jsdom"); const date_fns_1 = require("date-fns"); const GameStatus_1 = require("./Model/GameStatus"); const Selector_1 = require("./Parser/Selector"); const NodeNotFoundError_1 = require("./Parser/NodeNotFoundError"); const date_fns_tz_1 = require("date-fns-tz"); const fetch_1 = require("./fetch"); exports.GameCrawler = { crawl: (url, timezone) => __awaiter(void 0, void 0, void 0, function* () { const html = yield (yield (0, fetch_1.fetchUrl)(url, { method: "GET" })).text(); const dom = new jsdom_1.JSDOM(html); const appElement = dom.window.document.querySelector("#app"); if (appElement && appElement.hasAttribute("data-page")) { return crawlAppJson(dom, timezone); } return crawlHtml(dom, timezone); }), }; function crawlHtml(dom, timezone) { var _a, _b, _c, _d, _e; const rows = dom.window.document.querySelectorAll(".schedule-item"); const games = []; for (const row of rows) { const gameInfo = (0, Selector_1.querySelectorOrThrow)(row, "a.box-score-link:nth-child(1)"); // TODO find a game where is set... // eslint-disable-next-line @typescript-eslint/no-unused-vars const note = null; const venue = (_a = (0, Selector_1.querySelectorOrThrow)(gameInfo, "div:nth-child(1) > p:nth-child(2)").textContent) === null || _a === void 0 ? void 0 : _a.trim(); const date = (0, Selector_1.querySelectorOrThrow)(gameInfo, "div:nth-child(2) > p:nth-child(2)"); if (!date || !date.textContent) { throw new NodeNotFoundError_1.NodeNotFoundError(2); } let parsedDate = (0, date_fns_1.parse)(date.textContent, "dd/MM/yyyy, HH:mm", new Date()); if (!(0, date_fns_1.isValid)(parsedDate)) { parsedDate = (0, date_fns_1.parse)(date.textContent, "dd/MM/yyyy --:--", new Date()); if ((0, date_fns_1.isValid)(parsedDate)) { parsedDate.setHours(14, 0); } } if (timezone) { parsedDate = (0, date_fns_tz_1.fromZonedTime)(parsedDate, timezone); } const teamInfo = row.querySelector(".score") || row.querySelector(".regular-score") || (0, Selector_1.querySelectorOrThrow)(row, ".baseball-score-bug"); const awayTeamInfo = (0, Selector_1.querySelectorOrThrow)(teamInfo, "div.team-info:nth-child(1)"); const awayTeamName = (_b = (0, Selector_1.querySelectorOrThrow)(awayTeamInfo, "p:nth-child(4)p:nth-child(4)").textContent) === null || _b === void 0 ? void 0 : _b.trim(); const homeTeamInfo = (0, Selector_1.querySelectorOrThrow)(teamInfo, "div.team-info:nth-child(3)"); const homeTeamName = (_c = (0, Selector_1.querySelectorOrThrow)(homeTeamInfo, "p:nth-child(4)p:nth-child(4)").textContent) === null || _c === void 0 ? void 0 : _c.trim(); const scoreInfo = row.querySelector("div.score > div:nth-child(2) > p") || row.querySelector("div.regular-score > div:nth-child(2) > p") || (0, Selector_1.querySelectorOrThrow)(row, "div.baseball-score-bug > div:nth-child(2) > p"); const scoreInfoText = (_d = scoreInfo.textContent) === null || _d === void 0 ? void 0 : _d.trim(); let awayScore = 0; let homeScore = 0; if (scoreInfoText) { const parsedScore = scoreInfoText .split(":") .map((v) => Number.parseInt(v.trim())); awayScore = parsedScore[0] || 0; homeScore = parsedScore[1] || 0; } const gameStatusInfo = (0, Selector_1.querySelectorOrThrow)(row, "div.calendar-buttons a > div > p"); let gameStatus = GameStatus_1.GameStatus.SCHEDULED; const statusText = (_e = gameStatusInfo.textContent) === null || _e === void 0 ? void 0 : _e.trim().toLowerCase(); if (statusText && ["w.o.", "forfeit"].includes(statusText)) { gameStatus = GameStatus_1.GameStatus.FORFEIT; } else if (statusText && ["fortsetzung", "suspended"].includes(statusText)) { gameStatus = GameStatus_1.GameStatus.SUSPENDED; } else if (statusText && ("final" === statusText || statusText.match(/^f\/\d+$/))) { gameStatus = GameStatus_1.GameStatus.FINISHED; } else if (statusText === "canceled") { gameStatus = GameStatus_1.GameStatus.CANCELED; } games.push({ venue: venue || "Unknown", home: homeTeamName || "Unknown", away: awayTeamName || "Unknown", awayScore: awayScore, homeScore: homeScore, status: gameStatus, date: parsedDate, note: null, }); } return games; } function crawlAppJson(dom, timezone) { const appElement = dom.window.document.querySelector("#app"); const dataPage = appElement.getAttribute("data-page"); const data = JSON.parse(dataPage); const games = []; for (const gameData of data.props.games) { let gameStatus = GameStatus_1.GameStatus.SCHEDULED; if (gameData.gamestatus === 3) { gameStatus = GameStatus_1.GameStatus.FINISHED; } else if (gameData.gamestatus === 4) { gameStatus = GameStatus_1.GameStatus.FORFEIT; } else if (gameData.gamestatus === 0) { gameStatus = GameStatus_1.GameStatus.SCHEDULED; } else if (gameData.gamestatus === -3) { gameStatus = GameStatus_1.GameStatus.CANCELED; } else if (gameData.gamestatus === -2) { gameStatus = GameStatus_1.GameStatus.SUSPENDED; } let parsedDate = gameData.start; if (timezone) { parsedDate = (0, date_fns_tz_1.fromZonedTime)(parsedDate, timezone); } games.push({ venue: [gameData.stadium, gameData.location].join(", ") || "Unknown", home: gameData.homelabel || "Unknown", away: gameData.awaylabel || "Unknown", awayScore: gameData.awayruns, homeScore: gameData.homeruns, status: gameStatus, date: parsedDate, note: gameData.note || null, }); } return games; }