UNPKG

@hardbulls/wbsc-crawler

Version:

Tool to crawl events, leagues and statistics from WBSC based websites.

183 lines (182 loc) 9.4 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.GameCrawler = void 0; const date_fns_1 = require("date-fns"); const GameStatus_1 = require("./Model/GameStatus"); const Selector_1 = require("./Parser/Selector"); const NodeNotFoundError_1 = require("./Parser/NodeNotFoundError"); const date_fns_tz_1 = require("date-fns-tz"); const fetch_1 = require("./fetch"); exports.GameCrawler = { crawl: (url, options) => __awaiter(void 0, void 0, void 0, function* () { const { JSDOM } = yield Promise.resolve().then(() => __importStar(require("jsdom"))); const html = yield (yield (0, fetch_1.fetchUrl)(url, { method: "GET" })).text(); const dom = new JSDOM(html); const appElement = dom.window.document.querySelector("#app"); if (appElement && appElement.hasAttribute("data-page")) { return crawlAppJson(dom, options); } return crawlHtml(dom, options === null || options === void 0 ? void 0 : options.timezone); }), }; function crawlHtml(dom, timezone) { var _a, _b, _c, _d, _e; const rows = dom.window.document.querySelectorAll(".schedule-item"); const games = []; for (const row of rows) { const gameInfo = (0, Selector_1.querySelectorOrThrow)(row, "a.box-score-link:nth-child(1)"); // TODO find a game where is set... // eslint-disable-next-line @typescript-eslint/no-unused-vars const note = null; const venue = (_a = (0, Selector_1.querySelectorOrThrow)(gameInfo, "div:nth-child(1) > p:nth-child(2)").textContent) === null || _a === void 0 ? void 0 : _a.trim(); const date = (0, Selector_1.querySelectorOrThrow)(gameInfo, "div:nth-child(2) > p:nth-child(2)"); if (!date || !date.textContent) { throw new NodeNotFoundError_1.NodeNotFoundError(2); } let parsedDate = (0, date_fns_1.parse)(date.textContent, "dd/MM/yyyy, HH:mm", new Date()); if (!(0, date_fns_1.isValid)(parsedDate)) { parsedDate = (0, date_fns_1.parse)(date.textContent, "dd/MM/yyyy --:--", new Date()); if ((0, date_fns_1.isValid)(parsedDate)) { parsedDate.setHours(14, 0); } } if (timezone) { parsedDate = (0, date_fns_tz_1.fromZonedTime)(parsedDate, timezone); } const teamInfo = row.querySelector(".score") || row.querySelector(".regular-score") || (0, Selector_1.querySelectorOrThrow)(row, ".baseball-score-bug"); const awayTeamInfo = (0, Selector_1.querySelectorOrThrow)(teamInfo, "div.team-info:nth-child(1)"); const awayTeamName = (_b = (0, Selector_1.querySelectorOrThrow)(awayTeamInfo, "p:nth-child(4)p:nth-child(4)").textContent) === null || _b === void 0 ? void 0 : _b.trim(); const homeTeamInfo = (0, Selector_1.querySelectorOrThrow)(teamInfo, "div.team-info:nth-child(3)"); const homeTeamName = (_c = (0, Selector_1.querySelectorOrThrow)(homeTeamInfo, "p:nth-child(4)p:nth-child(4)").textContent) === null || _c === void 0 ? void 0 : _c.trim(); const scoreInfo = row.querySelector("div.score > div:nth-child(2) > p") || row.querySelector("div.regular-score > div:nth-child(2) > p") || (0, Selector_1.querySelectorOrThrow)(row, "div.baseball-score-bug > div:nth-child(2) > p"); const scoreInfoText = (_d = scoreInfo.textContent) === null || _d === void 0 ? void 0 : _d.trim(); let awayScore = 0; let homeScore = 0; if (scoreInfoText) { const parsedScore = scoreInfoText .split(":") .map((v) => Number.parseInt(v.trim())); awayScore = parsedScore[0] || 0; homeScore = parsedScore[1] || 0; } const gameStatusInfo = (0, Selector_1.querySelectorOrThrow)(row, "div.calendar-buttons a > div > p"); let gameStatus = GameStatus_1.GameStatus.SCHEDULED; const statusText = (_e = gameStatusInfo.textContent) === null || _e === void 0 ? void 0 : _e.trim().toLowerCase(); if (statusText && ["w.o.", "forfeit"].includes(statusText)) { gameStatus = GameStatus_1.GameStatus.FORFEIT; } else if (statusText && ["fortsetzung", "suspended"].includes(statusText)) { gameStatus = GameStatus_1.GameStatus.SUSPENDED; } else if (statusText && ("final" === statusText || statusText.match(/^f\/\d+$/))) { gameStatus = GameStatus_1.GameStatus.FINISHED; } else if (statusText === "canceled") { gameStatus = GameStatus_1.GameStatus.CANCELED; } games.push({ venue: venue || "Unknown", home: homeTeamName || "Unknown", away: awayTeamName || "Unknown", awayScore: awayScore, homeScore: homeScore, status: gameStatus, date: parsedDate, note: null, }); } return games; } function crawlAppJson(dom, options) { var _a, _b, _c, _d, _e, _f; const appElement = dom.window.document.querySelector("#app"); const dataPage = appElement.getAttribute("data-page"); const data = JSON.parse(dataPage); const games = []; const tournamentkey = (_c = (_b = (_a = data.props) === null || _a === void 0 ? void 0 : _a.tournament) === null || _b === void 0 ? void 0 : _b.tournamentkey) !== null && _c !== void 0 ? _c : null; const tournamentid = (_f = (_e = (_d = data.props) === null || _d === void 0 ? void 0 : _d.tournament) === null || _e === void 0 ? void 0 : _e.id) !== null && _f !== void 0 ? _f : null; for (const gameData of data.props.games) { let gameStatus = GameStatus_1.GameStatus.SCHEDULED; if (gameData.gamestatus === 1) { gameStatus = GameStatus_1.GameStatus.ONGOING; } else if (gameData.gamestatus === 3 || gameData.gamestatus === 2) { gameStatus = GameStatus_1.GameStatus.FINISHED; } else if (gameData.gamestatus === 4) { gameStatus = GameStatus_1.GameStatus.FORFEIT; } else if (gameData.gamestatus === 0) { gameStatus = GameStatus_1.GameStatus.SCHEDULED; } else if (gameData.gamestatus === -3) { gameStatus = GameStatus_1.GameStatus.CANCELED; } else if (gameData.gamestatus === -2) { gameStatus = GameStatus_1.GameStatus.SUSPENDED; } let parsedDate = gameData.start; if (options === null || options === void 0 ? void 0 : options.timezone) { parsedDate = (0, date_fns_tz_1.fromZonedTime)(parsedDate, options.timezone); } let tickerUrl = null; if (tournamentkey && (options === null || options === void 0 ? void 0 : options.tickerUrlPattern) && gameData.id) { tickerUrl = options.tickerUrlPattern .replace("{tournamentkey}", tournamentkey !== null && tournamentkey !== void 0 ? tournamentkey : "") .replace("{id}", gameData.id); } games.push({ venue: [gameData.stadium, gameData.location].join(", ") || "Unknown", home: gameData.homelabel || "Unknown", away: gameData.awaylabel || "Unknown", awayScore: gameData.awayruns, homeScore: gameData.homeruns, status: gameStatus, date: parsedDate, note: gameData.note || null, tickerUrl: tickerUrl, externalTournamentId: `${tournamentid}`, externalTournamentKey: `${tournamentkey}`, externalGameId: `${gameData.id}`, }); } return games; }