@hardbulls/wbsc-crawler
Version:
Tool to crawl events, leagues and statistics from WBSC based websites.
122 lines (121 loc) • 6.94 kB
JavaScript
;
// Generator-based async driver (has the shape of the helper TypeScript emits
// when async functions are down-levelled to generators).
// Reuses `this.__awaiter` when one is already defined; otherwise defines it.
//
// thisArg    - `this` value the generator function is applied with.
// _arguments - argument list passed to the generator function (defaults to []).
// P          - promise constructor to use; falls back to the global Promise.
// generator  - generator function; every yielded value is awaited before resuming.
//
// Returns a promise that resolves with the generator's return value and rejects
// with any error thrown while stepping it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in P unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the fulfilled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished generator -> resolve with its return value; otherwise wait on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator and kick off the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.StandingsCrawler = void 0;
const jsdom_1 = require("jsdom");
const Selector_1 = require("./Parser/Selector");
const StandingType_1 = require("./Model/StandingType");
const fetch_1 = require("./fetch");
// Ordered keyword rules: [StandingType member name, substrings that select it].
// Order matters: the first rule with a matching substring wins.
const STANDING_TYPE_RULES = [
    ["FINAL", ["endklassement"]],
    ["REGULAR", ["regular season"]],
    ["PLAYOFF", ["playoff"]],
    ["FINAL_TOURNAMENT", ["finalturnier"]],
    ["GROUP_A", ["group a", "gruppe a"]],
    ["GROUP_B", ["group b", "gruppe b"]],
    ["GROUP_C", ["group c", "gruppe c"]],
    ["WEST", ["west"]],
    ["MIDDLE", ["mitte", "middle"]],
    ["EAST", ["east", "ost"]],
];
/**
 * Maps a standings table title to a StandingType member.
 *
 * Matching is a case-sensitive substring search against lower-case keywords,
 * so the title should be lower-cased by the caller.
 *
 * @param {string} input - Table title to classify.
 * @returns The StandingType member of the first matching rule, or
 *          StandingType.UNKNOWN when no keyword occurs in `input`.
 */
const getStandingType = (input) => {
    const { StandingType } = StandingType_1;
    const matchedRule = STANDING_TYPE_RULES.find(([, keywords]) => keywords.some((keyword) => input.includes(keyword)));
    return matchedRule === undefined ? StandingType.UNKNOWN : StandingType[matchedRule[0]];
};
const IGNORE_TABLES = ["Aktueller Daily Report"].map((v) => v.toLowerCase());
exports.StandingsCrawler = {
crawl: (url) => __awaiter(void 0, void 0, void 0, function* () {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
const html = yield (yield (0, fetch_1.fetchUrl)(url, { method: "GET" })).text();
const dom = new jsdom_1.JSDOM(html);
const tables = dom.window.document.querySelectorAll("div.box-container:has(table)");
const standings = [];
for (const table of tables) {
const tableTitle = ((_a = table.querySelector("h3, h2, h1")) === null || _a === void 0 ? void 0 : _a.textContent) || "";
if (IGNORE_TABLES.includes(tableTitle.toLowerCase())) {
continue;
}
const standingType = getStandingType(tableTitle.toLowerCase());
const standing = {
type: standingType,
results: [],
};
const rows = table.querySelectorAll("table tbody tr");
for (const [index, row] of rows.entries()) {
if (((_b = row.textContent) === null || _b === void 0 ? void 0 : _b.trim()) !== "") {
if (standingType === StandingType_1.StandingType.FINAL) {
if (index > 1) {
const position = Number.parseInt((0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(1)").textContent ||
"-1");
const team = (_c = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(2) small").textContent) === null || _c === void 0 ? void 0 : _c.trim();
const wins = Number.parseInt(((_d = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(4)").textContent) === null || _d === void 0 ? void 0 : _d.trim()) || "0");
const loses = Number.parseInt(((_e = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(5)").textContent) === null || _e === void 0 ? void 0 : _e.trim()) || "0");
const ties = Number.parseInt(((_f = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(6)").textContent) === null || _f === void 0 ? void 0 : _f.trim()) || "0");
if (team) {
standing.results.push({
position,
team,
wins,
loses,
ties,
winsPercentage: Math.round(((wins + 0.5 * ties) / (wins + loses + ties)) * 1000) / 1000,
});
}
}
}
else {
if (index > 0) {
const position = Number.parseInt((0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(1)").textContent ||
"-1");
const team = (_g = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(3) small").textContent) === null || _g === void 0 ? void 0 : _g.trim();
const wins = Number.parseInt(((_h = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(4)").textContent) === null || _h === void 0 ? void 0 : _h.trim()) || "0");
const loses = Number.parseInt(((_j = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(5)").textContent) === null || _j === void 0 ? void 0 : _j.trim()) || "0");
const ties = Number.parseInt(((_k = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(6)").textContent) === null || _k === void 0 ? void 0 : _k.trim()) || "0");
const gamesBehind = Number.parseFloat(((_l = (0, Selector_1.querySelectorOrThrow)(row, "td:nth-child(8)").textContent) === null || _l === void 0 ? void 0 : _l.trim()) || "0");
if (team) {
standing.results.push({
position,
team,
wins,
loses,
ties,
winsPercentage: Math.round(((wins + 0.5 * ties) / (wins + loses + ties)) * 1000) / 1000,
gamesBehind,
});
}
}
}
}
}
if (standing.results.length > 0) {
standings.push(standing);
}
}
return standings;
}),
};