@hardbulls/wbsc-crawler
Version:
Tool to crawl events, leagues and statistics from WBSC-based websites.
133 lines (132 loc) • 5.74 kB
JavaScript
;
/**
 * TypeScript-emitted helper that re-exports property `key` of `source` on
 * `target` (optionally under a different name).
 *
 * A helper already present on `this` is reused when one exists. When
 * `Object.create` is available the property is defined through a descriptor:
 * the source's own descriptor is reused unless it is missing, is a data
 * property that is writable or configurable, or is an accessor on a module
 * not flagged `__esModule` -- in those cases an enumerable getter reading
 * `source[key]` is installed instead. Without `Object.create` the current
 * value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportedAs = alias === undefined ? key : alias;
        const existing = Object.getOwnPropertyDescriptor(source, key);
        let needsGetter;
        if (!existing) {
            needsGetter = true;
        }
        else if ("get" in existing) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = existing.writable || existing.configurable;
        }
        const descriptor = needsGetter
            ? { enumerable: true, get: () => source[key] }
            : existing;
        Object.defineProperty(target, exportedAs, descriptor);
    }
    : function (target, source, key, alias) {
        const exportedAs = alias === undefined ? key : alias;
        target[exportedAs] = source[key];
    });
/**
 * TypeScript-emitted helper that stores `value` as the `default` member of
 * the namespace object `namespace`.
 *
 * A helper already present on `this` is reused when one exists. When
 * `Object.create` is available the member is defined as an enumerable
 * property via `Object.defineProperty`; otherwise it is plainly assigned.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            const descriptor = { enumerable: true, value: value };
            Object.defineProperty(namespace, "default", descriptor);
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
/**
 * TypeScript-emitted helper implementing `import * as ns from "..."` for
 * CommonJS modules.
 *
 * A helper already present on `this` is reused when one exists. Modules
 * flagged `__esModule` are returned as-is. For anything else a fresh
 * namespace object is built: every own enumerable key except "default" is
 * bound through `__createBinding`, and the module itself becomes the
 * namespace's `default` via `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    const namespace = {};
    if (mod != null) {
        for (const key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * TypeScript-emitted helper that runs a generator function as an asynchronous
 * routine: every yielded value is waited on and its settled result is fed
 * back into the generator.
 *
 * A helper already present on `this` is reused when one exists.
 *
 * @param {*} thisArg - `this` value the generator function is applied with.
 * @param {*} _arguments - Arguments applied to the generator function; `[]` is used when falsy.
 * @param {Function} P - Promise constructor to use; the global `Promise` is used when falsy.
 * @param {Function} generator - Generator function to drive.
 * @returns {*} A `P` instance resolved with the generator's return value, or
 *   rejected with an error thrown while advancing the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap anything that is not already a `P` instance so `.then` can be called on it.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; an exception it throws rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw the rejection reason into the generator so its own try/catch can observe it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolve once the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (rebinding `generator` to the iterator) and process its first result.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Flag this CommonJS module with `__esModule`; `__importStar` above returns modules carrying that flag unchanged.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; the actual object is assigned further down.
exports.StatisticsCrawler = void 0;
// Project-local module providing `querySelectorOrThrow`.
const Selector_1 = require("./Parser/Selector");
// Project-local module providing `fetchUrl`.
const fetch_1 = require("./fetch");
exports.StatisticsCrawler = {
crawl: (options) => __awaiter(void 0, void 0, void 0, function* () {
let battingStatistics = {};
let pitchingStatistics = {};
let fieldingStatistics = {};
if (options.batting) {
battingStatistics = yield crawlUrl(options.batting);
}
if (options.pitching) {
pitchingStatistics = yield crawlUrl(options.pitching);
}
if (options.fielding) {
fieldingStatistics = yield crawlUrl(options.fielding);
}
const names = [
...new Set([
...Object.keys(battingStatistics),
...Object.keys(pitchingStatistics),
...Object.keys(fieldingStatistics),
]),
];
const result = [];
for (const name of names) {
const playerStatistics = {
name,
statistics: {
batting: (battingStatistics[name] && battingStatistics[name].statistics) ||
{},
pitching: (pitchingStatistics[name] && pitchingStatistics[name].statistics) ||
{},
fielding: (fieldingStatistics[name] && fieldingStatistics[name].statistics) ||
{},
},
};
result.push(playerStatistics);
}
return result;
}),
};
/**
 * Converts the rows of a statistics table into per-player records.
 *
 * Header cells (`thead > tr > th`) supply the statistic names: the trimmed,
 * lower-cased header text of column N names the value found in cell N of
 * every body row. Columns without header text are ignored, as are cells
 * carrying the `team` class. The cell carrying the `player` class supplies
 * the player's name (via `parseNameColumn`, defined elsewhere in this file).
 *
 * @param {Element} table - Table element to read.
 * @returns {Object<string, {name: string, statistics: Object<string, number>}>}
 *   Records keyed by player name. Rows for which no name could be determined
 *   are left out; if a name occurs more than once the last row wins.
 */
const parseStatisticsTable = (table) => {
    const NAME_CLASS = "player";
    const TEAM_CLASS = "team";
    // Column index -> statistic name.
    const headerMapping = {};
    Array.from(table.querySelectorAll("thead > tr > th")).forEach((header, index) => {
        const label = (header.textContent || "").trim().toLowerCase();
        if (label) {
            headerMapping[index] = label;
        }
    });
    const results = {};
    for (const row of table.querySelectorAll("tbody > tr").values()) {
        const rowResult = {
            name: "",
            statistics: {},
        };
        for (const [index, column] of row.querySelectorAll("td").entries()) {
            // The name cell is recognised by its class alone, so an unlabeled
            // header above it does not leave the row nameless.
            if (column.classList.contains(NAME_CLASS)) {
                rowResult.name = parseNameColumn(column);
                continue;
            }
            const label = headerMapping[index];
            if (!label || column.classList.contains(TEAM_CLASS)) {
                continue;
            }
            const text = (column.textContent || "").trim();
            if (text) {
                // parseFloat already understands a bare leading "." (".333" -> 0.333).
                rowResult.statistics[label] = Number.parseFloat(text);
            }
        }
        // Nameless rows would all collide on the "" key, so they are dropped.
        if (rowResult.name) {
            results[rowResult.name] = rowResult;
        }
    }
    return results;
};
/**
 * Downloads a statistics page and extracts the per-player values from its
 * `table.table-condensed` element.
 *
 * @param {*} url - Page location, passed unchanged to `fetchUrl`.
 * @returns {Promise<Object<string, {name: string, statistics: Object<string, number>}>>}
 *   Records keyed by player name.
 * @throws Whatever `fetchUrl` or `querySelectorOrThrow` throw (the latter
 *   presumably when the table is missing -- confirm against its implementation).
 */
const crawlUrl = (url) => __awaiter(void 0, void 0, void 0, function* () {
    const { JSDOM } = yield Promise.resolve().then(() => __importStar(require("jsdom")));
    const response = yield (0, fetch_1.fetchUrl)(url, { method: "GET" });
    const html = yield response.text();
    const dom = new JSDOM(html);
    try {
        const table = (0, Selector_1.querySelectorOrThrow)(dom.window.document, "table.table-condensed");
        return parseStatisticsTable(table);
    }
    finally {
        // Only plain data leaves this function, so the window can be released
        // as soon as parsing is finished (or has failed).
        dom.window.close();
    }
});
/**
 * Builds a "First Last" display name from a player table cell.
 *
 * The text of the cell's `<strong>` element is taken as the last name and is
 * re-cased to an initial character followed by lower case ("MUELLER" ->
 * "Mueller"). Whatever cell text remains once the last name is removed is
 * the first name. Surrounding whitespace is ignored, and the last name may
 * appear anywhere in the cell text rather than only at its very start.
 *
 * @param {Element} column - Table cell holding the player's name.
 * @returns {string} The display name; the trimmed cell text when the cell has
 *   no `<strong>` text, or an empty string when the cell has no text at all.
 */
const parseNameColumn = (column) => {
    const strong = column.querySelector("strong");
    const lastName = ((strong && strong.textContent) || "").trim();
    const fullText = (column.textContent || "").trim();
    if (!lastName) {
        return fullText;
    }
    // Cut the last name out where it actually occurs instead of assuming it
    // occupies the first `lastName.length` characters of the cell text.
    const position = fullText.indexOf(lastName);
    let firstName = fullText;
    if (position !== -1) {
        const before = fullText.slice(0, position).trim();
        const after = fullText.slice(position + lastName.length).trim();
        firstName = [before, after].filter(Boolean).join(" ");
    }
    const displayLastName = `${lastName.charAt(0)}${lastName.slice(1).toLowerCase()}`;
    return [firstName, displayLastName].filter(Boolean).join(" ");
};