/*
 * @hardbulls/wbsc-crawler
 * Version:
 * Tool to crawl events, leagues and statistics from WBSC-based websites.
 * 216 lines (215 loc) • 7.33 kB
 * JavaScript
 */
;
// Drives a generator function as if it were an async function: every value the
// generator yields is waited on, and the outcome is fed back into the generator.
// NOTE(review): this has the shape of the helper a compiler emits for
// down-levelled async/await — presumably generated rather than hand-written.
//
// An existing `this.__awaiter` is reused when present; otherwise the function
// below is used.
//   thisArg, _arguments - receiver and argument list applied to `generator`
//   P                   - promise constructor; replaced by the global Promise when falsy
//   generator           - generator function whose `yield`s act as wait points
// Returns a promise that resolves with the generator's return value and rejects
// with any error thrown while stepping the generator.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Values that are not already instances of P are wrapped so `.then` is always available.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a resolved value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished: resolve with the return value. Otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (overwriting the `generator` parameter with the iterator) and take the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.JsonStatisticsCrawler = void 0;
const jsdom_1 = require("jsdom");
const fetch_1 = require("./fetch");
// Lookup table consumed by normalizePitchingStats: each property name is a key
// read from the raw pitching statistics object, and its value is the key the
// stat is stored under in the normalized result.
// Most raw keys carry a "pitch_" prefix; "bavg" and "era" do not.
const pitchingKeyMap = {
pitch_win: "w",
pitch_loss: "l",
pitch_appear: "g",
pitch_gs: "gs",
pitch_cg: "cg",
pitch_sho: "sho",
pitch_save: "sv",
pitch_ip: "ip",
pitch_r: "r",
pitch_er: "er",
pitch_h: "h",
pitch_bb: "bb",
pitch_so: "so",
pitch_double: "2b",
pitch_triple: "3b",
pitch_hr: "hr",
pitch_ab: "ab",
// "bavg" is one of the keys crawlUrl divides by 1000 before it reaches the normalizer.
bavg: "bavg",
pitch_wp: "wp",
pitch_hbp: "hbp",
pitch_bk: "bk",
pitch_sfa: "sf",
pitch_sha: "sh",
pitch_whip: "whip",
era: "era",
};
// Lookup table consumed by normalizeBattingStats: each property name is a key
// read from the raw batting statistics object, and its value is the key the
// stat is stored under in the normalized result.
// Raw keys present here that crawlUrl divides by 1000: avg, slg, obp, ops.
const battingKeyMap = {
g: "g",
gs: "gs", // optional; NOTE(review): presumably absent from the earlier output format — confirm
ab: "ab",
r: "r",
h: "h",
double: "2b",
triple: "3b",
hr: "hr",
rbi: "rbi",
tb: "tb", // optional; NOTE(review): presumably absent from the earlier output format — confirm
avg: "avg",
slg: "slg",
obp: "obp",
ops: "ops",
bb: "bb",
hbp: "hp", // raw "hbp" is emitted as "hp" (the pitching map, by contrast, emits "hbp")
so: "so",
gdp: "gdp",
sf: "sf",
sh: "sh",
sb: "sb",
cs: "cs",
// There are no entries for "pa" or "ibb": per the original author's note they
// are not present in the new format, so they never appear in the normalized output.
};
// Lookup table consumed by normalizeFieldingStats: each property name is a key
// read from the raw fielding statistics object, and its value is the key the
// stat is stored under in the normalized result.
// Unlike the batting and pitching maps, every output key listed here always
// appears in the normalized result: normalizeFieldingStats writes 0 when the
// raw key is missing.
const fieldingKeyMap = {
field_g: "g",
field_c: "c",
field_po: "po",
field_a: "a",
field_e: "e",
// "fldp" is one of the keys crawlUrl divides by 1000 before it reaches the normalizer.
fldp: "fldp",
field_dp: "dp",
field_sba: "sba",
field_csb: "csb",
sbap: "sbap",
field_pb: "pb",
field_ci: "ci",
};
exports.JsonStatisticsCrawler = {
crawl: (options) => __awaiter(void 0, void 0, void 0, function* () {
let battingStatistics = {};
let pitchingStatistics = {};
let fieldingStatistics = {};
if (options.batting) {
battingStatistics = yield crawlUrl(options.batting);
}
if (options.pitching) {
pitchingStatistics = yield crawlUrl(options.pitching);
}
if (options.fielding) {
fieldingStatistics = yield crawlUrl(options.fielding);
}
const names = [
...new Set([
...Object.keys(battingStatistics),
...Object.keys(pitchingStatistics),
...Object.keys(fieldingStatistics),
]),
];
const result = [];
for (const name of names) {
const playerStatistics = {
name,
statistics: {
batting: (battingStatistics[name] &&
battingStatistics[name].statistics &&
normalizeBattingStats(battingStatistics[name].statistics)) ||
{},
pitching: (pitchingStatistics[name] &&
pitchingStatistics[name].statistics &&
normalizePitchingStats(pitchingStatistics[name].statistics)) ||
{},
fielding: (fieldingStatistics[name] &&
fieldingStatistics[name].statistics &&
normalizeFieldingStats(fieldingStatistics[name].statistics)) ||
{},
},
};
result.push(playerStatistics);
}
return result;
}),
};
/**
 * Downloads one statistics feed and indexes its players by display name.
 *
 * @param {string} url Feed URL. When its query string has a `team` parameter
 *   that parses to a non-zero integer, only rows whose `teamid` strictly
 *   equals that integer are kept.
 * @returns {Promise<Object<string, {name: string, statistics: Object<string, number>}>>}
 *   Map from parsed player name to that player's numeric statistics. Rows that
 *   parse to the same name overwrite one another; the last row wins.
 */
const crawlUrl = (url) => __awaiter(void 0, void 0, void 0, function* () {
    const { fetchUrl } = fetch_1;
    const response = yield fetchUrl(url, { method: "GET" });
    const payload = yield response.json();

    const query = new URLSearchParams(url.split("?")[1]);
    let teamId = null;
    if (query.has("team")) {
        teamId = parseInt(query.get("team"), 10);
    }

    // Row properties that describe the player rather than a statistic.
    const metadataKeys = new Set(["name", "link", "teamid", "teamcode"]);
    // Rates the feed reports multiplied by 1000 (e.g. 500 stands for 0.500).
    const thousandthsKeys = new Set(["avg", "slg", "obp", "ops", "fldp", "bavg"]);

    const players = {};
    for (const row of payload.data) {
        // A null, NaN or 0 team id is falsy and therefore disables the filter.
        const belongsToOtherTeam = teamId && row.teamid !== teamId;
        if (belongsToOtherTeam) {
            continue;
        }
        const name = parseName(row.name);
        const statistics = {};
        Object.keys(row).forEach((key) => {
            const value = row[key];
            // Only number-typed, non-metadata properties count as statistics.
            if (typeof value !== "number" || metadataKeys.has(key)) {
                return;
            }
            statistics[key] = thousandthsKeys.has(key) ? value / 1000 : value;
        });
        players[name] = { name, statistics };
    }
    return players;
});
/**
 * Extracts a "First Last" display name from an HTML fragment.
 *
 * The text of the first `.firstname` and `.lastname` elements is trimmed and
 * each whitespace-separated word is rewritten with an upper-case first
 * character and a lower-case remainder.
 *
 * @param {string} html Markup expected to contain the name elements.
 * @returns {string} First and last name joined by one space; a part that is
 *   missing or empty is left out, so the result may be a single name or "".
 */
const parseName = (html) => {
    const { document } = new jsdom_1.JSDOM(html).window;

    // Trimmed text of the first match, or undefined when there is no element or no text.
    const readText = (selector) => {
        const element = document.querySelector(selector);
        if (element == null) {
            return undefined;
        }
        const text = element.textContent;
        return text == null ? undefined : text.trim();
    };

    // "jOHN  paul" -> "John Paul"; falsy input -> "".
    const toTitleCase = (text) => {
        if (!text) {
            return "";
        }
        const words = [];
        for (const word of text.split(/\s+/)) {
            words.push(`${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`);
        }
        return words.join(" ");
    };

    const lastName = readText(".lastname");
    const firstName = readText(".firstname");

    const parts = [];
    for (const rawPart of [firstName, lastName]) {
        const formatted = toTitleCase(rawPart);
        if (formatted) {
            parts.push(formatted);
        }
    }
    return parts.join(" ");
};
/**
 * Renames raw pitching statistics to their normalized keys.
 *
 * @param {Object<string, number|string>} rawStats Statistics keyed by the
 *   property names of pitchingKeyMap.
 * @returns {Object<string, number>} Statistics keyed by the values of
 *   pitchingKeyMap. Raw keys that are missing, null or undefined are omitted;
 *   string values are converted with parseFloat.
 */
const normalizePitchingStats = (rawStats) => {
    const toNumber = (value) => (typeof value === "string" ? parseFloat(value) : value);
    return Object.entries(pitchingKeyMap).reduce((normalized, [rawKey, outputKey]) => {
        const value = rawStats[rawKey];
        if (value != null) {
            normalized[outputKey] = toNumber(value);
        }
        return normalized;
    }, {});
};
/**
 * Renames raw batting statistics to their normalized keys.
 *
 * @param {Object<string, number|string>} rawStats Statistics keyed by the
 *   property names of battingKeyMap.
 * @returns {Object<string, number>} Statistics keyed by the values of
 *   battingKeyMap. Raw keys that are missing, null or undefined are omitted;
 *   string values are converted with Number.parseFloat.
 */
const normalizeBattingStats = (rawStats) => {
    const result = {};
    Object.keys(battingKeyMap).forEach((sourceKey) => {
        const raw = rawStats[sourceKey];
        if (raw === null || raw === undefined) {
            return;
        }
        const targetKey = battingKeyMap[sourceKey];
        result[targetKey] = typeof raw === "string" ? Number.parseFloat(raw) : raw;
    });
    return result;
};
/**
 * Renames raw fielding statistics to their normalized keys.
 *
 * Every output key named in fieldingKeyMap is always present in the result:
 * a raw key that is missing, null or undefined is reported as 0.
 *
 * @param {Object<string, number|string>} rawStats Statistics keyed by the
 *   property names of fieldingKeyMap.
 * @returns {Object<string, number>} Statistics keyed by the values of
 *   fieldingKeyMap; string values are converted with Number.parseFloat.
 */
const normalizeFieldingStats = (rawStats) => {
    const output = {};
    for (const sourceKey of Object.keys(fieldingKeyMap)) {
        const raw = rawStats[sourceKey];
        // Fielding fields are treated as required, so absent values default to 0.
        let numeric = 0;
        if (raw != null) {
            numeric = typeof raw === "string" ? Number.parseFloat(raw) : raw;
        }
        output[fieldingKeyMap[sourceKey]] = numeric;
    }
    return output;
};