UNPKG

mls-elo

Version:

Calculate ELO rankings for Major League Soccer

336 lines (322 loc) 14.5 kB
const cheerio = require('cheerio'); const db = require('sqlite'); const fs = require('fs-extra'); const safeGet = require ('safe-get'); const ccl = require('../data/ccl'); const competitions = require('../data/competitions'); const teams = require('../data/teams'); const valuations = require('../data/valuations'); const getBirthdateFromAgeString = require('../lib/getBirthdateFromAgeString'); const heightToInches = require('../lib/heightToInches'); const calculateFantasyPoints = require('../lib/calculateFantasyPoints'); module.exports = async (argv) => { const {dbPath, directory, verbose} = argv; if (verbose) { console.info(`getting connection to database ${dbPath}`); } await db.open(dbPath, { Promise }); await loadCheckedInData(verbose); await loadFormData(verbose, directory); await loadPlayerData(verbose); }; async function loadCheckedInData(verbose) { const teamsStatement = `INSERT INTO teams (teamname, abbreviation, location, start, end, founded, dissolved) VALUES ${ teams.reduce((acc, val) => `${acc} ("${val.name}", "${val.abbreviation}", "${val.location}", ${val.start}, ${val.end}, ${(new Date(val.founded)).getTime()}, ${val.dissolved ? (new Date(val.dissolved)).getTime() : val.dissolved}),`, '').slice(0, -1) }`; const competitionsStatement = `INSERT INTO competitions (competitionname, k, m) VALUES ${ competitions.reduce((acc, val) => `${acc} ("${val.name}", ${val.k}, ${val.m}),`, '').slice(0, -1) }`; if (verbose) { console.info(teamsStatement); console.info(competitionsStatement); } await Promise.all([ db.run(competitionsStatement), db.run(teamsStatement), ]); const valuationPromises = []; Object.keys(valuations).forEach(year => { valuations[year].forEach(team => { const insertStatement = ` INSERT INTO valuations (valuationteamid, value, revenue, income, year) VALUES ( (SELECT teamid FROM teams where teamname = "${team.teamname}"), ${team.value}, ${team.revenue}, ${team.income}, ${year} ); `; if (verbose) { console.info(insertStatement); } valuationPromises.push(db.run(insertStatement)); }); }); await Promise.all(valuationPromises); await Promise.all(ccl.map(cclmatch => { const insertQuery = ` INSERT INTO matches (hometeam, awayteam, matchcompetition, homegoals, awaygoals, date) VALUES ( (SELECT teamid FROM teams where abbreviation = "${cclmatch.home}"), (SELECT teamid FROM teams where abbreviation = "${cclmatch.away}"), (SELECT competitionid FROM competitions where competitionname = "${competitions[5].name}"), ${cclmatch.homeGoals}, ${cclmatch.awayGoals}, ${cclmatch.date} )`; console.log(insertQuery); return db.run(insertQuery); })); } async function loadFormData(verbose, directory) { for (let year = 1996; year <= 2017; year++) { if (verbose) { console.info(`reading files from ${directory}/${year}.html`); } const $ = cheerio.load(await fs.readFile(`${directory}/${year}.html`)); $('#center_content > div.ct_wrapper > div.results-map > div.scroll-container > table > tbody > tr').each((i, row) => { if (i === 0) { return; } const th = $(`#center_content > div.ct_wrapper > div.results-map > table > tbody > tr:nth-child(${i+1}) > th`); const abbreviation = th[0].children[0].data; const team = teams.reduce((team, acc) => team.abbreviation === abbreviation ? team : acc, []); if (verbose) { console.info(`${abbreviation} ${team.name}`); } row.children.forEach(async child => { if (!(child.children)) { return; } let dateStr = safeGet(child, 'children[3].children[1].children[3].children[1].children[0].data'); let opponent = safeGet(child, 'children[3].children[1].children[3].children[3].children[0].data'); let scoreline = safeGet(child, 'children[3].children[1].children[3].children[5].children[0].data'); let date = (new Date(`${dateStr} ${year}`)).getTime(); if (date && opponent && scoreline) { if (verbose) { console.info(`played: ${date}, ${team.name} ${opponent}, ${scoreline}`); } const [goals, opponentGoals] = scoreline.split('-'); if (opponent.includes('at ')) { const location = opponent.replace('at ', ''); const q = ` SELECT matchid FROM matches WHERE hometeam = (SELECT teamid FROM teams where location = "${location}") AND awayteam = (SELECT teamid FROM teams where abbreviation = "${abbreviation}") AND date = ${date} AND matchcompetition = (SELECT competitionid FROM competitions where competitionname = "${competitions[3].name}"); `; const results = await db.get(q); if (results && results.matchid) { if (verbose) { console.info(`Updating existing match with id ${matchid}`); } db.run(` UPDATE matches SET homegoals = ${goals}, awaygoals = ${opponentGoals} WHERE matchs.matchid = ${results.matchid} )`); } else { db.run(` INSERT INTO matches (hometeam, awayteam, matchcompetition, homegoals, awaygoals, date) VALUES ( (SELECT teamid FROM teams where location = "${location}"), (SELECT teamid FROM teams where abbreviation = "${abbreviation}"), (SELECT competitionid FROM competitions where competitionname = "${competitions[3].name}"), ${goals}, ${opponentGoals}, ${date} )`); } } } else { dateStr = safeGet(child, 'children[3].children[3].children[1].children[0].data'); opponent = safeGet(child, 'children[3].children[3].children[3].children[0].data'); scoreline = safeGet(child, 'children[3].children[3].children[5].children[0].data'); date = (new Date(`${dateStr} ${year}`)).getTime(); if (verbose) { console.info(`scheduled: ${date} ${opponent} ${year} ${scoreline}`); } if (opponent.includes('at ')) { const location = opponent.replace('at ', ''); db.run(` INSERT INTO matches (hometeam, awayteam, matchcompetition, date) VALUES ( (SELECT teamid FROM teams where location = "${location}"), (SELECT teamid FROM teams where abbreviation = "${abbreviation}"), (SELECT competitionid FROM competitions where competitionname = "${competitions[3].name}"), ${date} ) `); } } }); }); } } async function loadPlayerData(verbose) { const playerFiles = await fs.readdir('.cached/players'); let playerid; for (let playerFile of playerFiles) { const $ = cheerio.load(await fs.readFile(`.cached/players/${playerFile}`)); const titleOverlay = $('div.title_overlay'); const playerInfo = $('div.player_info_alternate'); const hometown = playerInfo.find('.hometown').length > 0 ? playerInfo.find('.hometown')[0].children[1].data.replace('\n', '') : null; const birthplace = playerInfo.find('.hometown').length > 1 ? playerInfo.find('.hometown')[1].children[1].data.replace('\n', '') : null; const twitter = playerInfo.find('.twitter_handle').length > 0 ? playerInfo.find('.twitter_handle')[0].children[0].children[0].data : null; const weight = playerInfo.find('.stat').length > 1 ? playerInfo.find('.stat')[1].children[0].data : null; const jersey = titleOverlay.find('.jersey_container') && titleOverlay.find('.jersey_container').length > 0 ? titleOverlay.find('.jersey_container')[0].children[0].children[0].data : null; const height = playerInfo.find('.stat') && playerInfo.find('.stat').length > 0 ? heightToInches(playerInfo.find('.stat')[0].children[0].data) : null; const position = titleOverlay.find('.position') && titleOverlay.find('.position').length > 0 ? titleOverlay.find('.position')[0].children[0].data : null; const insertQuery = ` INSERT INTO players (name, realname, height, weight, hometown, birthdate, birthplace, twitter, position, jersey) VALUES ( "${titleOverlay.find('.title')[0].children[0].data}", "${playerInfo.find('.name')[0].children[1].data.replace('\n', '')}", ${height}, ${weight}, "${hometown}", ${getBirthdateFromAgeString(playerInfo.find('.age')[0].children[1].data)}, "${birthplace}", "${twitter}", "${position}", ${jersey} ); `; const selectQuery = ` SELECT playerid FROM players WHERE birthplace = "${birthplace}" AND twitter = "${twitter}" AND name = "${titleOverlay.find('.title')[0].children[0].data}"; `; if (verbose) { console.info(insertQuery); console.info(selectQuery); } await db.run(insertQuery); playerid = await db.get(selectQuery); // insert match stats $('div.gamelog_tables > table.no-more-tables > tbody > tr').each(async (i, row) => { const result = safeGet(row, 'children[2].children[0].data'); const scorelineStr = safeGet(row, 'children[1].children[0].data'); const dateStr = safeGet(row, 'children[0].children[0].data'); const [awayteam, hometeam] = scorelineStr.split('@'); const date = (new Date(dateStr)).getTime(); const query = ` SELECT matchid FROM matches WHERE date = ${date} AND hometeam = (SELECT teamid FROM teams WHERE abbreviation = "${hometeam.trim()}") AND awayteam = (SELECT teamid FROM teams WHERE abbreviation = "${awayteam.trim()}")`; let matchid = await db.get(query); if (!matchid) { // Player data includes MLS Cup statisitics, meaning we can extract mls // cup matches from them! if (date < 0) { debugger; } let [awaygoals, homegoals] = (result.split(' ')[1]).split('-'); if (homegoals.includes('(')) { /// It's a tie. Add the penalties to the total goals to make rankings work homegoals = homegoals.replace(')', '+'); homegoals = homegoals.replace('(', ''); homegoals = eval(homegoals); awaygoals = awaygoals.replace(')', ''); awaygoals = awaygoals.replace('(', '+'); awaygoals = eval(awaygoals); } db.run(` INSERT INTO matches (hometeam, awayteam, matchcompetition, homegoals, awaygoals, date) VALUES ( (SELECT teamid FROM teams where abbreviation = "${hometeam.trim()}"), (SELECT teamid FROM teams where abbreviation = "${awayteam.trim()}"), (SELECT competitionid FROM competitions where competitionname = "${competitions[4].name}"), ${homegoals}, ${awaygoals}, ${date} ) `); matchid = await db.get(query); } if (verbose) { console.info(`Processing player id ${JSON.stringify(playerid)} for match id ${JSON.stringify(matchid)}`); } if (position.includes('Goalkeeper')) { const appearance = safeGet(row, 'children[3].children[0].data'); const minutes = safeGet(row, 'children[4].children[0].data'); const goalsfor = safeGet(row, 'children[5].children[0].data'); const goalsagainst = safeGet(row, 'children[6].children[0].data'); const shotsongoal = safeGet(row, 'children[7].children[0].data'); const saves = safeGet(row, 'children[8].children[0].data'); const penaltykicksagainst = safeGet(row, 'children[9].children[0].data'); const penaltykickgoals = safeGet(row, 'children[10].children[0].data'); const penaltykicksaves = safeGet(row, 'children[11].children[0].data'); const fantasypoints = calculateFantasyPoints({appearance, minutes, goalsfor, goalsagainst, shotsongoal, saves, penaltykicksagainst, penaltykickgoals, penaltykicksaves}); db.run(` INSERT INTO goalkeeperMatchLog (playerid, matchid, result, appearance, minutes, goalsfor, goalsagainst, shotsongoal, saves, penaltykicksagainst, penaltykickgoals, penaltykicksaves, fantasypoints) VALUES ( ${playerid.playerid}, ${matchid.matchid}, "${result}", "${appearance}", ${minutes}, ${goalsfor}, ${goalsagainst}, ${shotsongoal}, ${saves}, ${penaltykicksagainst}, ${penaltykickgoals}, ${penaltykicksaves}, ${fantasypoints} ) `); } else { const appearance = safeGet(row, 'children[3].children[0].data'); const minutes = safeGet(row, 'children[4].children[0].data'); const goals = safeGet(row, 'children[5].children[0].data'); const assists = safeGet(row, 'children[6].children[0].data'); const shots = safeGet(row, 'children[7].children[0].data') const shotsongoal = safeGet(row, 'children[8].children[0].data'); const foulscommitted = safeGet(row, 'children[9].children[0].data'); const foulsssuffered = safeGet(row, 'children[10].children[0].data'); const yellows = safeGet(row, 'children[11].children[0].data'); const reds = safeGet(row, 'children[12].children[0].data'); const fantasypoints = calculateFantasyPoints({appearance, minutes, goals, assists, shots, shotsongoal, foulscommitted, foulsssuffered, yellows, reds}); db.run(` INSERT INTO outfieldPlayerMatchLog (playerid, matchid, result, appearance, minutes, goals, assists, shots, shotsongoal, foulscommitted, foulsssuffered, yellows, reds, fantasypoints) VALUES ( ${playerid.playerid}, ${matchid.matchid}, "${result}", "${appearance}", ${minutes}, ${goals}, ${assists}, ${shots}, ${shotsongoal}, ${foulscommitted}, ${foulsssuffered}, ${yellows}, ${reds}, ${fantasypoints} ) `); } }); } }