UNPKG

clicktt-crawler

Version:

A DOM crawler to extract results from https://www.mytischtennis.de/clicktt/home/

131 lines (106 loc) 5.11 kB
const translateVector = require('lodash.zip') const fetch = require('./utils/fetch') const extractTable = require('./utils/extractTable') const extractPlayerStatisticTable = require('./utils/extractPlayerStatisticTable') const extractDoubleStatisticTable = require('./utils/extractDoubleStatisticTable') class ClickTTCrawler { constructor (options) { this.baseUrl = options.baseUrl this.organisation = options.organisation this.season = options.season } getTableDataByPrecedingTitle ({ title, html }) { return extractTable({ selector: `h3:contains("${ title }") + table`, html }) } getTableDataById ({ id, html }) { return extractTable({ selector: `#${ id }`, html }) } // https://www.mytischtennis.de/clicktt/PTTV/19-20/ligen/Kreisliga-Sued-West/gruppe/359928/tabelle/gesamt async getStandings ({ league, groupId }) { const url = `${this.baseUrl}/${this.organisation}/${this.season}/ligen/${league}/gruppe/${groupId}/tabelle/gesamt` const html = await fetch({ url }) let data = this.getTableDataByPrecedingTitle({ title: 'Tabelle', html }) // get array of table columns data.shift() // remove first row with empty values data = translateVector(...data) data.shift() // remove labels const labels = ['position', 'club', 'played', 'won', 'drawn', 'lost', 'games', 'difference', 'points'] return data.map((row) => { const clubResult = {} for (let i = 0; i < labels.length; i++) { clubResult[`${ labels[i] }`] = row[i].replace(/\d*\./g, '').trim() // array of values to object { label: value } } return clubResult }) } // https://www.mytischtennis.de/clicktt/PTTV/19-20/ligen/Kreisliga-Sued-West/gruppe/359928/mannschaft/2241162/TTC-Insheim/spielerbilanzen/gesamt async getSchedule ({ league, groupId, clubId, club }) { const url = `${this.baseUrl}/${this.organisation}/${this.season}/ligen/${league}/gruppe/${groupId}/mannschaft/${clubId}/${club}/spielerbilanzen/gesamt` const html = await fetch({ url }) let data = this.getTableDataByPrecedingTitle({ title: 'Spielplan (gesamt)', html }) // get array of table columns data.pop() // remove last item with empty values data.splice(5, 1) // remove 6th item data = translateVector(...data) data.shift() // remove labels const labels = ['date', 'time', 'location', 'home', 'guest', 'result'] return data.map((row) => { const scheduleItem = {} for (let i = 0; i < labels.length; i++) { const column = labels[i]; const content = row[i].trim(); if (column === 'date') { const dateParse = /(\d+)\.(\d+)\.(\d+)/.exec(content) scheduleItem[column] = `${ dateParse[1] }.${ dateParse[2] }.${ dateParse[3] }` } else if (column === 'time') { const timeParse = /(\d+):(\d+)/.exec(content) scheduleItem[column] = (timeParse) ? timeParse[0] : '' } else if (column === 'result') { const resultParse = /(\d+):(\d+)/.exec(content) if (resultParse) { scheduleItem['resultHome'] = parseInt(resultParse[1]) scheduleItem['resultGuest'] = parseInt(resultParse[2]) } } else { scheduleItem[column] = content } } return scheduleItem }) } // https://www.mytischtennis.de/clicktt/PTTV/19-20/ligen/Kreisliga-Sued-West/gruppe/359928/mannschaft/2241162/TTC-Insheim/spielerbilanzen/gesamt async getPlayerStatistics ({ league, groupId, clubId, club }) { const url = `${this.baseUrl}/${this.organisation}/${this.season}/ligen/${league}/gruppe/${groupId}/mannschaft/${clubId}/${club}/spielerbilanzen/gesamt` const html = await fetch({ url }) return extractPlayerStatisticTable({ html }) } // https://www.mytischtennis.de/clicktt/PTTV/19-20/ligen/Kreisliga-Sued-West/gruppe/359928/mannschaft/2241162/TTC-Insheim/spielerbilanzen/gesamt async getDoubleStatistics ({ league, groupId, clubId, club }) { const url = `${this.baseUrl}/${this.organisation}/${this.season}/ligen/${league}/gruppe/${groupId}/mannschaft/${clubId}/${club}/spielerbilanzen/gesamt` const html = await fetch({ url }) return extractDoubleStatisticTable({ html }) } // https://www.mytischtennis.de/clicktt/PTTV/19-20/ligen/Kreisliga-Sued-West/gruppe/359928/spielplan/rr/ async getResults ({ league, groupId, round }) { const url = `${this.baseUrl}/${this.organisation}/${this.season}/ligen/${league}/gruppe/${groupId}/spielplan/${round}` const html = await fetch({ url }) let data = this.getTableDataById({ id: 'playingPlanDesktop', html }) // get array of table columns data = translateVector(...data) data.shift() // remove labels const labels = ['date', 'time', 'location', 'home', 'guest', undefined, 'result'] return data.map((row) => { const teamResult = {} for (let i = 0; i < labels.length; i++) { if(labels[i]) { teamResult[labels[i]] = row[i].trim() } } return teamResult }) } } module.exports = ClickTTCrawler