// generate-db-shop-urls
// Magically generate Deutsche Bahn ticket URLs.
// Viewer metadata: 229 lines (195 loc) • 7.12 kB, JavaScript. Version: not given.
const moment = require('moment-timezone')
const {strictEqual} = require('assert')
const {createHash} = require('crypto')
const slugg = require('slugg')
const trim = require('trim-newlines')
const cheerio = require('cheerio')
const url = require('url')
const debug = require('debug')('generate-db-shop-urls:parse')
const {showDetails} = require('./helpers')
// Builds the URL of the next booking step for one result row.
// Reads the `href` of the row's `.buttonbold` element and returns `null` if
// there is none. Otherwise the link is returned with its `HWAI` query
// parameter set to `showDetails(true)`.
// `outbound` and `returning` are accepted but not used in here.
const nextStepLink = (outbound, returning, $, row) => {
	const bookingHref = $('.buttonbold', row).attr('href')
	if (bookingHref) {
		const parsed = url.parse(bookingHref, true)
		parsed.query.HWAI = showDetails(true)
		// Without the cached `search` string, url.format() serialises `query` instead.
		delete parsed.search
		return url.format(parsed)
	}
	return null
}
// Resolves a wall-clock time like "20:53" (Europe/Berlin) found in `str` to an
// ISO 8601 string, relative to `base`.
// - `base`: point in time (anything moment-timezone accepts) the result must not lie before
// - `str`: text containing the first `HH:MM` occurrence to parse
// Returns `null` if `str` contains no `HH:MM` time. The result is the first
// occurrence of that time at or after the minute of `base`: if the time would
// lie before `base` on the same Berlin day, the following day is used.
const parseTime = (base, str) => {
	const match = /(\d{2}):(\d{2})/.exec(str)
	if (!match) return null

	// Truncate to the minute: the parsed text has no seconds, so neither the
	// result nor the "is it before base?" check may depend on those of `base`.
	const baseMinute = moment.tz(base, 'Europe/Berlin').startOf('minute')
	const dt = baseMinute.clone()
		.hours(parseInt(match[1], 10))
		.minutes(parseInt(match[2], 10))
	if (dt < baseMinute) dt.add(1, 'days')
	return dt.toISOString()
}
strictEqual(parseTime('2020-04-08T17:16Z', 'ab 20:53 '), '2020-04-08T18:53:00.000Z')
strictEqual(parseTime('2020-04-08T19:16Z', 'an 23:53 \n'), '2020-04-08T21:53:00.000Z')
strictEqual(parseTime('2020-04-08T19:16Z', '00:12'), '2020-04-08T22:12:00.000Z')
strictEqual(parseTime('2020-04-08T17:16:30Z', '19:16'), '2020-04-08T17:16:00.000Z')
// Extracts the times of a `.time` cell, relative to `base`.
// - `plannedWhen` is parsed from the first text child node of the first matched element
// - `when` is parsed from the text of its `.delay`/`.delayOnTime` descendants
//   (presumably the realtime value – confirm against the markup)
// - `delay` is `when - plannedWhen` in seconds, or `null` if either one is missing
const parseWhen = (base, node) => {
	const textNode = node.get(0).childNodes.find(child => child.type === 'text')
	const plannedText = (textNode && textNode.data) || ''
	const plannedWhen = parseTime(base, plannedText)

	const currentText = node.find('.delay, .delayOnTime').text() || ''
	const when = parseTime(base, currentText)

	const delay = (when && plannedWhen)
		? Math.round((new Date(when) - new Date(plannedWhen)) / 1000)
		: null
	return {plannedWhen, when, delay}
}
// Query parameters of a booking link that are left out when deriving the
// journey ID from it.
const irrelevantBookingLinkParams = [
	'protocol',
	'ident',
	'bcrvglpreis',
	'services',
	'showAvail',
]

// Derives a journey ID from a booking link: the SHA-1 hex digest of the link's
// query parameters, without the irrelevant ones, sorted by parameter name and
// re-serialised. Because of the sorting, the ID does not depend on the order
// in which the parameters appear in the link.
// `link` must be an absolute URL, otherwise `new URL()` throws a TypeError.
const parseJourneyIdFromBookingLink = (link) => {
	const u = new URL(link)
	const p = Array.from(u.searchParams.entries())
		.filter(([k]) => !irrelevantBookingLinkParams.includes(k))
		// The comparator receives two `[key, value]` entries; compare the keys of both.
		.sort(([kA], [kB]) => kA < kB ? -1 : (kA > kB ? 1 : 0))
	return createHash('sha1')
		.update(new URLSearchParams(p).toString())
		.digest('hex')
}
strictEqual(
	parseJourneyIdFromBookingLink('https://reiseauskunft.bahn.de/bin/query.exe/dn?ld=43121&protocol=https:&seqnr=1&ident=1j.020193121.1654088623&rt=1&rememberSortType=minDeparture&sTID=C0-1.0@1&oCID=C0-1&orderSOP=yes&showAvail=yes&completeFulfillment=1&hafasSessionExpires=0106221518&zielorth=Hanau&zielortb=rheinmain&zielorta=DEU&xcoorda=8929003&ycoorda=50120957&distancea=193&zielortm=Hanau%20Hbf&services=hbma&bcrvglpreis=8260&HWAI=SELCON!lastsel=C0-1!&'),
	createHash('sha1')
		.update('HWAI=SELCON%21lastsel%3DC0-1%21&completeFulfillment=1&distancea=193&hafasSessionExpires=0106221518&ld=43121&oCID=C0-1&orderSOP=yes&rememberSortType=minDeparture&rt=1&sTID=C0-1.0%401&seqnr=1&xcoorda=8929003&ycoorda=50120957&zielorta=DEU&zielortb=rheinmain&zielorth=Hanau&zielortm=Hanau+Hbf')
		.digest('hex'),
)
// Returns a reducer `(data, row) => data` that folds the detail rows of one
// connection into `data.journey.legs`:
// - rows with the class `intermediate` are ignored
// - a `sectionDeparture` row starts a new leg (origin, departure, lines)
// - a `sectionArrival` row completes the most recently added leg (destination, arrival)
// - rows that would belong to a leg, but appear before any departure row, are skipped
// Times are resolved relative to the planned departure of the first leg of
// `returning` (if `isReturn` is truthy) or of `outbound`.
const parseLegs = (outbound, returning, isReturn, $) => (data, row) => {
	const classes = (row.attribs.class || '').split(/\s+/)
	if (classes.includes('intermediate')) return data // skip walking etc.

	const isFirstOfLeg = classes.includes('sectionDeparture')
	const isLastOfLeg = classes.includes('sectionArrival')
	const {legs} = data.journey

	// A departure row opens a new leg, every other row refers to the last added one.
	const leg = isFirstOfLeg ? {public: true} : legs[legs.length - 1]
	// There is no leg to attach this row to; skip it instead of throwing a TypeError.
	if (!leg) return data

	if (isFirstOfLeg || isLastOfLeg) {
		const station = trim($('.station', row).text().trim().split(/\n+/)[0])
		leg[isFirstOfLeg ? 'origin' : 'destination'] = {
			type: 'station',
			id: null,
			name: station
		}

		// The platform is the 2nd whitespace-separated token of the cell's text.
		const platform = $('.platform', row).text().trim().split(/\s+/)[1]
		// todo: prognosedPlatform?
		leg[isFirstOfLeg ? 'departurePlatform' : 'arrivalPlatform'] = platform

		const base = isReturn
			? returning.legs[0].plannedDeparture
			: outbound.legs[0].plannedDeparture
		// todo: base might be *after* the date/time to parse, or `null`
		const {when, plannedWhen, delay} = parseWhen(base, $('.time', row))
		// todo: cancelled, prognosedWhen
		leg[isFirstOfLeg ? 'departure' : 'arrival'] = when
		leg[isFirstOfLeg ? 'plannedDeparture' : 'plannedArrival'] = plannedWhen
		leg[isFirstOfLeg ? 'departureDelay' : 'arrivalDelay'] = delay
	}

	if (isFirstOfLeg) {
		leg.lines = $('.products a', row).get()
			.map((l) => {
				// Collapse *every* run of whitespace, not just the first one.
				let name = $(l).text().trim().replace(/\s+/g, ' ')
				let fahrtNr = null
				// "<name> (<fahrtNr>)" -> split into its two parts
				if (name.includes('(')) {
					const m = /^([\w\s]+)\s\(([^)]+)\)$/.exec(name)
					if (m) {
						name = m[1]
						fahrtNr = m[2]
					}
				}
				return {
					type: 'line',
					id: slugg(fahrtNr || name),
					name,
					fahrtNr,
				}
			})
			.filter(l => !!l.id && !!l.name)

		legs.push(leg)
	}
	return data
}
// Sets `id` on every origin/destination in `journey.legs` whose slugged name
// equals the slugged `name`. Mutates `journey` in place, returns nothing.
const tagStationInJourney = (journey, name, id) => {
	const normalized = slugg(name)
	// Note: This is almost as brittle as looking through the list of all DB stations and matching by name. Find a better way!
	for (const leg of journey.legs) {
		// A leg's `destination` is only set once its arrival row has been parsed,
		// so either end may be missing; ignore those instead of throwing.
		for (const stop of [leg.origin, leg.destination]) {
			if (stop && slugg(stop.name) === normalized) stop.id = id
		}
	}
}
// Goes through the `.stationInfo select option` elements of a result row and
// tags the matching stations of `journey` with the option's `value`.
// Options without an `href` or without a `value` attribute are ignored.
const tagStations = ($, row, journey) => {
	$('.stationInfo select option', row).get().forEach((option) => {
		// NOTE(review): `<option>` elements usually carry no `href`, so this
		// check may skip every option – confirm against the actual markup.
		const {href, value: betreiberNr} = option.attribs
		// https://data.deutschebahn.com/dataset/data-haltestellen.html
		// todo: translate to HAFAS/IBNR
		if (!href || !betreiberNr) return

		const name = trim($(option).text().trim())
		tagStationInJourney(journey, name, betreiberNr)
	})
}
// Parses the first German-formatted price in `str`, e.g. "19,90 EUR",
// "49 EUR", "1.019,90 EUR" or "19,90".
// A number only counts as a price if it has a decimal part ("," + 1–2 digits)
// and/or is followed by a 3-letter uppercase currency code; this keeps stray
// numbers in the surrounding text from being picked up.
// Returns `{amount, currency}`; `currency` defaults to 'EUR'. If `str` is
// empty or contains no price, both fields are `null`.
const parsePrice = (str) => {
	if (!str) return {amount: null, currency: null}

	// 1: integer part, optionally with "." as thousands separator
	// 2: decimal digits, 3: currency after the decimals
	// 4: currency directly after an integer-only amount
	const m = /(\d{1,3}(?:\.\d{3})+|\d+)(?:,(\d{1,2})(?:\s*([A-Z]{3}))?|\s*([A-Z]{3}))/.exec(str)
	if (!m) return {amount: null, currency: null}

	const whole = parseInt(m[1].replace(/\./g, ''), 10)
	// "19,9" means 19.90, not 19.09
	const cents = m[2] ? parseInt(m[2].padEnd(2, '0'), 10) : 0
	return {
		// Do the arithmetic on integer cents to avoid floating-point noise.
		amount: (whole * 100 + cents) / 100,
		currency: m[3] || m[4] || 'EUR'
	}
}
// Creates a parser for the HTML of a results page.
// The returned function loads `html` and produces one `{journey, nextStep}`
// entry per `.scheduledCon`/`.liveCon` row inside `#resultsOverviewContainer`.
// Rows are left out if they have no next-step (booking) link, no legs, or
// neither a discounted nor a standard price.
const parse = (outbound, returning, isReturn) => (html) => {
	const $ = cheerio.load(html)
	const rows = $('.scheduledCon, .liveCon', '#resultsOverviewContainer').get()

	const results = []
	rows.forEach((row, journeyI) => {
		const nextStep = nextStepLink(outbound, returning, $, row)
		if (!nextStep) {
			debug('journey without next step (booking link)', journeyI)
			return
		}

		const bookingLink = nextStep // todo
		// Prefer an ID derived from the booking link, fall back to the row's position.
		let id = bookingLink ? parseJourneyIdFromBookingLink(bookingLink) : bookingLink
		if (!id) id = (isReturn ? 'returning' : 'outbound') + '-' + journeyI

		const initial = {
			journey: {
				type: 'journey',
				id,
				legs: []
			}
		}
		const detailRows = $('.details .connectionDetails li', row).get()
		const {journey} = detailRows.reduce(parseLegs(outbound, returning, isReturn, $), initial)
		if (journey.legs.length === 0) {
			debug('journey with 0 legs, ignoring', journeyI, journey)
			return
		}

		tagStations($, row, journey)

		// todo: rename discount -> totalDiscount, price -> totalPrice
		const discount = parsePrice($('.farePep .fareOutput', row).text())
		const price = parsePrice($('.fareStd .fareOutput', row).text())
		if (discount.amount || price.amount) {
			journey.price = price
			journey.discount = discount
			results.push({journey, nextStep})
		}
	})
	return results
}
module.exports = parse