UNPKG

tardis-dev

Version:

Convenient access to tick-level historical and real-time cryptocurrency market data via Node.js

502 lines (420 loc) 14.5 kB
import crypto, { createHash } from 'crypto'
import { createWriteStream, ensureDirSync, rename, removeSync } from 'fs-extra'
import https, { RequestOptions } from 'https'
import createHttpsProxyAgent from 'https-proxy-agent'
import got, { ExtendOptions } from 'got'
import path from 'path'
import { debug } from './debug'
import { Mapper } from './mappers'
import { Disconnect, Exchange, Filter, FilterForExchange } from './types'
import { SocksProxyAgent } from 'socks-proxy-agent'

/**
 * Parses a date string as a UTC date and returns it truncated to minute precision.
 *
 * @param val date string; a trailing 'Z' is appended when it is missing
 * @returns new Date built from the UTC year, month, day, hour and minute of the parsed value
 */
export function parseAsUTCDate(val: string) {
  // not sure about this one, but it should force parsing date as UTC date not as local timezone
  if (val.endsWith('Z') === false) {
    val += 'Z'
  }
  const date = new Date(val)

  // seconds and milliseconds are not carried over to the result
  return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate(), date.getUTCHours(), date.getUTCMinutes()))
}

/**
 * Returns a promise that resolves after `delayMS` milliseconds.
 */
export function wait(delayMS: number) {
  return new Promise((resolve) => {
    setTimeout(resolve, delayMS)
  })
}

/**
 * Returns 24 random bytes encoded as a hex string.
 */
export function getRandomString() {
  return crypto.randomBytes(24).toString('hex')
}

/**
 * Formats the UTC components of `date` as `YYYY/MM/DD/HH/mm` (zero padded, except the year).
 */
export function formatDateToPath(date: Date) {
  const year = date.getUTCFullYear()
  const month = doubleDigit(date.getUTCMonth() + 1)
  const day = doubleDigit(date.getUTCDate())
  const hour = doubleDigit(date.getUTCHours())
  const minute = doubleDigit(date.getUTCMinutes())

  return `${year}/${month}/${day}/${hour}/${minute}`
}

/**
 * Converts a number to a string, prefixing values lower than 10 with '0'.
 */
export function doubleDigit(input: number) {
  return input < 10 ? '0' + input : '' + input
}

/**
 * Returns the hex encoded SHA-256 digest of `JSON.stringify(obj)`.
 */
export function sha256(obj: object) {
  return createHash('sha256').update(JSON.stringify(obj)).digest('hex')
}

/**
 * Returns a new Date that is `minutes` minutes after `date`.
 */
export function addMinutes(date: Date, minutes: number) {
  return new Date(date.getTime() + minutes * 60000)
}

/**
 * Returns a new Date that is `days` * 1440 minutes after `date`.
 */
export function addDays(date: Date, days: number) {
  return new Date(date.getTime() + days * 60000 * 1440)
}

/**
 * Yields consecutive numbers starting at `seed` (default 0) while they are lower than `end`.
 */
export function* sequence(end: number, seed = 0) {
  let current = seed
  while (current < end) {
    yield current
    current += 1
  }
}

export const ONE_SEC_IN_MS = 1000

/**
 * Error describing a non successful HTTP response: carries status code, response text and requested url.
 */
export class HttpError extends Error {
  constructor(public readonly status: number, public readonly responseText: string, public readonly url: string) {
    super(`HttpError: status code: ${status}, response text: ${responseText}`)
  }
}

/**
 * Yields at most `length` first items of `iterable`; yields nothing when `length` is 0.
 */
export function* take(iterable: Iterable<any>, length: number) {
  if (length === 0) {
    return
  }

  for (const item of iterable) {
    yield item
    length--

    if (length === 0) {
      return
    }
  }
}

/**
 * Maps raw exchange messages to normalized ones using the provided mappers.
 *
 * An `undefined` item in `messages` is treated as a disconnection marker: mappers are dropped and re-created
 * via `createMappers` on the next message, and (optionally) a disconnect message is yielded.
 *
 * @param exchange exchange id put into yielded disconnect messages
 * @param symbols symbols put into yielded disconnect messages
 * @param messages stream of raw messages with their local timestamps, `undefined` marks a disconnection
 * @param mappers initial mappers, must not be empty
 * @param createMappers factory called with the message local timestamp to create mappers after a disconnection
 * @param withDisconnectMessages when `true`, a disconnect message is yielded for each disconnection marker,
 * provided that some local timestamp is already known
 * @param filter optional predicate called with the `symbol` of each mapped message; only accepted messages are yielded
 * @param currentTimestamp optional initial value used as local timestamp of a disconnect message
 * @throws Error when `mappers` is empty
 */
export async function* normalizeMessages(
  exchange: Exchange,
  symbols: string[] | undefined,
  messages: AsyncIterableIterator<{ localTimestamp: Date; message: any } | undefined>,
  mappers: Mapper<any, any>[],
  createMappers: (localTimestamp: Date) => Mapper<any, any>[],
  withDisconnectMessages: boolean | undefined,
  filter?: (symbol: string) => boolean,
  currentTimestamp?: Date | undefined
) {
  let previousLocalTimestamp: Date | undefined = currentTimestamp

  let mappersForExchange: Mapper<any, any>[] | undefined = mappers
  if (mappersForExchange.length === 0) {
    throw new Error(`Can't normalize data without any normalizers provided`)
  }

  for await (const messageWithTimestamp of messages) {
    if (messageWithTimestamp === undefined) {
      // we received undefined meaning Websocket disconnection
      // lets create new mappers with clean state for 'new connection'
      mappersForExchange = undefined

      // if flag withDisconnectMessages is set, yield disconnect message
      if (withDisconnectMessages === true && previousLocalTimestamp !== undefined) {
        const disconnect: Disconnect = {
          type: 'disconnect',
          exchange,
          localTimestamp: previousLocalTimestamp,
          symbols
        }
        yield disconnect as any
      }

      continue
    }

    if (mappersForExchange === undefined) {
      mappersForExchange = createMappers(messageWithTimestamp.localTimestamp)
    }

    previousLocalTimestamp = messageWithTimestamp.localTimestamp

    for (const mapper of mappersForExchange) {
      if (mapper.canHandle(messageWithTimestamp.message)) {
        const mappedMessages = mapper.map(messageWithTimestamp.message, messageWithTimestamp.localTimestamp)
        if (!mappedMessages) {
          continue
        }

        for (const message of mappedMessages) {
          if (filter === undefined || filter(message.symbol)) {
            yield message
          }
        }
      }
    }
  }
}

/**
 * Collects filters of all provided mappers and merges the ones that share the same channel.
 *
 * Filter objects and symbol arrays returned by the mappers are never modified: merged results are
 * stored in copies, so an array shared with the caller (for example `symbols`) stays untouched.
 *
 * @param mappers mappers asked for their filters via `getFilters(symbols)`
 * @param symbols optional symbols forwarded to each mapper
 * @returns filters with one entry per channel, symbols of merged entries are not duplicated
 */
export function getFilters<T extends Exchange>(mappers: Mapper<T, any>[], symbols?: string[]) {
  const filters = mappers.flatMap((mapper) => mapper.getFilters(symbols))

  const deduplicatedFilters = filters.reduce((prev, current) => {
    const matchingExisting = prev.find((c) => c.channel === current.channel)

    if (matchingExisting === undefined) {
      // keep a copy with its own symbols array, later merges must not leak into mapper/caller owned data
      prev.push(current.symbols ? { ...current, symbols: [...current.symbols] } : { ...current })
      return prev
    }

    if (matchingExisting.symbols !== undefined && current.symbols) {
      for (const symbol of current.symbols) {
        if (matchingExisting.symbols.includes(symbol) === false) {
          matchingExisting.symbols.push(symbol)
        }
      }
    } else if (current.symbols) {
      // NOTE(review): if a filter without symbols is meant as 'all symbols', this narrows it - confirm intended
      matchingExisting.symbols = [...current.symbols]
    }

    return prev
  }, [] as FilterForExchange[T][])

  return deduplicatedFilters
}

/**
 * Yields consecutive slices of `symbols`, each with at most `batchSize` items.
 */
export function* batch(symbols: string[], batchSize: number) {
  yield* batchObjects(symbols, batchSize)
}

/**
 * Yields consecutive slices of `payload`, each with at most `batchSize` items.
 */
export function* batchObjects<T>(payload: T[], batchSize: number) {
  for (let i = 0; i < payload.length; i += batchSize) {
    yield payload.slice(i, i + batchSize)
  }
}

/**
 * Extracts the three characters following the milliseconds part of a date string as a number.
 *
 * @param dateString date string; only strings of length 27, 28, 30 or 32 are inspected
 * @returns `Number` of characters 23-25 of `dateString`, or 0 for any other string length
 */
export function parseμs(dateString: string): number {
  // check if we have ISO 8601 format date string, e.g: 2019-06-01T00:03:03.1238784Z or 2020-07-22T00:09:16.836773Z
  // or 2020-03-01T00:00:24.893456+00:00
  if (dateString.length === 27 || dateString.length === 28 || dateString.length === 32 || dateString.length === 30) {
    return Number(dateString.slice(23, 26))
  }

  return 0
}

/**
 * Merges filters that share the same channel, sorts them by channel and sorts/deduplicates their symbols.
 *
 * The provided `filters` (both the objects and their `symbols` arrays) are left untouched,
 * the result consists of copies.
 *
 * @param filters filters to optimize, the same channel may be provided multiple times
 * @returns new array with one filter per channel
 */
export function optimizeFilters(filters: Filter<any>[]) {
  // deduplicate filters (if the channel was provided multiple times)
  const optimizedFilters = filters.reduce((prev, current) => {
    const matchingExisting = prev.find((c) => c.channel === current.channel)

    if (matchingExisting) {
      // both previous and current have symbols let's merge them
      if (matchingExisting.symbols && current.symbols) {
        matchingExisting.symbols.push(...current.symbols)
      } else if (current.symbols) {
        matchingExisting.symbols = [...current.symbols]
      }
    } else {
      // keep a copy with its own symbols array so merging and sorting below never mutate caller owned data
      prev.push(current.symbols ? { ...current, symbols: [...current.symbols] } : { ...current })
    }

    return prev
  }, [] as Filter<any>[])

  // sort filters in place to improve local disk cache ratio (no matter filters order if the same filters are provided will hit the cache)
  optimizedFilters.sort((f1, f2) => {
    if (f1.channel < f2.channel) {
      return -1
    }

    if (f1.channel > f2.channel) {
      return 1
    }

    return 0
  })

  // sort and deduplicate filters symbols
  optimizedFilters.forEach((filter) => {
    if (filter.symbols) {
      filter.symbols = [...new Set(filter.symbols)].sort()
    }
  })

  return optimizedFilters
}

const httpsAgent = new https.Agent({
  keepAlive: true,
  keepAliveMsecs: 10 * ONE_SEC_IN_MS,
  maxSockets: 120
})

// proxy agent created from HTTP_PROXY env variable if it is set, otherwise from SOCKS_PROXY, otherwise undefined
export const httpsProxyAgent: https.Agent | undefined =
  process.env.HTTP_PROXY !== undefined
    ? createHttpsProxyAgent(process.env.HTTP_PROXY)
    : process.env.SOCKS_PROXY !== undefined
    ? new SocksProxyAgent(process.env.SOCKS_PROXY)
    : undefined

/**
 * Downloads `url` to `downloadPath`, retrying failed attempts.
 *
 * Up to 30 attempts are made. HTTP 401 and HTTP 400 responses (unless the error message of the latter
 * includes 'ISO 8601 format') are not retried. The delay before the next attempt is
 * min(2^attempts seconds, 120 seconds) plus up to 500ms of random jitter, plus 60 seconds after HTTP 429.
 *
 * @throws the last error when the request should not be retried or all attempts were used
 */
export async function download({
  apiKey,
  downloadPath,
  url,
  userAgent
}: {
  url: string
  downloadPath: string
  userAgent: string
  apiKey: string
}) {
  const httpRequestOptions = {
    agent: httpsProxyAgent !== undefined ? httpsProxyAgent : httpsAgent,
    timeout: 90 * ONE_SEC_IN_MS,
    headers: {
      'Accept-Encoding': 'gzip',
      'User-Agent': userAgent,
      Authorization: apiKey ? `Bearer ${apiKey}` : ''
    }
  }

  const MAX_ATTEMPTS = 30
  let attempts = 0

  while (true) {
    // simple retry logic when fetching from the network...
    attempts++
    try {
      return await _downloadFile(httpRequestOptions, url, downloadPath)
    } catch (error) {
      const badOrUnauthorizedRequest =
        error instanceof HttpError &&
        ((error.status === 400 && error.message.includes('ISO 8601 format') === false) || error.status === 401)
      const tooManyRequests = error instanceof HttpError && error.status === 429

      // do not retry when we've got bad or unauthorized request or enough attempts
      if (badOrUnauthorizedRequest || attempts === MAX_ATTEMPTS) {
        throw error
      }

      const jitterMS = Math.random() * 500
      const attemptsDelayMS = Math.min(Math.pow(2, attempts) * ONE_SEC_IN_MS, 120 * ONE_SEC_IN_MS)
      let nextAttemptDelayMS = jitterMS + attemptsDelayMS

      if (tooManyRequests) {
        // when too many requests received wait one minute
        nextAttemptDelayMS += 60 * ONE_SEC_IN_MS
      }

      debug('download file error: %o, next attempt delay: %d, url %s, path: %s', error, nextAttemptDelayMS, url, downloadPath)

      await wait(nextAttemptDelayMS)
    }
  }
}

// cleanup callbacks of downloads in progress, keyed by temp file path
const tmpFileCleanups = new Map<string, () => void>()

/**
 * Runs cleanup callbacks (destroy write stream, remove temp file) of all downloads that are still in progress.
 */
export function cleanTempFiles() {
  tmpFileCleanups.forEach((cleanup) => cleanup())
}

/**
 * Performs a single download attempt: streams the response of `url` into a temp file next to `downloadPath`
 * and renames it to `downloadPath` once the whole response has been saved.
 *
 * Rejects with HttpError for non 200 responses and with Error when the response was aborted or incomplete.
 */
async function _downloadFile(requestOptions: RequestOptions, url: string, downloadPath: string) {
  // first ensure that directory where we want to download file exists
  ensureDirSync(path.dirname(downloadPath))

  // create write file stream that we'll write data into - first as unconfirmed temp file
  const tmpFilePath = `${downloadPath}${crypto.randomBytes(8).toString('hex')}.unconfirmed`

  const fileWriteStream = createWriteStream(tmpFilePath)

  const cleanup = () => {
    try {
      fileWriteStream.destroy()
      removeSync(tmpFilePath)
    } catch {
      // best effort only, failures while cleaning up are ignored
    }
  }

  tmpFileCleanups.set(tmpFilePath, cleanup)

  try {
    // based on https://github.com/nodejs/node/issues/28172 - only reliable way to consume response stream and avoiding all the 'gotchas'
    await new Promise<void>((resolve, reject) => {
      const req = https
        .get(url, requestOptions, (res) => {
          const { statusCode } = res
          if (statusCode !== 200) {
            // read the error response text and throw it as an HttpError
            res.setEncoding('utf8')
            let body = ''
            res.on('error', reject)
            res.on('data', (chunk) => (body += chunk))
            res.on('end', () => {
              reject(new HttpError(statusCode!, body, url))
            })
          } else {
            // consume the response stream by writing it to the file
            res
              .on('error', reject)
              .on('aborted', () => reject(new Error('Request aborted')))
              .pipe(fileWriteStream)
              .on('error', reject)
              .on('finish', () => {
                if (res.complete) {
                  resolve()
                } else {
                  reject(new Error('The connection was terminated while the message was still being sent'))
                }
              })
          }
        })
        .on('error', reject)
        .on('timeout', () => {
          debug('download file request timeout, %s', url)
          // NOTE(review): request.abort() is marked as deprecated in newer Node.js docs in favour of
          // request.destroy() - confirm supported Node.js versions before switching
          req.abort()
        })
    })

    // finally when saving from the network to file has succeded, rename tmp file to normal name
    // then we're sure that responses is 100% saved and also even if different process was doing the same we're good
    await rename(tmpFilePath, downloadPath)
  } finally {
    tmpFileCleanups.delete(tmpFilePath)
    cleanup()
  }
}

/**
 * Fixed capacity buffer that overwrites its oldest value once `_bufferSize` values have been appended.
 */
export class CircularBuffer<T> {
  private _buffer: T[] = []
  private _index: number = 0

  constructor(private readonly _bufferSize: number) {}

  /**
   * Stores `value` at the current write position.
   *
   * @returns the value that has been overwritten when the buffer was already full, otherwise `undefined`
   */
  append(value: T) {
    const isFull = this._buffer.length === this._bufferSize
    let poppedValue: T | undefined
    if (isFull) {
      poppedValue = this._buffer[this._index]
    }
    this._buffer[this._index] = value
    this._index = (this._index + 1) % this._bufferSize

    return poppedValue
  }

  /**
   * Yields stored values starting at the current write position and wrapping around the buffer.
   */
  *items() {
    for (let i = 0; i < this._buffer.length; i++) {
      const index = (this._index + i) % this._buffer.length
      yield this._buffer[index]
    }
  }

  get count() {
    return this._buffer.length
  }

  clear() {
    this._buffer = []
    this._index = 0
  }
}

/**
 * Set that removes its first (oldest inserted) value before adding a new one once `_maxSize` is reached.
 */
export class CappedSet<T> {
  private _set = new Set<T>()

  constructor(private readonly _maxSize: number) {}

  public has(value: T) {
    return this._set.has(value)
  }

  /**
   * Adds `value`, evicting the first value of the set when the set is at its max size.
   * Adding a value that is already in the set changes nothing.
   */
  public add(value: T) {
    if (this._set.has(value)) {
      // the set would not grow, so there is no reason to evict another value
      return
    }

    if (this._set.size >= this._maxSize) {
      this._set.delete(this._set.keys().next().value)
    }
    this._set.add(value)
  }

  public remove(value: T) {
    this._set.delete(value)
  }

  public size() {
    return this._set.size
  }
}

// true when n differs from its nearest integer by more than 1e-10
function hasFraction(n: number) {
  return Math.abs(Math.round(n) - n) > 1e-10
}

/**
 * Returns the number of decimal places of `n`.
 */
// https://stackoverflow.com/a/44815797
export function decimalPlaces(n: number) {
  let count = 0
  // multiply by increasing powers of 10 until the fractional part is ~ 0
  while (hasFraction(n * 10 ** count) && isFinite(10 ** count)) count++
  return count
}

/**
 * Converts `val` to a number.
 *
 * @returns the number, or `undefined` when `val` is `undefined`/`null` or converts to NaN, +/-Infinity or 0
 */
export function asNumberIfValid(val: string | number | undefined | null) {
  if (val === undefined || val === null) {
    return
  }

  const asNumber = Number(val)

  // Number.isFinite is false for NaN as well as for +/-Infinity
  if (!Number.isFinite(asNumber) || asNumber === 0) {
    return
  }

  return asNumber
}

const gotDefaultOptions: ExtendOptions = {}

if (httpsProxyAgent !== undefined) {
  gotDefaultOptions.agent = {
    https: httpsProxyAgent
  }
}

/**
 * Returns upper cased copies of `symbols`, or `undefined` when no symbols were provided.
 */
export function upperCaseSymbols(symbols?: string[]) {
  if (symbols !== undefined) {
    return symbols.map((s) => s.toUpperCase())
  }
  return
}

/**
 * Returns lower cased copies of `symbols`, or `undefined` when no symbols were provided.
 */
export function lowerCaseSymbols(symbols?: string[]) {
  if (symbols !== undefined) {
    return symbols.map((s) => s.toLowerCase())
  }
  return
}

// got instance that uses the proxy agent for https requests when one is configured
export const httpClient = got.extend(gotDefaultOptions)

/**
 * Converts a numeric timestamp to a Date.
 *
 * Values greater than 1e15 are treated as microseconds: the Date is created from `micros / 1000`
 * and `micros % 1000` is stored in its `μs` property. Other values are passed to `Date` as they are.
 */
export const fromMicroSecondsToDate = (micros: number) => {
  const isMicroseconds = micros > 1e15 // Check if the number is likely in microseconds

  if (!isMicroseconds) {
    return new Date(micros)
  }

  const timestamp = new Date(micros / 1000)
  timestamp.μs = micros % 1000

  return timestamp
}

/**
 * Predicate for `Array.prototype.filter` that keeps only the first occurrence of each value.
 */
export function onlyUnique(value: string, index: number, array: string[]) {
  return array.indexOf(value) === index
}