tardis-dev
Convenient access to tick-level historical and real-time cryptocurrency market data via Node.js

import { createReadStream, remove } from 'fs-extra'
import path from 'path'
import { EventEmitter } from 'events'
import { Worker } from 'worker_threads'
import { constants, createGunzip } from 'zlib'
import { BinarySplitStream } from './binarysplit'
import { clearCacheSync } from './clearcache'
import { EXCHANGES, EXCHANGE_CHANNELS_INFO } from './consts'
import { debug } from './debug'
import { addDays, getFilters, normalizeMessages, parseAsUTCDate, parseμs, wait } from './handy'
import { MapperFactory, normalizeBookChanges } from './mappers'
import { getOptions } from './options'
import { Disconnect, Exchange, FilterForExchange } from './types'
import { WorkerJobPayload, WorkerMessage, WorkerSignal } from './worker'

export async function* replay<T extends Exchange, U extends boolean = false, Z extends boolean = false>({
exchange,
from,
to,
filters,
skipDecoding = undefined,
withDisconnects = undefined,
apiKey = undefined,
withMicroseconds = undefined,
autoCleanup = undefined,
waitWhenDataNotYetAvailable = undefined
}: ReplayOptions<T, U, Z>): AsyncIterableIterator<
Z extends true
? U extends true
? { localTimestamp: Buffer; message: Buffer } | undefined
: { localTimestamp: Date; message: any } | undefined
: U extends true
? { localTimestamp: Buffer; message: Buffer }
: { localTimestamp: Date; message: any }
> {
validateReplayOptions(exchange, from, to, filters)
const fromDate = parseAsUTCDate(from)
const toDate = parseAsUTCDate(to)
const cachedSlicePaths = new Map<string, string>()
let replayError
debug('replay for exchange: %s started - from: %s, to: %s, filters: %o', exchange, fromDate.toISOString(), toDate.toISOString(), filters)
const options = getOptions()
// initialize worker thread that will fetch and cache data feed slices and "report back" by setting proper key/values in cachedSlicePaths
const payload: WorkerJobPayload = {
cacheDir: options.cacheDir,
endpoint: options.endpoint,
apiKey: apiKey || options.apiKey,
userAgent: options._userAgent,
fromDate,
toDate,
exchange,
filters: filters || [],
waitWhenDataNotYetAvailable
}
const worker = new ReliableWorker(payload)
worker.on('message', (message: WorkerMessage) => {
cachedSlicePaths.set(message.sliceKey, message.slicePath)
})
worker.on('error', (err) => {
debug('worker error %o', err)
replayError = err
})
try {
// the date is always formatted to a length of 28, so instead of searching for the
// first space in each line we can split at a hardcoded index
const DATE_MESSAGE_SPLIT_INDEX = 28
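// e.g. a raw slice line (illustrative): 2019-06-01T00:03:03.1238784Z {"table":"trade",...}
// (28-char ISO timestamp, single space, then the raw exchange message)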
// more lenient gzip decompression
// see https://github.com/request/request/pull/2492 and https://github.com/node-fetch/node-fetch/pull/239
const ZLIB_OPTIONS = {
chunkSize: 128 * 1024,
flush: constants.Z_SYNC_FLUSH,
finishFlush: constants.Z_SYNC_FLUSH
}
// helper flag that prevents yielding two subsequent undefined/disconnect messages
let lastMessageWasUndefined = false
let currentSliceDate = new Date(fromDate)
// iterate over every minute in the [from, to) date range
// get cached slice paths, read them as file streams, decompress, split by new lines and yield as messages
while (currentSliceDate < toDate) {
const sliceKey = currentSliceDate.toISOString()
debug('getting slice: %s, exchange: %s', sliceKey, exchange)
let cachedSlicePath
while (cachedSlicePath === undefined) {
cachedSlicePath = cachedSlicePaths.get(sliceKey)
// if something went wrong (network issue, auth issue, gunzip issue, etc.)
if (replayError !== undefined) {
throw replayError
}
if (cachedSlicePath === undefined) {
// if the response for the requested date is not ready yet, wait 100ms and try again
debug('waiting for slice: %s, exchange: %s', sliceKey, exchange)
await wait(100)
}
}
// the response is a path to a file on disk, let's read it as a stream
const linesStream = createReadStream(cachedSlicePath, { highWaterMark: 128 * 1024 })
// unzip it
.pipe(createGunzip(ZLIB_OPTIONS))
.on('error', function onGunzipError(err) {
debug('gunzip error %o', err)
linesStream.destroy(err)
})
// and split by new line
.pipe(new BinarySplitStream())
.on('error', function onBinarySplitStreamError(err) {
debug('binary split stream error %o', err)
linesStream.destroy(err)
})
let linesCount = 0
for await (const bufferLine of linesStream as unknown as AsyncIterable<Buffer>) {
linesCount++
if (bufferLine.length > 0) {
lastMessageWasUndefined = false
const localTimestampBuffer = bufferLine.slice(0, DATE_MESSAGE_SPLIT_INDEX)
const messageBuffer = bufferLine.slice(DATE_MESSAGE_SPLIT_INDEX + 1)
// as any due to https://github.com/Microsoft/TypeScript/issues/24929
if (skipDecoding === true) {
yield {
localTimestamp: localTimestampBuffer,
message: messageBuffer
} as any
} else {
let messageString = messageBuffer.toString()
// hack to handle huobi long numeric id for trades
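// (such ids can exceed Number.MAX_SAFE_INTEGER and would lose precision in JSON.parse, hence quoting them into strings)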
if (exchange.startsWith('huobi-') && messageString.includes('.trade.detail')) {
messageString = messageString.replace(/"id":([0-9]+),/g, '"id":"$1",')
}
// hack to handle upbit long numeric id for trades
if (exchange === 'upbit' && messageString.includes('sequential_id')) {
messageString = messageString.replace(/"sequential_id":([0-9]+),/g, '"sequential_id":"$1",')
}
const message = JSON.parse(messageString)
const localTimestampString = localTimestampBuffer.toString()
const localTimestamp = new Date(localTimestampString)
if (withMicroseconds) {
// provide additionally fractions of millisecond at microsecond resolution
// local timestamp always has format like this 2019-06-01T00:03:03.1238784Z
localTimestamp.μs = parseμs(localTimestampString)
}
yield {
// when skipDecoding is not set, decode timestamp to Date and message to object
localTimestamp,
message
} as any
}
// ignore empty lines unless withDisconnects is set to true
// do not yield subsequent undefined messages
} else if (withDisconnects === true && lastMessageWasUndefined === false) {
lastMessageWasUndefined = true
yield undefined as any
}
}
debug('processed slice: %s, exchange: %s, count: %d', sliceKey, exchange, linesCount)
// remove slice key from the map as it's already processed
cachedSlicePaths.delete(sliceKey)
if (autoCleanup) {
await cleanupSlice(cachedSlicePath)
}
// move one minute forward
currentSliceDate.setUTCMinutes(currentSliceDate.getUTCMinutes() + 1)
}
debug(
'replay for exchange: %s finished - from: %s, to: %s, filters: %o',
exchange,
fromDate.toISOString(),
toDate.toISOString(),
filters
)
} finally {
if (autoCleanup) {
debug(
'replay for exchange %s auto cleanup started - from: %s, to: %s, filters: %o',
exchange,
fromDate.toISOString(),
toDate.toISOString(),
filters
)
let startDate = new Date(fromDate)
while (startDate < toDate) {
clearCacheSync(exchange, filters, startDate.getUTCFullYear(), startDate.getUTCMonth() + 1, startDate.getUTCDate())
startDate = addDays(startDate, 1)
}
debug(
'replay for exchange %s auto cleanup finished - from: %s, to: %s, filters: %o',
exchange,
fromDate.toISOString(),
toDate.toISOString(),
filters
)
}
const underlyingWorker = worker.getUnderlyingWorker()
if (underlyingWorker !== undefined) {
await terminateWorker(underlyingWorker, 500)
}
}
}
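
// Usage sketch (illustrative, not part of this module): consuming the replay()
// generator above; exchange, dates, channel, and symbol are example values.
//
// import { replay } from 'tardis-dev'
//
// for await (const { localTimestamp, message } of replay({
//   exchange: 'bitmex',
//   from: '2019-05-01',
//   to: '2019-05-02',
//   filters: [{ channel: 'trade', symbols: ['XBTUSD'] }]
// })) {
//   console.log(localTimestamp.toISOString(), message)
// }
//
// With skipDecoding: true the same call yields raw Buffers
// ({ localTimestamp: Buffer; message: Buffer }) instead of decoded objects.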

async function cleanupSlice(slicePath: string) {
try {
await remove(slicePath)
} catch (e) {
debug('cleanupSlice error %s %o', slicePath, e)
}
}

// gracefully terminate worker
async function terminateWorker(worker: Worker, waitTimeout: number) {
let cancelWait = () => {}
const maxWaitGuard = new Promise((resolve) => {
const timeoutId = setTimeout(resolve, waitTimeout)
cancelWait = () => clearTimeout(timeoutId)
})
const readyToTerminate = new Promise<void>((resolve) => {
worker.once('message', (signal) => signal === WorkerSignal.READY_TO_TERMINATE && resolve())
}).then(cancelWait)
worker.postMessage(WorkerSignal.BEFORE_TERMINATE)
await Promise.race([readyToTerminate, maxWaitGuard])
await worker.terminate()
}

export function replayNormalized<T extends Exchange, U extends MapperFactory<T, any>[], Z extends boolean = false>(
{
exchange,
symbols,
from,
to,
withDisconnectMessages = undefined,
apiKey = undefined,
autoCleanup = undefined,
waitWhenDataNotYetAvailable = undefined
}: ReplayNormalizedOptions<T, Z>,
...normalizers: U
): AsyncIterableIterator<
Z extends true
? U extends MapperFactory<infer _, infer X>[]
? X | Disconnect
: never
: U extends MapperFactory<infer _, infer X>[]
? X
: never
> {
const fromDate = parseAsUTCDate(from)
validateReplayNormalizedOptions(fromDate, normalizers)
// TODO: replay day by day so that fresh filters are passed for each day
const createMappers = (localTimestamp: Date) => normalizers.map((m) => m(exchange, localTimestamp))
const mappers = createMappers(fromDate)
const filters = getFilters(mappers, symbols)
const messages = replay({
exchange,
from,
to,
withDisconnects: true,
filters,
apiKey,
withMicroseconds: true,
autoCleanup,
waitWhenDataNotYetAvailable
})
// filter normalized messages by symbol as some exchanges do not provide server-side
// filtering, so we could end up with messages for symbols we have not requested
const upperCaseSymbols = symbols !== undefined ? symbols.map((s) => s.toUpperCase()) : undefined
const filter = (symbol: string) => {
return upperCaseSymbols === undefined || upperCaseSymbols.length === 0 || upperCaseSymbols.includes(symbol)
}
return normalizeMessages(exchange, undefined, messages, mappers, createMappers, withDisconnectMessages, filter)
}
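
// Usage sketch (illustrative): replayNormalized() with a normalizer factory;
// normalizeTrades is assumed to be exported alongside the normalizeBookChanges
// import above, and all option values are examples.
//
// import { replayNormalized, normalizeTrades } from 'tardis-dev'
//
// for await (const trade of replayNormalized(
//   { exchange: 'bitmex', symbols: ['XBTUSD'], from: '2019-05-01', to: '2019-05-02' },
//   normalizeTrades
// )) {
//   console.log(trade)
// }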

function validateReplayOptions<T extends Exchange>(exchange: T, from: string, to: string, filters: FilterForExchange[T][]) {
if (!exchange || EXCHANGES.includes(exchange) === false) {
throw new Error(`Invalid "exchange" argument: ${exchange}. Please provide one of the following exchanges: ${EXCHANGES.join(', ')}.`)
}
if (!from || isNaN(Date.parse(from))) {
throw new Error(`Invalid "from" argument: ${from}. Please provide valid date string.`)
}
if (!to || isNaN(Date.parse(to))) {
throw new Error(`Invalid "to" argument: ${to}. Please provide valid date string.`)
}
if (parseAsUTCDate(to) < parseAsUTCDate(from)) {
throw new Error(`Invalid "to" and "from" arguments combination. Please provide "to" date that is later than "from" date.`)
}
if (filters && filters.length > 0) {
for (let i = 0; i < filters.length; i++) {
const filter = filters[i]
if (!filter.channel || (EXCHANGE_CHANNELS_INFO[exchange] as any).includes(filter.channel) === false) {
throw new Error(
`Invalid "filters[].channel" argument: ${filter.channel}. Please provide one of the following channels: ${EXCHANGE_CHANNELS_INFO[
exchange
].join(', ')}.`
)
}
if (filter.symbols && Array.isArray(filter.symbols) === false) {
throw new Error(`Invalid "filters[].symbols" argument: ${filter.symbols}. Please provide array of symbol strings`)
}
}
}
}

function validateReplayNormalizedOptions(fromDate: Date, normalizers: MapperFactory<any, any>[]) {
const hasBookChangeNormalizer = normalizers.some((n) => n === normalizeBookChanges)
const dateDoesNotStartAtTheBeginningOfTheDay = fromDate.getUTCHours() !== 0 || fromDate.getUTCMinutes() !== 0
if (hasBookChangeNormalizer && dateDoesNotStartAtTheBeginningOfTheDay) {
debug('Initial order book snapshots are available only at 00:00 UTC')
}
}

class ReliableWorker extends EventEmitter {
private _errorsCount = 0
private _worker: Worker | undefined = undefined
constructor(private readonly _payload: WorkerJobPayload) {
super()
this._initWorker()
}
private _initWorker() {
this._worker = new Worker(path.resolve(__dirname, 'worker.js'), {
workerData: this._payload
})
this._worker.on('message', (message: WorkerMessage) => {
this.emit('message', message)
})
this._worker.on('error', this._handleError)
this._worker.on('exit', (code) => {
debug('worker finished with code: %d', code)
})
}
private _handleError = async (err: Error) => {
debug('underlying worker error %o', err)
if (err.message.includes('HttpError') === false && this._errorsCount < 30) {
this._errorsCount++
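// exponential backoff: 2s, 4s, 8s, ... capped at 120s between worker restarts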
const delayMS = Math.min(Math.pow(2, this._errorsCount) * 1000, 120 * 1000)
debug('re-init worker after: %d ms', delayMS)
await wait(delayMS)
// it was most likely an unhandled 'socket hang up' error, so retry first with a new worker instead of emitting the error right away
this._initWorker()
} else {
this.emit('error', err)
}
}
public getUnderlyingWorker() {
return this._worker
}
}

export type ReplayOptions<T extends Exchange, U extends boolean = false, Z extends boolean = false> = {
readonly exchange: T
readonly from: string
readonly to: string
readonly filters: FilterForExchange[T][]
readonly skipDecoding?: U
readonly withDisconnects?: Z
readonly apiKey?: string
readonly withMicroseconds?: boolean
readonly autoCleanup?: boolean
readonly waitWhenDataNotYetAvailable?: boolean | number
}

export type ReplayNormalizedOptions<T extends Exchange, U extends boolean = false> = {
readonly exchange: T
readonly symbols?: string[]
readonly from: string
readonly to: string
readonly withDisconnectMessages?: U
readonly apiKey?: string
readonly autoCleanup?: boolean
readonly waitWhenDataNotYetAvailable?: boolean | number
}