UNPKG

tardis-dev

Version:

Convenient access to tick-level historical and real-time cryptocurrency market data via Node.js

143 lines 7.89 kB
import dbg from 'debug';
import { existsSync } from 'node:fs';
import pMap from 'p-map';
import { isMainThread, parentPort, workerData } from 'worker_threads';
import { addMinutes, download, formatDateToPath, optimizeFilters, sha256, wait, cleanTempFiles } from "./handy.js";

const debug = dbg('tardis-dev');

// slice size (in minutes) used whenever the API does not suggest a different one
const DEFAULT_DATA_FEED_SLICE_SIZE = 1;

if (isMainThread) {
    debug('current worker is not meant to run in main thread');
}
else {
    // before terminating, the parent asks the worker to clean up and waits for the acknowledgement
    parentPort.on('message', (signal) => {
        if (signal === "BEFORE_TERMINATE" /* WorkerSignal.BEFORE_TERMINATE */) {
            cleanTempFiles();
            parentPort.postMessage("READY_TO_TERMINATE" /* WorkerSignal.READY_TO_TERMINATE */);
        }
    });
    // intentionally not awaited: a rejection reaches the 'unhandledRejection' handler below, which rethrows it
    getDataFeedSlices(workerData);
}

process.on('unhandledRejection', (err, promise) => {
    debug('Unhandled Rejection at: %o, reason: %o', promise, err);
    throw err;
});

/**
 * Converts a raw slice size header value into a usable slice size.
 *
 * @param {*} rawValue - header value as received (may be undefined when the header is missing)
 * @param {number} fallback - value returned when rawValue is missing or is not an integer >= 1
 * @returns {number} an integer >= 1 parsed from rawValue, otherwise fallback
 */
function parseSliceSize(rawValue, fallback) {
    const parsed = Number(rawValue ?? fallback);
    return Number.isInteger(parsed) && parsed >= 1 ? parsed : fallback;
}

/**
 * Worker entry point: fetches every data feed slice between payload.fromDate and payload.toDate.
 * Slices that can already be requested are fetched concurrently; the remaining ones are fetched
 * one by one, waiting until each of them is old enough to be requested.
 *
 * @param {object} payload - worker data; this function reads filters, fromDate, toDate, cacheDir,
 *   exchange and waitWhenDataNotYetAvailable, and forwards the payload to the helpers below
 * @returns {Promise<void>}
 */
async function getDataFeedSlices(payload) {
    const MILLISECONDS_IN_MINUTE = 60 * 1000;
    const MIN_WAIT_WHEN_DATA_NOT_AVAILABLE_OFFSET = 6;
    const CONCURRENCY_LIMIT = 60;
    // deduplicate filters (if the channel was provided multiple times)
    const filters = optimizeFilters(payload.filters);
    // let's calculate number of minutes between "from" and "to" dates as those will give us total number of requests or checks
    // that will have to be performed concurrently with CONCURRENCY_LIMIT
    const minutesCountToFetch = Math.floor((payload.toDate.getTime() - payload.fromDate.getTime()) / MILLISECONDS_IN_MINUTE);
    // each filter will have separate sub dir based on its sha hash
    const cacheDir = `${payload.cacheDir}/feeds/${payload.exchange}/${sha256(filters)}`;
    // a numeric option is used as the offset in minutes (never less than the minimum), anything else means 30 minutes
    const waitOffsetMinutes = typeof payload.waitWhenDataNotYetAvailable === 'number'
        ? Math.max(payload.waitWhenDataNotYetAvailable, MIN_WAIT_WHEN_DATA_NOT_AVAILABLE_OFFSET)
        : 30;
    const waitOffsetMS = waitOffsetMinutes * MILLISECONDS_IN_MINUTE;
    const minutesCountThatAreAlreadyAvailableToFetch = await getAvailableMinutesCount(payload, minutesCountToFetch, waitOffsetMS, MILLISECONDS_IN_MINUTE);
    await getAvailableDataFeedSlices(payload, filters, cacheDir, minutesCountThatAreAlreadyAvailableToFetch, CONCURRENCY_LIMIT);
    // for remaining data iterate one by one and wait as needed
    for (let offset = minutesCountThatAreAlreadyAvailableToFetch; offset < minutesCountToFetch; offset++) {
        const timestampToFetch = payload.fromDate.valueOf() + offset * MILLISECONDS_IN_MINUTE;
        const timestampForLastAvailableData = new Date().valueOf() - waitOffsetMS;
        if (timestampToFetch > timestampForLastAvailableData) {
            // wait until the slice is older than the wait offset, plus a small 100ms margin
            const waitTime = timestampToFetch - timestampForLastAvailableData + 100;
            await wait(waitTime);
        }
        await getDataFeedSlice(payload, offset, filters, cacheDir);
    }
}

/**
 * Calculates how many minutes, counted from payload.fromDate, are already old enough to be fetched.
 * When waiting is enabled and even payload.fromDate is too recent, waits until it becomes available first.
 *
 * @param {object} payload - worker data; reads waitWhenDataNotYetAvailable, fromDate and toDate
 * @param {number} minutesCountToFetch - total number of minutes in the requested range
 * @param {number} waitOffsetMS - how far behind the current time data is considered available, in milliseconds
 * @param {number} millisecondsInMinute - number of milliseconds in one minute
 * @returns {Promise<number>} value between 0 and minutesCountToFetch (inclusive)
 */
async function getAvailableMinutesCount(payload, minutesCountToFetch, waitOffsetMS, millisecondsInMinute) {
    const waitWhenDataIsNotAvailable = payload.waitWhenDataNotYetAvailable && payload.toDate.valueOf() > new Date().valueOf() - waitOffsetMS;
    if (!waitWhenDataIsNotAvailable) {
        // waiting is disabled or the whole range is already old enough - everything can be fetched right away
        return minutesCountToFetch;
    }
    let timestampForLastAvailableData = new Date().valueOf() - waitOffsetMS;
    // in case when even initial from date is not yet available wait until it is
    if (timestampForLastAvailableData < payload.fromDate.valueOf()) {
        await wait(payload.fromDate.valueOf() - timestampForLastAvailableData);
    }
    // recalculate after the potential wait, so any data that is already available can be fetched concurrently
    timestampForLastAvailableData = new Date().valueOf() - waitOffsetMS;
    const availableMinutesCount = Math.floor((timestampForLastAvailableData - payload.fromDate.valueOf()) / millisecondsInMinute);
    return Math.min(Math.max(availableMinutesCount, 0), minutesCountToFetch);
}

/**
 * Fetches the first minutesCountToFetch minutes of the requested range: the last and the first slice
 * are requested from the API up front (bypassing the cache), the slices in between are fetched concurrently.
 *
 * @param {object} payload - worker data forwarded to getDataFeedSlice
 * @param {Array} filters - optimized filters
 * @param {string} cacheDir - directory where the slices are stored
 * @param {number} minutesCountToFetch - number of minutes, counted from payload.fromDate, to fetch
 * @param {number} concurrencyLimit - maximum number of slices fetched at the same time
 * @returns {Promise<void>}
 */
async function getAvailableDataFeedSlices(payload, filters, cacheDir, minutesCountToFetch, concurrencyLimit) {
    if (minutesCountToFetch <= 0) {
        return;
    }
    // fetch last slice - it will tell us if user has access to the end of requested date range and data is available
    // also fetch it from API to get current suggested slice size headers
    const lastSlice = await getDataFeedSlice(payload, minutesCountToFetch - 1, filters, cacheDir, DEFAULT_DATA_FEED_SLICE_SIZE, false);
    // fetch first slice - it will tell us if user has access to the beginning of requested date range
    const firstSlice = minutesCountToFetch === 1
        ? lastSlice
        : await getDataFeedSlice(payload, 0, filters, cacheDir, DEFAULT_DATA_FEED_SLICE_SIZE, false);
    const replaySliceSize = filters.length === 0
        ? DEFAULT_DATA_FEED_SLICE_SIZE
        : Math.max(firstSlice.suggestedSliceSize, lastSlice.suggestedSliceSize);
    // offsets 0 and minutesCountToFetch - 1 were fetched above, only the minutes in between are left
    const sliceOffsets = [];
    for (let offset = 1; offset < minutesCountToFetch - 1; offset += replaySliceSize) {
        sliceOffsets.push(offset);
    }
    // if both beginning and end date of the range are accessible fetch all remaining slices concurrently with CONCURRENCY_LIMIT
    await pMap(sliceOffsets, async (offset) => {
        // never request past the last slice, which has been fetched already
        const requestedSliceSize = Math.min(replaySliceSize, minutesCountToFetch - 1 - offset);
        await getDataFeedSlice(payload, offset, filters, cacheDir, requestedSliceSize);
    }, { concurrency: concurrencyLimit });
}

/**
 * Provides a single data feed slice, either from the local cache or by downloading it from the API,
 * and posts a { sliceKey, slicePath, sliceSize } message to the parent thread.
 *
 * @param {object} payload - worker data; reads exchange, fromDate, endpoint, apiKey, dataFeedCompression and userAgent
 * @param {number} offset - offset of the slice in minutes, counted from fromDate
 * @param {Array} filters - optimized filters, sent to the API when not empty
 * @param {string} cacheDir - directory where the slices are stored
 * @param {number} [requestedSliceSize] - requested slice size in minutes
 * @param {boolean} [useCache] - when false the cache lookup is skipped and the slice is always downloaded
 * @returns {Promise<{sliceSize: number, suggestedSliceSize: number}>} size of the provided slice and the
 *   slice size suggested by the API (the default one for cached slices)
 */
async function getDataFeedSlice({ exchange, fromDate, endpoint, apiKey, dataFeedCompression, userAgent }, offset, filters, cacheDir, requestedSliceSize = DEFAULT_DATA_FEED_SLICE_SIZE, useCache = true) {
    const sliceTimestamp = addMinutes(fromDate, offset);
    const sliceKey = sliceTimestamp.toISOString();
    // slices of non default size are stored under a separate file name
    const sliceSizeSuffix = requestedSliceSize === DEFAULT_DATA_FEED_SLICE_SIZE ? '' : `.size-${requestedSliceSize}`;
    const sliceBasePath = `${cacheDir}/${formatDateToPath(sliceTimestamp)}${sliceSizeSuffix}.json`;
    const zstdSlicePath = `${sliceBasePath}.zst`;
    const gzipSlicePath = `${sliceBasePath}.gz`;
    let cachedSlicePath;
    if (useCache) {
        // the zstd file is preferred when both compressed variants exist
        cachedSlicePath = existsSync(zstdSlicePath) ? zstdSlicePath : existsSync(gzipSlicePath) ? gzipSlicePath : undefined;
    }
    if (cachedSlicePath !== undefined) {
        debug('getDataFeedSlice already cached: %s, sliceSize: %d', sliceKey, requestedSliceSize);
        const message = {
            sliceKey,
            slicePath: cachedSlicePath,
            sliceSize: requestedSliceSize
        };
        parentPort.postMessage(message);
        // no API response here, so there is no suggested slice size to report
        return { sliceSize: requestedSliceSize, suggestedSliceSize: DEFAULT_DATA_FEED_SLICE_SIZE };
    }
    let url = `${endpoint}/data-feeds/${exchange}?from=${fromDate.toISOString()}&offset=${offset}&compression=${dataFeedCompression}`;
    if (requestedSliceSize > DEFAULT_DATA_FEED_SLICE_SIZE) {
        url += `&sliceSize=${requestedSliceSize}`;
    }
    if (filters.length > 0) {
        url += `&filters=${encodeURIComponent(JSON.stringify(filters))}`;
    }
    const downloadResult = await download({
        apiKey,
        downloadPath: sliceBasePath,
        url,
        userAgent,
        appendContentEncodingExtension: true,
        acceptEncoding: dataFeedCompression === 'gzip' ? 'gzip' : 'zstd, gzip'
    });
    // a missing or malformed header must not leak NaN (or a zero/negative size) to the parent thread
    // or into the slice offsets calculation, so fall back to a known good value instead
    const responseSliceSize = parseSliceSize(downloadResult.headers['x-slice-size'], requestedSliceSize);
    const suggestedSliceSize = parseSliceSize(downloadResult.headers['x-suggested-slice-size'], DEFAULT_DATA_FEED_SLICE_SIZE);
    debug('getDataFeedSlice fetched from API and cached, %s, sliceSize: %d', sliceKey, responseSliceSize);
    const message = {
        sliceKey,
        slicePath: downloadResult.downloadPath,
        sliceSize: responseSliceSize
    };
    parentPort.postMessage(message);
    return { sliceSize: responseSliceSize, suggestedSliceSize };
}
//# sourceMappingURL=worker.js.map