UNPKG

tardis-dev

Version:

Convenient access to tick-level historical and real-time cryptocurrency market data via Node.js

189 lines (165 loc) 6.04 kB
import { existsSync } from 'fs-extra' import pMap from 'p-map' import { debug } from './debug' import { DatasetType } from './exchangedetails' import { addDays, doubleDigit, download, parseAsUTCDate, sequence } from './handy' import { getOptions } from './options' import { Exchange } from './types' const CONCURRENCY_LIMIT = 20 const MILLISECONDS_IN_SINGLE_DAY = 24 * 60 * 60 * 1000 const DEFAULT_DOWNLOAD_DIR = './datasets' const options = getOptions() export async function downloadDatasets(downloadDatasetsOptions: DownloadDatasetsOptions) { const { exchange, dataTypes, from, to, symbols } = downloadDatasetsOptions const apiKey = downloadDatasetsOptions.apiKey !== undefined ? downloadDatasetsOptions.apiKey : options.apiKey const downloadDir = downloadDatasetsOptions.downloadDir !== undefined ? downloadDatasetsOptions.downloadDir : DEFAULT_DOWNLOAD_DIR const format = downloadDatasetsOptions.format !== undefined ? downloadDatasetsOptions.format : 'csv' const getFilename = downloadDatasetsOptions.getFilename !== undefined ? downloadDatasetsOptions.getFilename : getFilenameDefault const skipIfExists = downloadDatasetsOptions.skipIfExists === undefined ? true : downloadDatasetsOptions.skipIfExists // in case someone provided 'api/exchange' symbol, transform it to symbol that is accepted by datasets API const datasetsSymbols = symbols.map((s) => s.replace(/\/|:/g, '-').toUpperCase()) for (const symbol of datasetsSymbols) { for (const dataType of dataTypes) { const { daysCountToFetch, startDate } = getDownloadDateRange(downloadDatasetsOptions) const startTimestamp = new Date().valueOf() debug('dataset download started for %s %s %s from %s to %s', exchange, dataType, symbol, from, to) if (daysCountToFetch > 1) { // start with downloading last day of the range, validates is API key has access to the end range of requested data await downloadDataSet( getDownloadOptions({ exchange, symbol, apiKey, downloadDir, dataType, format, getFilename, date: addDays(startDate, daysCountToFetch - 1) }), skipIfExists ) } // then download the first day of the range, validates is API key has access to the start range of requested data await downloadDataSet( getDownloadOptions({ exchange, symbol, apiKey, downloadDir, dataType, format, getFilename, date: startDate }), skipIfExists ) // download the rest concurrently up to the CONCURRENCY_LIMIT await pMap( sequence(daysCountToFetch - 1, 1), // this will produce Iterable sequence from 1 to daysCountToFetch - 1 (as we already downloaded data for the first and last day) (offset) => downloadDataSet( getDownloadOptions({ exchange, symbol, apiKey, downloadDir, dataType, format, getFilename, date: addDays(startDate, offset) }), skipIfExists ), { concurrency: CONCURRENCY_LIMIT } ) const elapsedSeconds = (new Date().valueOf() - startTimestamp) / 1000 debug('dataset download finished for %s %s %s from %s to %s, time: %s seconds', exchange, dataType, symbol, from, to, elapsedSeconds) } } } async function downloadDataSet(downloadOptions: DownloadOptions, skipIfExists: boolean) { if (skipIfExists && existsSync(downloadOptions.downloadPath)) { debug('dataset %s already exists, skipping download', downloadOptions.downloadPath) } else { return await download(downloadOptions) } } function getDownloadOptions({ apiKey, exchange, dataType, date, symbol, format, downloadDir, getFilename }: { exchange: Exchange dataType: DatasetType symbol: string date: Date format: string apiKey: string downloadDir: string getFilename: (options: GetFilenameOptions) => string }): DownloadOptions { const year = date.getUTCFullYear() const month = doubleDigit(date.getUTCMonth() + 1) const day = doubleDigit(date.getUTCDate()) const url = `${options.datasetsEndpoint}/${exchange}/${dataType}/${year}/${month}/${day}/${symbol}.${format}.gz` const filename = getFilename({ dataType, date, exchange, format, symbol }) const downloadPath = `${downloadDir}/${filename}` return { url, downloadPath, userAgent: options._userAgent, apiKey } } type DownloadOptions = Parameters<typeof download>[0] function getFilenameDefault({ exchange, dataType, format, date, symbol }: GetFilenameOptions) { return `${exchange}_${dataType}_${date.toISOString().split('T')[0]}_${symbol}.${format}.gz` } function getDownloadDateRange({ from, to }: DownloadDatasetsOptions) { if (!from || isNaN(Date.parse(from))) { throw new Error(`Invalid "from" argument: ${from}. Please provide valid date string.`) } if (!to || isNaN(Date.parse(to))) { throw new Error(`Invalid "to" argument: ${to}. Please provide valid date string.`) } const toDate = parseAsUTCDate(to) const fromDate = parseAsUTCDate(from) const daysCountToFetch = Math.floor((toDate.getTime() - fromDate.getTime()) / MILLISECONDS_IN_SINGLE_DAY) if (daysCountToFetch < 1) { throw new Error(`Invalid "to" and "from" arguments combination. Please provide "to" day that is later than "from" day.`) } return { startDate: fromDate, daysCountToFetch } } type GetFilenameOptions = { exchange: Exchange dataType: DatasetType symbol: string date: Date format: string } type DownloadDatasetsOptions = { exchange: Exchange dataTypes: DatasetType[] symbols: string[] from: string to: string format?: 'csv' apiKey?: string downloadDir?: string getFilename?: (options: GetFilenameOptions) => string skipIfExists?: boolean }