UNPKG

@dpkit/file

Version:

Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames

72 lines (59 loc) 1.63 kB
import { createReadStream } from "node:fs"
import { Readable, Transform } from "node:stream"
import { isRemotePath } from "@dpkit/core"

/**
 * Opens a readable byte stream for a file addressed by a local or remote path.
 *
 * @param pathOrPaths - A single path or a list of paths to choose from.
 * @param options.index - Position in `pathOrPaths` of the path to stream
 *   (defaults to `0`).
 * @param options.maxBytes - When set to a positive number, the stream yields at
 *   most this many bytes. A missing or `0` value means "no limit".
 * @returns A Node.js readable stream over the selected file.
 * @throws Error when no path exists at the requested index, or when the remote
 *   resource cannot be streamed.
 */
export async function loadFileStream(
  pathOrPaths: string | string[],
  options?: {
    index?: number
    maxBytes?: number
  },
) {
  const index = options?.index ?? 0
  const paths = Array.isArray(pathOrPaths) ? pathOrPaths : [pathOrPaths]

  const path = paths[index]
  if (!path) {
    throw new Error(`Cannot stream resource ${path} at index ${index}`)
  }

  const isRemote = isRemotePath(path)
  const stream = isRemote
    ? await loadRemoteFileStream(path, options)
    : await loadLocalFileStream(path, options)

  return stream
}

/**
 * Fetches a remote resource and exposes its body as a Node.js readable stream.
 *
 * @throws Error when the server answers with a non-2xx status or when the
 *   response carries no body.
 */
async function loadRemoteFileStream(
  path: string,
  options?: { maxBytes?: number },
) {
  const response = await fetch(path)

  // `fetch` resolves for 4xx/5xx answers too; without this check the error
  // page would be streamed back to the caller as if it were the file content.
  if (!response.ok) {
    // Best-effort release of the connection; the status error below is the
    // failure that matters to the caller.
    await response.body?.cancel().catch(() => undefined)
    throw new Error(
      `Cannot stream remote resource: ${path} (HTTP ${response.status})`,
    )
  }

  if (!response.body) {
    throw new Error(`Cannot stream remote resource: ${path}`)
  }

  let stream = Readable.fromWeb(response.body)

  if (options?.maxBytes) {
    stream = limitBytesStream(stream, options.maxBytes)
  }

  return stream
}

/**
 * Opens a local file as a readable stream, optionally restricted to its first
 * `maxBytes` bytes.
 */
async function loadLocalFileStream(
  path: string,
  options?: { maxBytes?: number },
) {
  // `end` is an inclusive byte offset, hence the `- 1`.
  const end = options?.maxBytes ? options.maxBytes - 1 : undefined
  return createReadStream(path, { end })
}

/**
 * Wraps `inputStream` so that exactly the first `maxBytes` bytes (or fewer, if
 * the source is shorter) are emitted.
 *
 * Once the limit is reached the source is unpiped and destroyed, so no further
 * data is pulled from it. Errors emitted by the source are forwarded to the
 * returned stream, and destroying the returned stream also destroys the source.
 */
function limitBytesStream(inputStream: Readable, maxBytes: number) {
  let remaining = Math.max(0, Math.floor(maxBytes))
  let limitReached = false

  const limiter = new Transform({
    transform(chunk: Buffer, _encoding, callback) {
      // Chunks that were already queued when the limit was hit are dropped.
      if (!limitReached) {
        if (remaining > 0) {
          // Trim the chunk that crosses the boundary instead of forwarding it
          // whole, so the output never exceeds the limit.
          const allowed =
            chunk.length > remaining ? chunk.subarray(0, remaining) : chunk
          remaining -= allowed.length
          this.push(allowed)
        }

        if (remaining <= 0) {
          limitReached = true
          // Stop pulling from the source, then end the writable side so the
          // readable side emits 'end' once the pushed data has been consumed.
          inputStream.unpipe(this)
          inputStream.destroy()
          this.end()
        }
      }

      callback()
    },
  })

  // `pipe()` does not forward source errors; surface them on the returned
  // stream so the consumer can observe them.
  inputStream.on("error", (error: Error) => {
    limiter.destroy(error)
  })

  // If the consumer tears the limiter down early, release the source as well.
  limiter.once("close", () => {
    if (!inputStream.destroyed) {
      inputStream.destroy()
    }
  })

  return inputStream.pipe(limiter)
}