iterparse

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.download = void 0; const tslib_1 = require("tslib"); const fs_extra_1 = require("fs-extra"); const node_fetch_1 = tslib_1.__importDefault(require("node-fetch")); const P = tslib_1.__importStar(require("ts-prime")); const path_1 = require("path"); const url_1 = require("url"); const helpers_1 = require("./helpers"); const types_1 = require("./types"); /** * Writes source stream to file * @param source - NodeJS ReadableStream * @param file - destination file * @category Utility */ async function write(source, file) { return new Promise(async (resolve) => { const dest = fs_extra_1.createWriteStream(file); source.pipe(dest); source.on('close', () => { resolve(file); }); source.on('end', () => { resolve(file); }); }).catch((err) => { console.log(err); throw err; }); } const defaultResourceIDHookFunction = (_url, headers) => { const date = new Date(); return headers.get('etag') || headers.get('last-modified') || `${date.toDateString()}:${date.getHours()}`; }; /** * Function will download any resource from interned and cache it in local file system. * * @param url URL to file * @include ./DownloadOptions.md * @example * import { download } from 'iterparse' * download({ url: "url/to/resource.csv", downloadFolder: "/tmp", progress: (q) => console.log(q.toString()) }) * .flatMap((filePath)=> csvRead({ filePath })) * .map((q)=> console.log(q)) * .count() * * @category Utility */ function download(options) { const { url } = options; return types_1.IX.defer(async () => { const response = await node_fetch_1.default(url, options); response.body.pause(); if (!response.ok) { response.body.resume(); const payload = await types_1.IX.from(response.body).toArray(); const json = P.canFail(() => JSON.parse(payload.map((q) => q.toString()).join(""))); if (!P.isError(json)) { throw new Error(`Code: ${response.status}, Body: ${JSON.stringify(json)}`); } throw new Error(`Code: ${response.status}, Status Text: ${response.statusText}`); } const { resourceId = defaultResourceIDHookFunction } = options; const resource = P.isString(resourceId) || P.isBoolean(resourceId) ? resourceId : resourceId(url, response.headers); const baseName = path_1.basename(new url_1.URL(url).pathname); const extension = helpers_1.getFileType(response.headers.get('content-type') || '') || path_1.extname(baseName); const fileName = resource === false ? `${baseName.replace(path_1.extname(baseName), '')}-${P.hash(`${url}`)}${extension}` : `${baseName.replace(path_1.extname(baseName), '')}-${P.hash(`${url}:${resource}`)}${extension}`; const filePath = path_1.resolve(options.downloadFolder, fileName); const lockFilePath = path_1.resolve(options.downloadFolder, `.${path_1.basename(filePath).replace(extension, '')}.lock`); if (fs_extra_1.existsSync(lockFilePath)) { if (fs_extra_1.existsSync(filePath)) { fs_extra_1.unlinkSync(filePath); } } if (fs_extra_1.existsSync(filePath)) { return types_1.IX.of(filePath); } const meta = { downloaded: Date.now(), status: 'downloading' }; await fs_extra_1.ensureFile(lockFilePath); await fs_extra_1.writeFile(lockFilePath, JSON.stringify(meta)); const downloadProgress = new helpers_1.DownloadProgress(url, Date.now(), P.toInt(response.headers.get('content-length'))); await fs_extra_1.ensureFile(filePath); const log = () => { var _a; (_a = options.progress) === null || _a === void 0 ? 
void 0 : _a.call(options, downloadProgress); }; const logTh = P.throttle(log, options.progressFrequency || 3000); response.body.resume(); response.body.on('data', (chunk) => { if (chunk instanceof Buffer) { downloadProgress.add(chunk.byteLength); } logTh(); }); const output = await write(response.body, filePath).then(async (q) => { log(); fs_extra_1.unlinkSync(lockFilePath); return q; }); return types_1.IX.of(output); }); } exports.download = download;