/*
 * iterparse — Delightful data parsing
 * (npm package page header: "Version:", "106 lines (105 loc) • 4.52 kB",
 * "JavaScript" — preserved here as a comment so the file parses.)
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.download = void 0;
const tslib_1 = require("tslib");
const fs_extra_1 = require("fs-extra");
const node_fetch_1 = tslib_1.__importDefault(require("node-fetch"));
const P = tslib_1.__importStar(require("ts-prime"));
const path_1 = require("path");
const url_1 = require("url");
const helpers_1 = require("./helpers");
const types_1 = require("./types");
/**
 * Writes source stream to file.
 *
 * Resolves with the destination file path once the destination stream has
 * flushed all of its data ('finish'), and rejects if either stream emits
 * 'error' — previously a stream failure left the promise pending forever,
 * and resolution happened on the source's 'end'/'close', which can fire
 * before the destination has finished writing (risking a partial file).
 *
 * @param source - NodeJS ReadableStream
 * @param file - destination file
 * @returns Promise resolving to the destination file path
 * @category Utility
 */
async function write(source, file) {
    return new Promise((resolve, reject) => {
        const dest = fs_extra_1.createWriteStream(file);
        // Resolve only after the data is flushed to the destination, not when
        // the source merely stops emitting — otherwise the file may be partial.
        dest.on('finish', () => resolve(file));
        dest.on('error', reject);
        source.on('error', reject);
        source.pipe(dest);
    }).catch((err) => {
        console.log(err);
        throw err;
    });
}
/**
 * Default hook that derives a cache resource id from response headers.
 * Prefers the ETag header, then Last-Modified; if neither is present it
 * falls back to a coarse time bucket (date + hour of day).
 */
const defaultResourceIDHookFunction = (_url, headers) => {
    const etag = headers.get('etag');
    if (etag) {
        return etag;
    }
    const lastModified = headers.get('last-modified');
    if (lastModified) {
        return lastModified;
    }
    const now = new Date();
    return `${now.toDateString()}:${now.getHours()}`;
};
/**
 * Downloads any resource from the internet and caches it in the local file system.
*
* @param url URL to file
* @include ./DownloadOptions.md
* @example
* import { download } from 'iterparse'
* download({ url: "url/to/resource.csv", downloadFolder: "/tmp", progress: (q) => console.log(q.toString()) })
* .flatMap((filePath)=> csvRead({ filePath }))
* .map((q)=> console.log(q))
* .count()
*
* @category Utility
*/
function download(options) {
    const { url } = options;
    // Defer so the fetch only starts when the returned iterable is consumed.
    return types_1.IX.defer(async () => {
        const response = await node_fetch_1.default(url, options);
        // Pause the body so no data flows until the file path is decided and
        // the progress listeners below are attached.
        response.body.pause();
        if (!response.ok) {
            // Drain the error body to build a useful message: prefer a parsed
            // JSON payload, otherwise fall back to the HTTP status text.
            response.body.resume();
            const payload = await types_1.IX.from(response.body).toArray();
            const json = P.canFail(() => JSON.parse(payload.map((q) => q.toString()).join("")));
            if (!P.isError(json)) {
                throw new Error(`Code: ${response.status}, Body: ${JSON.stringify(json)}`);
            }
            throw new Error(`Code: ${response.status}, Status Text: ${response.statusText}`);
        }
        // resourceId may be a literal string/boolean or a hook deriving an id
        // from the URL and response headers (default: etag/last-modified).
        const { resourceId = defaultResourceIDHookFunction } = options;
        const resource = P.isString(resourceId) || P.isBoolean(resourceId) ? resourceId : resourceId(url, response.headers);
        const baseName = path_1.basename(new url_1.URL(url).pathname);
        // Prefer the server-declared content type for the extension; fall back
        // to whatever extension the URL path carries.
        const extension = helpers_1.getFileType(response.headers.get('content-type') || '') || path_1.extname(baseName);
        // Cache key: hash of the URL alone (resource === false) or URL plus the
        // resource id, so a changed resource id yields a fresh cache file.
        const fileName = resource === false ? `${baseName.replace(path_1.extname(baseName), '')}-${P.hash(`${url}`)}${extension}` : `${baseName.replace(path_1.extname(baseName), '')}-${P.hash(`${url}:${resource}`)}${extension}`;
        const filePath = path_1.resolve(options.downloadFolder, fileName);
        const lockFilePath = path_1.resolve(options.downloadFolder, `.${path_1.basename(filePath).replace(extension, '')}.lock`);
        // A leftover lock file means a previous download was interrupted:
        // discard the (possibly partial) cached file so it is re-downloaded.
        if (fs_extra_1.existsSync(lockFilePath)) {
            if (fs_extra_1.existsSync(filePath)) {
                fs_extra_1.unlinkSync(filePath);
            }
        }
        // Cache hit: reuse the previously downloaded file without fetching.
        if (fs_extra_1.existsSync(filePath)) {
            return types_1.IX.of(filePath);
        }
        const meta = { downloaded: Date.now(), status: 'downloading' };
        // Create the lock first so an interrupted download is detectable above.
        await fs_extra_1.ensureFile(lockFilePath);
        await fs_extra_1.writeFile(lockFilePath, JSON.stringify(meta));
        const downloadProgress = new helpers_1.DownloadProgress(url, Date.now(), P.toInt(response.headers.get('content-length')));
        await fs_extra_1.ensureFile(filePath);
        const log = () => {
            var _a;
            // Compiled form of `options.progress?.(downloadProgress)`.
            (_a = options.progress) === null || _a === void 0 ? void 0 : _a.call(options, downloadProgress);
        };
        // Throttle progress reporting (default: at most once every 3000 ms).
        const logTh = P.throttle(log, options.progressFrequency || 3000);
        // Listeners are attached; let the paused body start flowing.
        response.body.resume();
        response.body.on('data', (chunk) => {
            if (chunk instanceof Buffer) {
                downloadProgress.add(chunk.byteLength);
            }
            logTh();
        });
        const output = await write(response.body, filePath).then(async (q) => {
            // Final unthrottled progress report, then release the lock file to
            // mark the download as complete.
            log();
            fs_extra_1.unlinkSync(lockFilePath);
            return q;
        });
        return types_1.IX.of(output);
    });
}
exports.download = download;