UNPKG

mget

Version:

Parallel downloader powered by curl.

176 lines (153 loc) 5.69 kB
'use strict';

const { EventEmitter } = require('events');
const { URL } = require('url');
const http = require('http');
const https = require('https');
const fs = require('fs');
const util = require('util');
const path = require('path');
const _ = require('lodash');
const assert = require('assert');
const tool = require('./tool');

/**
 * Parallel downloader: fetches one URL as several byte ranges, one `curl`
 * child process per range, and writes each range at its own offset of a
 * single pre-sized output file.
 *
 * Emits `progress` with `(result, chunkIndex)` after each piece of data has
 * been written to the output file.
 */
class MultiGet extends EventEmitter {
  /**
   * @param {object} [config]
   * @param {number} [config.minChunkSize=8192] Lower bound for the size of a
   *   chunk, in bytes.
   * @param {number} [config.concurrency=0] Number of chunks the download is
   *   split into; a falsy value means "number of CPUs".
   */
  constructor(config) {
    super();
    // Copy into a fresh object so the caller's config is never mutated
    // (defaults and the resolved concurrency used to be written into it).
    this._config = _.defaults({}, config || {}, {
      minChunkSize: 8 * 1024, // 8 KiB
      concurrency: 0 // auto (the number of CPUs)
    });
    if (!this._config.concurrency) {
      this._config.concurrency = Math.max(1, require('os').cpus().length);
    }
  }

  /**
   * Downloads `url` into `output`.
   *
   * @param {string} url Absolute URL to download.
   * @param {string} [output] Output file path; defaults to the base name of
   *   the URL's path.
   * @returns {Promise<object>} Resolves with the result record (`url`,
   *   `output`, `chunks`, `downloaded`, `hadError`, `startAt`, `endAt`,
   *   `type`, `size`). When a chunk download fails, the promise rejects with
   *   the error, which carries the same record as `err.res`.
   * @throws {TypeError} Synchronously, if `url` cannot be parsed.
   */
  start(url, output) {
    this._url = new URL(url);
    this._res = {
      url: url,
      output: output ? output : path.basename(this._url.pathname),
      chunks: [],
      downloaded: 0,
      hadError: false,
      startAt: Date.now()
    };

    const cleanUp = () => {
      this._res.endAt = Date.now();

      // After a failure some curl processes may still be running; stop them
      // so they do not keep downloading into a closed file. `endAt` is set
      // by the 'exit' handler, so finished children are skipped.
      for (const chunk of this._res.chunks) {
        if (chunk._child && chunk.endAt === undefined) {
          chunk._child.kill();
        }
      }

      if (this._fd !== undefined) {
        const fd = this._fd;
        delete this._fd;
        try {
          fs.fsyncSync(fd);
        } finally {
          // Close even if fsync throws, so the descriptor is never leaked.
          fs.closeSync(fd);
        }
      }
    };

    return this._determineContentLength()
      .then(() => {
        // Define chunk size. Rounding up keeps the number of chunks (and
        // therefore curl processes) at or below the configured concurrency;
        // rounding down produced one extra chunk for the remainder.
        const size = this._res.size;
        const chunkSize = Math.max(
          Math.ceil(size / this._config.concurrency),
          this._config.minChunkSize);

        for (let offset = 0; offset < size; offset += chunkSize) {
          this._res.chunks.push({
            offset: offset,
            size: Math.min(chunkSize, size - offset),
            downloaded: 0
          });
        }

        // Pre-size the file so every chunk can be written at its own offset.
        this._fd = fs.openSync(this._res.output, 'w');
        fs.ftruncateSync(this._fd, size);

        // Start concurrent downloads
        const promises = this._res.chunks.map((chunk, index) => {
          return this._download(chunk, index);
        });

        return Promise.all(promises)
          .then(() => {
            return this._res;
          }, (err) => {
            err.res = this._res;
            throw err;
          });
      })
      .finally(() => {
        cleanUp();
      });
  }

  /**
   * Issues a HEAD request for the current URL and stores the response's
   * `content-type` and `content-length` in `this._res.type` / `this._res.size`.
   *
   * @returns {Promise<void>} Rejects when the request fails, the status code
   *   is not 200, or the response has no usable Content-Length.
   */
  _determineContentLength() {
    return new Promise((resolve, reject) => {
      const proto = (this._url.protocol === 'http:') ? http : https;
      const options = {
        host: this._url.hostname,
        // NOTE(review): determinePortNumber lives in ./tool; presumably it
        // picks the explicit or protocol-default port - confirm there.
        port: tool.determinePortNumber(this._url),
        // Include the query string so the HEAD request targets the same
        // resource that curl later fetches through `href`.
        path: this._url.pathname + this._url.search,
        method: 'HEAD'
      };

      const req = proto.request(options, (res) => {
        const { statusCode } = res;
        // Drain the response so the socket is released.
        res.resume();

        if (statusCode !== 200) {
          return reject(new Error(`HEAD request failed. Status code: ${statusCode}`));
        }

        const rawLength = res.headers['content-length'];
        const size = Number.parseInt(rawLength, 10);
        if (!Number.isSafeInteger(size) || size < 0) {
          // Without a length the file cannot be split into ranges.
          return reject(new Error(`HEAD response has no usable Content-Length: ${rawLength}`));
        }

        this._res.type = res.headers['content-type'];
        this._res.size = size;
        return resolve();
      });

      req.on('error', (e) => {
        reject(e);
      });
      req.end();
    });
  }

  /**
   * Downloads one chunk by spawning `curl` with a byte range and writing its
   * stdout into the output file at the chunk's offset.
   *
   * Updates `chunk.startAt`, `chunk.downloaded`, `chunk.code`, `chunk.endAt`,
   * `chunk.signal`, `chunk.error` and `this._res.downloaded` / `hadError`.
   *
   * @param {object} chunk Chunk record with `offset`, `size`, `downloaded`.
   * @param {number} index Index of the chunk, passed along with `progress`.
   * @returns {Promise<number>} Resolves with curl's exit code (0); rejects on
   *   spawn failure, write failure, range overrun, non-zero exit or signal.
   */
  _download(chunk, index) {
    chunk.startAt = Date.now();
    return new Promise((resolve, reject) => {
      const { spawn } = require('child_process');
      const range = `${chunk.offset}-${chunk.offset + chunk.size - 1}`;
      const args = ['-X', 'GET', '-r', range, this._url.href, '-f'];
      // stdout delivers Buffers by default; spawn() has no `encoding` option.
      const child = spawn('curl', args);
      let failed = false;

      const fail = (err) => {
        failed = true;
        this._res.hadError = true;
        reject(err);
      };

      child.stdout.on('data', (data) => {
        if (failed) {
          return;
        }
        try {
          assert.ok(Buffer.isBuffer(data));
          if (chunk.downloaded + data.length > chunk.size) {
            // A server that ignores the Range header sends the whole body;
            // writing it would overwrite the regions of other chunks.
            throw new Error(`Received more data than requested for range ${range}`);
          }
          const written = fs.writeSync(this._fd, data, 0, data.length,
            chunk.offset + chunk.downloaded);
          chunk.downloaded += written;
          this._res.downloaded += written;
        } catch (e) {
          fail(e);
          child.kill();
          return;
        }
        this.emit('progress', this._res, index);
      });

      // curl's stderr output is not used; keep the pipe flowing so the child
      // never blocks on a full buffer.
      child.stderr.resume();

      child.on('close', (code, signal) => {
        if (code !== 0) {
          const errMsg = signal ?
            `Exited with signal ${signal}` :
            `Unexpected error: code=${code}`;
          return fail(new Error(errMsg));
        }
        resolve(code);
      });

      child.on('exit', (code, signal) => {
        chunk.code = code;
        chunk.endAt = Date.now();
        if (signal) {
          chunk.signal = signal;
          this._res.hadError = true;
        }
      });

      child.on('error', (err) => {
        chunk.error = err;
        fail(err);
      });

      chunk._child = child; // for debug purpose only
    });
  }
}

module.exports = MultiGet;