mget
Version:
Parallel downloader powered by curl.
176 lines (153 loc) • 5.69 kB
JavaScript
'use strict';
const assert = require('assert');
const { spawn } = require('child_process');
const { EventEmitter } = require('events');
const fs = require('fs');
const http = require('http');
const https = require('https');
const os = require('os');
const path = require('path');
const { URL } = require('url');
const util = require('util');

const _ = require('lodash');

const tool = require('./tool');
/**
 * Parallel downloader that splits a remote file into byte ranges and fetches
 * every range with its own `curl` child process, writing each piece straight
 * into its final position in the output file.
 *
 * Emits `progress` with `(result, chunkIndex)` every time a piece of data has
 * been written to the output file.
 */
class MultiGet extends EventEmitter {
  /**
   * @param {object} [config]
   * @param {number} [config.minChunkSize=8192] Smallest byte range handed to
   *   a single curl process.
   * @param {number} [config.concurrency=0] Maximum number of parallel
   *   downloads; a falsy value means "one per CPU".
   */
  constructor(config) {
    super();
    const defaults = {
      minChunkSize: 8 * 1024, // 8 KiB
      concurrency: 0, // auto (the number of CPUs)
    };
    // Work on a private copy so the caller's object is never mutated. A
    // property explicitly set to undefined falls back to its default.
    this._config = { ...defaults };
    for (const [key, value] of Object.entries(config || {})) {
      if (value !== undefined) {
        this._config[key] = value;
      }
    }
    if (!this._config.concurrency) {
      // Guard against an empty CPU list so the chunk size never divides by 0.
      this._config.concurrency = os.cpus().length || 1;
    }
  }

  /**
   * Downloads `url` into `output` using parallel range requests.
   *
   * @param {string} url Absolute URL of the file to download.
   * @param {string} [output] Destination path; defaults to the base name of
   *   the URL path.
   * @returns {Promise<object>} Resolves with the result record (`url`,
   *   `output`, `type`, `size`, `chunks`, `downloaded`, `hadError`,
   *   `startAt`, `endAt`). When a chunk download fails the promise rejects
   *   with that error and the record is attached to it as `err.res`.
   * @throws {TypeError} Synchronously, when `url` cannot be parsed.
   */
  start(url, output) {
    this._url = new URL(url);
    this._res = {
      url,
      output: output || path.basename(this._url.pathname),
      chunks: [],
      downloaded: 0,
      hadError: false,
      startAt: Date.now(),
    };

    return this._determineContentLength()
      .then(() => {
        this._res.chunks = this._planChunks(this._res.size);

        // Pre-size the output so each chunk can be written at its own offset.
        this._fd = fs.openSync(this._res.output, 'w');
        fs.ftruncateSync(this._fd, this._res.size);

        // Start concurrent downloads
        const downloads = this._res.chunks.map(
          (chunk, index) => this._download(chunk, index));

        return Promise.all(downloads).then(
          () => this._res,
          (err) => {
            // Promise.all rejects on the first failure while the remaining
            // curl processes are still running: stop them before the output
            // descriptor gets closed underneath them.
            this._abortDownloads();
            err.res = this._res;
            throw err;
          });
      })
      .finally(() => this._closeOutput());
  }

  /**
   * Splits `size` bytes into at most `concurrency` contiguous chunks, each at
   * least `minChunkSize` bytes long (the last one takes whatever remains).
   *
   * @param {number} size Total number of bytes to download.
   * @returns {Array<{offset: number, size: number, downloaded: number}>}
   */
  _planChunks(size) {
    const chunks = [];
    // Rounding up keeps the chunk count within the configured concurrency;
    // the floor of 1 guarantees the loop below always advances.
    const chunkSize = Math.max(
      Math.ceil(size / this._config.concurrency),
      this._config.minChunkSize,
      1);
    for (let offset = 0; offset < size; offset += chunkSize) {
      chunks.push({
        offset,
        size: Math.min(chunkSize, size - offset),
        downloaded: 0,
      });
    }
    return chunks;
  }

  /** Kills every curl process that was spawned for the current download. */
  _abortDownloads() {
    for (const chunk of this._res.chunks) {
      if (chunk._child) {
        chunk._child.kill();
      }
    }
  }

  /** Records the end time, then flushes and closes the output file if open. */
  _closeOutput() {
    this._res.endAt = Date.now();
    const fd = this._fd;
    if (fd === undefined) {
      return;
    }
    this._fd = undefined;
    try {
      fs.fsyncSync(fd);
    } finally {
      // Close even when the flush fails so the descriptor is not leaked.
      fs.closeSync(fd);
    }
  }

  /**
   * Issues a HEAD request for the current URL and stores the response's
   * content type and length in the result record (`type`, `size`).
   *
   * @returns {Promise<void>} Rejects when the request fails, the status code
   *   is not 200, or no valid Content-Length header is returned.
   */
  _determineContentLength() {
    return new Promise((resolve, reject) => {
      const proto = (this._url.protocol === 'http:') ? http : https;
      const options = {
        host: this._url.hostname,
        // Port is resolved by the project's tool.determinePortNumber helper.
        port: tool.determinePortNumber(this._url),
        // Include the query string: curl later fetches the full href, so the
        // HEAD request has to address the very same resource.
        path: `${this._url.pathname}${this._url.search}`,
        method: 'HEAD',
      };
      const req = proto.request(options, (res) => {
        const { statusCode } = res;
        res.resume();
        if (statusCode !== 200) {
          return reject(new Error(`HEAD request failed. Status code: ${statusCode}`));
        }
        const size = Number.parseInt(res.headers['content-length'], 10);
        if (!Number.isInteger(size) || size < 0) {
          // Without a length the file cannot be split into byte ranges.
          return reject(new Error('HEAD response carries no valid Content-Length header'));
        }
        this._res.type = res.headers['content-type'];
        this._res.size = size;
        return resolve();
      });
      req.on('error', reject);
      req.end();
    });
  }

  /**
   * Downloads one chunk by running `curl -r <range>` and writing its stdout
   * into the output file at the chunk's offset.
   *
   * Bookkeeping fields (`startAt`, `endAt`, `code`, `signal`, `error`,
   * `downloaded`) are recorded on `chunk` as the download progresses.
   *
   * @param {{offset: number, size: number, downloaded: number}} chunk
   * @param {number} index Position of the chunk, passed along with `progress`.
   * @returns {Promise<number>} Resolves with curl's exit code (0); rejects on
   *   spawn failure, write failure, abnormal exit or an incomplete chunk.
   */
  _download(chunk, index) {
    chunk.startAt = Date.now();
    return new Promise((resolve, reject) => {
      const range = `${chunk.offset}-${chunk.offset + chunk.size - 1}`;
      const args = ['-X', 'GET', '-r', range, this._url.href, '-f'];
      // No encoding is configured, so stdout delivers raw Buffers.
      const child = spawn('curl', args);

      let failed = false;
      const fail = (err) => {
        failed = true;
        this._res.hadError = true;
        reject(err);
      };

      child.stdout.on('data', (data) => {
        if (failed || this._fd === undefined) {
          // Late data after a failure or after the output was closed.
          return;
        }
        try {
          assert.ok(Buffer.isBuffer(data));
          const written = fs.writeSync(
            this._fd, data, 0, data.length, chunk.offset + chunk.downloaded);
          chunk.downloaded += written;
          this._res.downloaded += written;
        } catch (e) {
          fail(e);
          child.kill();
          return;
        }
        this.emit('progress', this._res, index);
      });

      // curl's stderr output is not used; keep the pipe drained.
      child.stderr.resume();

      child.on('close', (code, signal) => {
        if (code !== 0) {
          const errMsg = signal ?
            `Exited with signal ${signal}` :
            `Unexpected error: code=${code}`;
          return fail(new Error(errMsg));
        }
        if (chunk.downloaded !== chunk.size) {
          // A clean exit with the wrong byte count means the requested range
          // was not served as asked, so the output file cannot be trusted.
          return fail(new Error(
            `Incomplete chunk ${index}: received ${chunk.downloaded} of ${chunk.size} bytes`));
        }
        resolve(code);
      });

      child.on('exit', (code, signal) => {
        chunk.code = code;
        chunk.endAt = Date.now();
        if (signal) {
          chunk.signal = signal;
          this._res.hadError = true;
        }
      });

      child.on('error', (err) => {
        chunk.error = err;
        fail(err);
      });

      // Kept so a failing download can stop its siblings (and for debugging).
      chunk._child = child;
    });
  }
}
// The module's export is the MultiGet class itself.
module.exports = MultiGet;