dl-vampire
Version:
download file like a vampire
333 lines (325 loc) • 9.47 kB
JavaScript
// src/dl.ts
import assert from "assert";
import { HTTPError as HTTPError2, RequestError } from "got";
import pretry, { RetryError as RetryError2 } from "promise.retry";
// src/vampire.ts
import { EventEmitter } from "events";
import path from "path";
import { pipeline } from "stream/promises";
import fse2 from "fs-extra";
import got from "got";
import { ProxyAgent } from "proxy-agent";
// src/common.ts
import debugFactory from "debug";
// Root `debug` logger for the package, created under the "dl-vampire" namespace.
// Module-specific loggers in this file are derived from it via `.extend(...)`.
var baseDebug = debugFactory("dl-vampire");
// src/util.ts
import { createHash } from "crypto";
import fse from "fs-extra";
import { HTTPError } from "got";
import { RetryError } from "promise.retry";
async function getFileHash({ file, alg }) {
const exists = await fse.pathExists(file);
if (!exists) return;
const hash = createHash(alg);
return new Promise((resolve, reject) => {
fse.createReadStream(file).on("error", reject).on("data", (chunk) => hash.update(chunk)).on("end", () => {
const val = hash.digest("hex");
resolve(val);
});
});
}
/** Hex-encoded MD5 digest of the string `s`, interpreted as UTF-8. */
var md5 = (s) => {
  const hasher = createHash("md5");
  hasher.update(s, "utf8");
  return hasher.digest("hex");
};
var isGot404Error = (e) => e instanceof HTTPError && e.response.statusCode === 404;
var is404Error = (e) => isGot404Error(e) || e instanceof RetryError && e.errors.every((childError) => isGot404Error(childError));
// src/vampire.ts
// Debug logger for the Vampire class, derived from the package root logger.
var debug = baseDebug.extend("vampire");
// User-agent string of desktop Chrome 117 on macOS; Vampire#config installs it
// as the default "user-agent" request header when `useChromeUa` is truthy.
var CHROME_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
/**
 * Downloader built around a private, mutable `got` instance.
 *
 * - `getSize(url)` reads the remote `content-length` via a HEAD request.
 * - `needDownload(input)` decides whether a local file must be (re)fetched.
 * - `download(input, signal)` streams a URL into a file.
 *
 * `needDownload` and `download` are arrow-function fields so they stay bound
 * to the instance when passed around as bare function references.
 */
var Vampire = class extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {boolean} [options.useChromeUa=true] send `CHROME_UA` as the default user-agent header
   * @param {boolean} [options.useProxyEnv=true] install a `ProxyAgent` for http and https requests
   * @param {object} [options.requestOptions={}] extra got options, merged after the two above
   */
  constructor(options) {
    super();
    // Destructuring defaults are used instead of `{ ...defaults, ...options }`:
    // with object spread a key that is present but `undefined` (e.g. forwarded
    // verbatim from a caller's own optional settings) would overwrite the
    // default and silently turn the feature off.
    const { useChromeUa = true, useProxyEnv = true, requestOptions = {} } = options || {};
    this.request = got.extend({
      mutableDefaults: true
      // Error: The `onCancel` handler was attached after the promise settled.
      // https://github.com/sindresorhus/got/issues/1489#issuecomment-1318617918
      // consider disable got retry options
      // new ver v11.8.6 fix this
      // retry: 0,
    });
    this.config({ useChromeUa, useProxyEnv, requestOptions });
  }
  /** The got instance used for every request made by this object. */
  request;
  /** The ProxyAgent installed by `config`, if any. */
  proxyAgent;
  /**
   * Merge settings into the defaults of the underlying got instance.
   * Only truthy settings are applied; nothing is ever reset.
   */
  config(options) {
    const instance = this.request;
    const { useChromeUa, useProxyEnv, requestOptions } = options;
    const update = (obj) => {
      instance.defaults.options.merge(obj);
    };
    if (useChromeUa) {
      update({
        headers: {
          "user-agent": CHROME_UA
        }
      });
    }
    if (useProxyEnv) {
      const agent = this.proxyAgent = new ProxyAgent();
      update({
        agent: {
          http: agent,
          https: agent
        }
      });
    }
    if (requestOptions) {
      update(requestOptions);
    }
  }
  /**
   * Get the remote content-length of `url` using a HEAD request.
   *
   * @returns {Promise<number | undefined>} the size in bytes, or `undefined`
   *   when the header is missing or not a finite number, or when the request
   *   fails with a 404. Any other error is rethrown.
   */
  async getSize(url) {
    try {
      const res = await this.request.head(url);
      const len = res.headers["content-length"];
      if (!len) return;
      debug("content-length = %s for %s", len, url);
      const lenNum = Number(len);
      // A malformed header must read as "size unknown", not as NaN.
      return Number.isFinite(lenNum) ? lenNum : void 0;
    } catch (e) {
      if (is404Error(e)) {
        return void 0;
      }
      throw e;
    }
  }
  /**
   * Decide whether a file needs to be downloaded.
   *
   * Resolves to `true` (download) when: `skipExists` is false; the file is
   * missing; its hash differs from `expectHash`; it is empty; the expected
   * size is unknown; or the local size differs from the expected size.
   * Resolves to `false` only when the local size equals the expected size.
   */
  needDownload = async ({
    url,
    file,
    skipExists = true,
    expectSize,
    expectHash,
    expectHashAlgorithm = "md5",
    useHeadRequestToFetchExpectSize = true
  }) => {
    if (!skipExists) return true;
    if (!await fse2.exists(file)) return true;
    if (expectHash && expectHashAlgorithm) {
      const hash = await getFileHash({ file, alg: expectHashAlgorithm });
      if (hash !== expectHash) {
        debug("needDownload for hash mismatch: alg=%s actual(%s) != expect(%s)", expectHashAlgorithm, hash, expectHash);
        return true;
      }
    }
    const stat = await fse2.stat(file);
    const localSize = stat.size;
    if (localSize === 0) {
      debug("needDownload for local file invalid, stat.size = 0");
      return true;
    }
    // No expected size supplied: ask the server for content-length.
    if (!expectSize && useHeadRequestToFetchExpectSize) {
      expectSize = await this.getSize(url);
    }
    // Size still unknown: the local file cannot be validated, so download.
    if (!expectSize) {
      return true;
    }
    if (localSize !== expectSize) {
      debug("needDownload: true, localSize = %s & expectSize = %s", localSize, expectSize);
      return true;
    }
    return false;
  };
  /**
   * Download one file: stream `url` into `file`, creating parent directories.
   *
   * @param {{ url: string, file: string, onprogress?: Function }} input
   *   `onprogress` is attached to the stream's "downloadProgress" event.
   * @param {AbortSignal} [signal] when aborted, both streams are torn down
   *   and the pending download rejects.
   *
   * On failure the error is rethrown after cleanup: the target file is
   * removed on a 404 or when it is left empty.
   */
  download = async ({ url, file, onprogress }, signal) => {
    const networkStream = this.request.stream(url);
    if (onprogress) {
      networkStream.on("downloadProgress", onprogress);
    }
    file = path.resolve(file);
    await fse2.ensureDir(path.dirname(file));
    const fileStream = fse2.createWriteStream(file);
    const onAbort = () => {
      fileStream.close();
      networkStream.destroy();
    };
    if (signal) {
      if (signal.aborted) {
        // The abort already happened (before the call or while the directory
        // was being created); the "abort" event will not fire again.
        onAbort();
      } else {
        signal.addEventListener("abort", onAbort, { once: true });
      }
    }
    let err;
    try {
      await pipeline(networkStream, fileStream);
    } catch (e) {
      err = e;
    } finally {
      // Do not keep the streams reachable through a long-lived signal.
      if (signal) signal.removeEventListener("abort", onAbort);
    }
    if (err) {
      if (is404Error(err)) {
        await fse2.remove(file);
      }
      if (await fse2.exists(file) && (await fse2.stat(file)).size === 0) {
        await fse2.remove(file);
      }
      throw err;
    }
  };
};
// src/dl.ts
/**
 * Download `options.url` to `options.file`, skipping the transfer when the
 * existing local file is judged valid.
 *
 * @param {object} options
 *   - `url`, `file` (required)
 *   - `useChromeUa`, `useProxyEnv`, `requestOptions`: passed to `Vampire`
 *   - `skipExists`, `expectSize`, `expectHash`, `expectHashAlgorithm`,
 *     `useHeadRequestToFetchExpectSize`: passed to `Vampire#needDownload`
 *   - `retry` (default `{ times: 5 }`): options for `promise.retry`, applied
 *     to both the validity check and the download
 *   - `onprogress`: download progress callback
 *   - `inspectError` (default `true`): log a summary of a failure via
 *     `inspectError` before rethrowing it
 * @returns {Promise<{ skip: boolean }>} `{ skip: true }` when no download was
 *   needed, `{ skip: false }` after a successful download.
 * @throws the assertion error for a missing url/file, or whatever the
 *   check/download finally failed with.
 */
async function dl(options) {
  const {
    // VampireNewOptions
    useChromeUa,
    requestOptions,
    useProxyEnv,
    // DownloadInput
    url,
    file,
    // ValidateExistingFileOptions
    skipExists = true,
    expectSize,
    expectHash,
    expectHashAlgorithm = "md5",
    useHeadRequestToFetchExpectSize = true,
    // download extra
    retry = { times: 5 },
    onprogress,
    inspectError: inspectErrorFlag = true
  } = options;
  assert(url, "options.url can not be empty");
  assert(file, "options.file can not be empty");
  // Forward only the settings the caller actually provided. Passing a key with
  // an `undefined` value would override the Vampire constructor's defaults
  // (Chrome user-agent and proxy agent enabled) when they are merged by spread.
  const vampireOptions = Object.fromEntries(
    Object.entries({ useChromeUa, requestOptions, useProxyEnv }).filter(([, value]) => value !== void 0)
  );
  const vampire = new Vampire(vampireOptions);
  const callInspectError = (e) => {
    if (!inspectErrorFlag || !e) return;
    inspectError(e, { url, file });
  };
  const tryNeedDownload = pretry(vampire.needDownload, retry);
  try {
    const need = await tryNeedDownload({
      url,
      file,
      skipExists,
      expectSize,
      expectHash,
      expectHashAlgorithm,
      useHeadRequestToFetchExpectSize
    });
    if (!need) return { skip: true };
  } catch (e) {
    callInspectError(e);
    throw e;
  }
  const tryDownload = pretry(vampire.download, retry);
  try {
    await tryDownload({ url, file, onprogress });
    return { skip: false };
  } catch (e) {
    callInspectError(e);
    throw e;
  }
}
function inspectError(e, { url, file }) {
if (e instanceof RequestError) {
return console.error("[dl-vampire]: RequestError happens for url=%s file=%s code=%s", url, file, e.code);
}
if (e instanceof HTTPError2) {
return console.error("[dl-vampire]: HTTPError happens for url=%s file=%s statusCode=%s", url, file, e.response.statusCode);
}
if (e instanceof RetryError2) {
const innerErrorTypes = new Set(e.errors.map((e2) => e2.constructor.name));
const statusCodes = new Set(e.errors.map((e2) => e2 instanceof HTTPError2 ? e2.response.statusCode : void 0).filter(Boolean));
const errorCodes = new Set(
e.errors.map((e2) => e2 instanceof RequestError && !(e2 instanceof HTTPError2) ? e2.code : void 0).filter(Boolean)
);
return console.error(
`[dl-vampire]: RetryError(inner errorType:%o HTTPError.statusCode:%o RequestError.code:%o) happens for url=%s file=%s`,
innerErrorTypes,
statusCodes,
errorCodes,
url,
file
);
}
console.error("[dl-vampire]: error happens for url=%s file=%s", url, file);
}
// src/read-url.ts
import assert2 from "assert";
import { tmpdir } from "os";
import { join } from "path";
import fse3 from "fs-extra";
import ms from "ms";
// Debug logger for readUrl, derived from the package root logger.
var debug2 = baseDebug.extend("read-url");
/**
 * Fetch a URL through `dl` into a cache file and return the file's contents.
 *
 * @param {object} opts the `dl` options plus:
 *   - `file`: cache file path (defaults to `getReadUrlCacheFile(...)`)
 *   - `cacheDir`: directory used when deriving the default cache file
 *   - `maxAge`: number of milliseconds, or a string parsed by `ms`; when the
 *     cache file exists and is no older than this, `dl` is not called
 *   - `encoding`: when truthy, passed to `readFile` as its second argument
 * @returns the result of `fse.readFile` on the cache file, with the encoding
 *   applied when one was given.
 */
async function readUrl(opts) {
  const options = { ...opts };
  assert2(options.url, "options.url is required");
  if (!options.file) {
    options.file = getReadUrlCacheFile({ url: options.url, cacheDir: options.cacheDir });
  }
  const file = options.file;
  debug2("using file = %s", file);
  // The cache counts as fresh only when maxAge is set, the file exists, and
  // its mtime is within maxAge of now.
  let cacheIsFresh = false;
  if (options.maxAge && await fse3.pathExists(file)) {
    const { mtimeMs } = await fse3.stat(file);
    const age = Date.now() - mtimeMs;
    const limitMs = typeof options.maxAge === "number" ? options.maxAge : ms(options.maxAge);
    cacheIsFresh = age <= limitMs;
  }
  if (cacheIsFresh) {
    debug2("skip download due to maxAge config, maxAge = %s", options.maxAge);
  } else {
    await dl(options);
  }
  const wantsEncoding = "encoding" in options && options.encoding;
  if (wantsEncoding) {
    return await fse3.readFile(file, options.encoding);
  }
  return await fse3.readFile(file);
}
function getReadUrlCacheFile(options) {
const cacheDir = options.cacheDir || tmpdir();
const file = join(cacheDir, "dl-vampire-cache", md5(options.url));
return file;
}
// src/index.ts
import {
CacheError,
CancelError,
HTTPError as HTTPError3,
MaxRedirectsError,
ParseError,
ReadError,
RequestError as RequestError2,
TimeoutError,
UploadError
} from "got";
import { RetryError as RetryError3, TimeoutError as TimeoutError2 } from "promise.retry";
export {
CacheError,
CancelError,
HTTPError3 as HTTPError,
MaxRedirectsError,
ParseError,
ReadError,
RequestError2 as RequestError,
TimeoutError as RequestTimeoutError,
RetryError3 as RetryError,
TimeoutError2 as TimeoutError,
UploadError,
Vampire,
dl as default,
dl,
getReadUrlCacheFile,
inspectError,
is404Error,
isGot404Error,
readUrl
};