UNPKG

dl-vampire

Version:
333 lines (325 loc) 9.47 kB
// src/dl.ts
import assert from "assert";
import { HTTPError as HTTPError2, RequestError } from "got";
import pretry, { RetryError as RetryError2 } from "promise.retry";

// src/vampire.ts
import { EventEmitter } from "events";
import path from "path";
import { pipeline } from "stream/promises";
import fse2 from "fs-extra";
import got from "got";
import { ProxyAgent } from "proxy-agent";

// src/common.ts
import debugFactory from "debug";
var baseDebug = debugFactory("dl-vampire");

// src/util.ts
import { createHash } from "crypto";
import fse from "fs-extra";
import { HTTPError } from "got";
import { RetryError } from "promise.retry";

/**
 * Compute the hex digest of a file's content.
 *
 * @param {{ file: string, alg: string }} options `file` is the path to read,
 *   `alg` is the algorithm name handed to `crypto.createHash`.
 * @returns {Promise<string | undefined>} the hex digest, or `undefined` when
 *   the file does not exist. Rejects if the read stream emits an error.
 */
async function getFileHash({ file, alg }) {
  const exists = await fse.pathExists(file);
  if (!exists) return;
  const hash = createHash(alg);
  return new Promise((resolve, reject) => {
    fse
      .createReadStream(file)
      .on("error", reject)
      .on("data", (chunk) => hash.update(chunk))
      .on("end", () => {
        const val = hash.digest("hex");
        resolve(val);
      });
  });
}

/** Hex md5 digest of a string, read as utf8. */
var md5 = (s) => createHash("md5").update(s, "utf8").digest("hex");

/** True when `e` is a got `HTTPError` whose response status code is 404. */
var isGot404Error = (e) => e instanceof HTTPError && e.response.statusCode === 404;

/**
 * True when `e` is a got 404 error, or a `RetryError` whose inner errors are
 * all got 404 errors.
 */
var is404Error = (e) =>
  isGot404Error(e) ||
  (e instanceof RetryError && e.errors.every((childError) => isGot404Error(childError)));

// src/vampire.ts
var debug = baseDebug.extend("vampire");
var CHROME_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";

/**
 * Downloader built around a private `got` instance.
 */
var Vampire = class extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {boolean} [options.useChromeUa=true] send `CHROME_UA` as the user-agent header
   * @param {boolean} [options.useProxyEnv=true] route http/https through a `ProxyAgent`
   * @param {object} [options.requestOptions={}] extra options merged into the got defaults
   */
  constructor(options) {
    super();
    options = {
      useChromeUa: true,
      useProxyEnv: true,
      requestOptions: {},
      ...options
    };
    this.request = got.extend({
      mutableDefaults: true
      // Error: The `onCancel` handler was attached after the promise settled.
      // https://github.com/sindresorhus/got/issues/1489#issuecomment-1318617918
      // consider disable got retry options
      // new ver v11.8.6 fix this
      // retry: 0,
    });
    this.config(options);
  }

  /** The got instance every request of this Vampire goes through. */
  request;
  /** The `ProxyAgent` created by `config` when `useProxyEnv` is truthy. */
  proxyAgent;

  /**
   * Merge the given settings into the defaults of `this.request`.
   * Only truthy settings are applied; nothing is reset when a flag is falsy.
   *
   * @param {{ useChromeUa?: boolean, useProxyEnv?: boolean, requestOptions?: object }} options
   */
  config(options) {
    const instance = this.request;
    const { useChromeUa, useProxyEnv, requestOptions } = options;
    const update = (obj) => {
      instance.defaults.options.merge(obj);
    };
    if (useChromeUa) {
      update({
        headers: {
          "user-agent": CHROME_UA
        }
      });
    }
    if (useProxyEnv) {
      // NOTE(review): ProxyAgent is constructed without arguments; presumably it
      // picks the proxy up from environment variables, as the option name suggests.
      const agent = (this.proxyAgent = new ProxyAgent());
      update({
        agent: {
          http: agent,
          https: agent
        }
      });
    }
    if (requestOptions) {
      update(requestOptions);
    }
  }

  /**
   * Get the content-length of `url` with a HEAD request.
   *
   * @param {string} url
   * @returns {Promise<number | undefined>} the header converted with `Number`,
   *   or `undefined` when the header is missing or the request ends in a 404.
   *   Any other error is rethrown.
   */
  async getSize(url) {
    try {
      const res = await this.request.head(url);
      const len = res.headers["content-length"];
      if (!len) return;
      debug("content-length = %s for %s", len, url);
      const lenNum = Number(len);
      return lenNum;
    } catch (e) {
      if (is404Error(e)) {
        return void 0;
      } else {
        throw e;
      }
    }
  }

  /**
   * Decide whether a file needs to be downloaded.
   *
   * Resolves to `true` when any of these holds, checked in order:
   * `skipExists` is falsy, the local file is missing, the local hash differs
   * from `expectHash`, the local file is empty, no expected size is known, or
   * the local size differs from the expected size. Otherwise `false`.
   *
   * Declared as an arrow-function field so it stays bound to the instance when
   * passed around as a bare function.
   */
  needDownload = async ({
    url,
    file,
    skipExists = true,
    expectSize,
    expectHash,
    expectHashAlgorithm = "md5",
    useHeadRequestToFetchExpectSize = true
  }) => {
    if (!skipExists) return true;
    // pathExists matches the check used elsewhere in this module and avoids the
    // wrapper around the deprecated fs.exists.
    if (!(await fse2.pathExists(file))) return true;
    if (expectHash && expectHashAlgorithm) {
      const hash = await getFileHash({ file, alg: expectHashAlgorithm });
      if (hash !== expectHash) {
        debug("needDownload for hash mismatch: alg=%s actual(%s) != expect(%s)", expectHashAlgorithm, hash, expectHash);
        return true;
      }
    }
    const stat = await fse2.stat(file);
    const localSize = stat.size;
    if (localSize === 0) {
      debug("needDownload for local file invalid, stat.size = 0");
      return true;
    }
    if (!expectSize && useHeadRequestToFetchExpectSize) {
      expectSize = await this.getSize(url);
    }
    // Without a size to compare against the local file cannot be validated.
    if (!expectSize) {
      return true;
    }
    if (localSize !== expectSize) {
      debug("needDownload: true, localSize = %s & expectSize = %s", localSize, expectSize);
      return true;
    }
    return false;
  };

  /**
   * Download `url` into `file`.
   *
   * The parent directory is created when needed. On failure the target file is
   * removed if the error is a 404 or if the file ended up empty, and the error
   * is rethrown.
   *
   * @param {{ url: string, file: string, onprogress?: Function }} input
   *   `onprogress` is attached to the stream's "downloadProgress" event.
   * @param {AbortSignal} [signal] when it fires "abort", the file stream is
   *   closed and the network stream destroyed.
   */
  download = async ({ url, file, onprogress }, signal) => {
    const networkStream = this.request.stream(url);
    if (onprogress) {
      networkStream.on("downloadProgress", onprogress);
    }
    file = path.resolve(file);
    await fse2.ensureDir(path.dirname(file));
    const fileStream = fse2.createWriteStream(file);
    const onAbort = () => {
      fileStream.close();
      networkStream.destroy();
    };
    if (signal != null) {
      signal.addEventListener("abort", onAbort, { once: true });
    }
    let err;
    try {
      await pipeline(networkStream, fileStream);
    } catch (e) {
      err = e;
    } finally {
      // Drop the listener once the transfer has settled so the signal does not
      // keep both streams reachable for the rest of its lifetime.
      if (signal != null) {
        signal.removeEventListener("abort", onAbort);
      }
    }
    if (err) {
      if (is404Error(err)) {
        await fse2.remove(file);
      }
      // Do not leave an empty file behind: needDownload treats it as invalid anyway.
      if ((await fse2.pathExists(file)) && (await fse2.stat(file)).size === 0) {
        await fse2.remove(file);
      }
      throw err;
    }
  };
};

// src/dl.ts
/**
 * Download `options.url` to `options.file`, skipping the transfer when the
 * existing local file passes the `needDownload` checks.
 *
 * Both the check and the download are wrapped with `pretry` using
 * `options.retry` (default `{ times: 5 }`). When `options.inspectError` is not
 * disabled, failures are logged through `inspectError` before being rethrown.
 *
 * @param {object} options Vampire options, download input, validation options,
 *   plus `retry`, `onprogress` and `inspectError`.
 * @returns {Promise<{ skip: boolean }>} `{ skip: true }` when no download was
 *   needed, `{ skip: false }` after a successful download.
 */
async function dl(options) {
  const {
    // VampireNewOptions
    useChromeUa,
    requestOptions,
    useProxyEnv,
    // DownloadInput
    url,
    file,
    // ValidateExistingFileOptions
    skipExists = true,
    expectSize,
    expectHash,
    expectHashAlgorithm = "md5",
    useHeadRequestToFetchExpectSize = true,
    // download extra
    retry = { times: 5 },
    onprogress,
    inspectError: inspectErrorFlag = true
  } = options;
  assert(url, "options.url can not be empty");
  assert(file, "options.file can not be empty");
  const vampire = new Vampire({
    useChromeUa,
    requestOptions,
    useProxyEnv
  });
  const callInspectError = (e) => {
    if (!inspectErrorFlag || !e) return;
    inspectError(e, { url, file });
  };
  const tryNeedDownload = pretry(vampire.needDownload, retry);
  try {
    const need = await tryNeedDownload({
      url,
      file,
      skipExists,
      expectSize,
      expectHash,
      expectHashAlgorithm,
      useHeadRequestToFetchExpectSize
    });
    if (!need) return { skip: true };
  } catch (e) {
    callInspectError(e);
    throw e;
  }
  const tryDownload = pretry(vampire.download, retry);
  try {
    await tryDownload({ url, file, onprogress });
    return { skip: false };
  } catch (e) {
    callInspectError(e);
    throw e;
  }
}

/**
 * Log a one-line summary of a download error to stderr.
 *
 * @param {unknown} e the error to describe
 * @param {{ url: string, file: string }} context values included in the message
 */
function inspectError(e, { url, file }) {
  // HTTPError is a subclass of RequestError in got, so it has to be tested
  // first; otherwise the RequestError branch swallows it and the status code
  // is never reported.
  if (e instanceof HTTPError2) {
    return console.error("[dl-vampire]: HTTPError happens for url=%s file=%s statusCode=%s", url, file, e.response.statusCode);
  }
  if (e instanceof RequestError) {
    return console.error("[dl-vampire]: RequestError happens for url=%s file=%s code=%s", url, file, e.code);
  }
  if (e instanceof RetryError2) {
    const innerErrorTypes = new Set(e.errors.map((e2) => e2.constructor.name));
    const statusCodes = new Set(
      e.errors.map((e2) => (e2 instanceof HTTPError2 ? e2.response.statusCode : void 0)).filter(Boolean)
    );
    const errorCodes = new Set(
      e.errors.map((e2) => (e2 instanceof RequestError && !(e2 instanceof HTTPError2) ? e2.code : void 0)).filter(Boolean)
    );
    return console.error(
      `[dl-vampire]: RetryError(inner errorType:%o HTTPError.statusCode:%o RequestError.code:%o) happens for url=%s file=%s`,
      innerErrorTypes,
      statusCodes,
      errorCodes,
      url,
      file
    );
  }
  console.error("[dl-vampire]: error happens for url=%s file=%s", url, file);
}

// src/read-url.ts
import assert2 from "assert";
import { tmpdir } from "os";
import { join } from "path";
import fse3 from "fs-extra";
import ms from "ms";
var debug2 = baseDebug.extend("read-url");

/**
 * Download a url (through `dl`) and return the content of the resulting file.
 *
 * When `opts.file` is not given, a cache path from `getReadUrlCacheFile` is
 * used. When `opts.maxAge` is set and the file's mtime is within that age, the
 * download is skipped and the existing file is read.
 *
 * @param {object} opts `dl` options plus `cacheDir`, `maxAge` (a number of
 *   milliseconds, or a string parsed by `ms`) and `encoding`.
 * @returns {Promise<string | Buffer>} the result of `readFile`, called with
 *   `opts.encoding` when it is truthy and without an encoding otherwise.
 */
async function readUrl(opts) {
  const options = { ...opts };
  assert2(options.url, "options.url is required");
  options.file = options.file || getReadUrlCacheFile({ url: options.url, cacheDir: options.cacheDir });
  debug2("using file = %s", options.file);
  const file = options.file;
  const isCacheValid = async () => {
    if (!options.maxAge) return false;
    if (!(await fse3.pathExists(file))) return false;
    const stat = await fse3.stat(file);
    const age = Date.now() - stat.mtimeMs;
    const maxAgeMs = typeof options.maxAge === "number" ? options.maxAge : ms(options.maxAge);
    return age <= maxAgeMs;
  };
  if (await isCacheValid()) {
    debug2("skip download due to maxAge config, maxAge = %s", options.maxAge);
  } else {
    await dl(options);
  }
  if ("encoding" in options && options.encoding) {
    return await fse3.readFile(options.file, options.encoding);
  } else {
    return await fse3.readFile(options.file);
  }
}

/**
 * Build the cache file path used by `readUrl` for a url:
 * `<cacheDir or os.tmpdir()>/dl-vampire-cache/<md5 of the url>`.
 *
 * @param {{ url: string, cacheDir?: string }} options
 * @returns {string}
 */
function getReadUrlCacheFile(options) {
  const cacheDir = options.cacheDir || tmpdir();
  const file = join(cacheDir, "dl-vampire-cache", md5(options.url));
  return file;
}

// src/index.ts
import {
  CacheError,
  CancelError,
  HTTPError as HTTPError3,
  MaxRedirectsError,
  ParseError,
  ReadError,
  RequestError as RequestError2,
  TimeoutError,
  UploadError
} from "got";
import { RetryError as RetryError3, TimeoutError as TimeoutError2 } from "promise.retry";
export {
  CacheError,
  CancelError,
  HTTPError3 as HTTPError,
  MaxRedirectsError,
  ParseError,
  ReadError,
  RequestError2 as RequestError,
  TimeoutError as RequestTimeoutError,
  RetryError3 as RetryError,
  TimeoutError2 as TimeoutError,
  UploadError,
  Vampire,
  dl as default,
  dl,
  getReadUrlCacheFile,
  inspectError,
  is404Error,
  isGot404Error,
  readUrl
};