
@huggingface/hub


Utilities to interact with the Hugging Face hub

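Before the bundled source below, a minimal usage sketch: it pages through datasets with listDatasets (an async generator defined in src/lib/list-datasets.ts) and verifies read access with checkRepoAccess (src/lib/check-repo-access.ts). The import specifier, the HF_TOKEN environment variable, and the repo name are assumptions for illustration; per checkAccessToken in the source, any token supplied must start with "hf_".

// Hypothetical usage sketch. The function names come from the bundled source below;
// importing them from "@huggingface/hub" is an assumption of this example.
import { listDatasets, checkRepoAccess } from "@huggingface/hub";

const accessToken = process.env.HF_TOKEN; // assumed env var; must start with "hf_" if set

// listDatasets is an async generator that pages through /api/datasets via Link headers.
for await (const dataset of listDatasets({ search: { owner: "huggingface" }, limit: 5, accessToken })) {
	console.log(dataset.name, dataset.downloads, dataset.updatedAt);
}

// checkRepoAccess resolves if the repo is readable with the given token
// and throws a HubApiError (defined below) otherwise.
await checkRepoAccess({ repo: "datasets/huggingface/documentation-images", accessToken });

Both calls accept optional hubUrl and fetch parameters, which the source uses to override the default HUB_URL and global fetch.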
// src/lib/cache-management.ts import { homedir } from "os"; import { join, basename } from "path"; import { stat, readdir, readFile, realpath, lstat } from "fs/promises"; function getDefaultHome() { return join(homedir(), ".cache"); } function getDefaultCachePath() { return join(process.env["HF_HOME"] ?? join(process.env["XDG_CACHE_HOME"] ?? getDefaultHome(), "huggingface"), "hub"); } function getHuggingFaceHubCache() { return process.env["HUGGINGFACE_HUB_CACHE"] ?? getDefaultCachePath(); } function getHFHubCachePath() { return process.env["HF_HUB_CACHE"] ?? getHuggingFaceHubCache(); } var FILES_TO_IGNORE = [".DS_Store"]; var REPO_ID_SEPARATOR = "--"; function getRepoFolderName({ name, type }) { const parts = [`${type}s`, ...name.split("/")]; return parts.join(REPO_ID_SEPARATOR); } async function scanCacheDir(cacheDir = void 0) { if (!cacheDir) cacheDir = getHFHubCachePath(); const s = await stat(cacheDir); if (!s.isDirectory()) { throw new Error( `Scan cache expects a directory but found a file: ${cacheDir}. Please use \`cacheDir\` argument or set \`HF_HUB_CACHE\` environment variable.` ); } const repos = []; const warnings = []; const directories = await readdir(cacheDir); for (const repo of directories) { if (repo === ".locks") continue; const absolute = join(cacheDir, repo); const s2 = await stat(absolute); if (!s2.isDirectory()) { continue; } try { const cached = await scanCachedRepo(absolute); repos.push(cached); } catch (err) { warnings.push(err); } } return { repos, size: [...repos.values()].reduce((sum, repo) => sum + repo.size, 0), warnings }; } async function scanCachedRepo(repoPath) { const name = basename(repoPath); if (!name.includes(REPO_ID_SEPARATOR)) { throw new Error(`Repo path is not a valid HuggingFace cache directory: ${name}`); } const [type, ...remaining] = name.split(REPO_ID_SEPARATOR); const repoType = parseRepoType(type); const repoId = remaining.join("/"); const snapshotsPath = join(repoPath, "snapshots"); const refsPath = join(repoPath, "refs"); const snapshotStat = await stat(snapshotsPath); if (!snapshotStat.isDirectory()) { throw new Error(`Snapshots dir doesn't exist in cached repo ${snapshotsPath}`); } const refsByHash = /* @__PURE__ */ new Map(); const refsStat = await stat(refsPath); if (refsStat.isDirectory()) { await scanRefsDir(refsPath, refsByHash); } const cachedRevisions = []; const blobStats = /* @__PURE__ */ new Map(); const snapshotDirs = await readdir(snapshotsPath); for (const dir of snapshotDirs) { if (FILES_TO_IGNORE.includes(dir)) continue; const revisionPath = join(snapshotsPath, dir); const revisionStat = await stat(revisionPath); if (!revisionStat.isDirectory()) { throw new Error(`Snapshots folder corrupted. Found a file: ${revisionPath}`); } const cachedFiles = []; await scanSnapshotDir(revisionPath, cachedFiles, blobStats); const revisionLastModified = cachedFiles.length > 0 ? Math.max(...[...cachedFiles].map((file) => file.blob.lastModifiedAt.getTime())) : revisionStat.mtimeMs; cachedRevisions.push({ commitOid: dir, files: cachedFiles, refs: refsByHash.get(dir) || [], size: [...cachedFiles].reduce((sum, file) => sum + file.blob.size, 0), path: revisionPath, lastModifiedAt: new Date(revisionLastModified) }); refsByHash.delete(dir); } if (refsByHash.size > 0) { throw new Error( `Reference(s) refer to missing commit hashes: ${JSON.stringify(Object.fromEntries(refsByHash))} (${repoPath})` ); } const repoStats = await stat(repoPath); const repoLastAccessed = blobStats.size > 0 ? 
Math.max(...[...blobStats.values()].map((stat3) => stat3.atimeMs)) : repoStats.atimeMs; const repoLastModified = blobStats.size > 0 ? Math.max(...[...blobStats.values()].map((stat3) => stat3.mtimeMs)) : repoStats.mtimeMs; return { id: { name: repoId, type: repoType }, path: repoPath, filesCount: blobStats.size, revisions: cachedRevisions, size: [...blobStats.values()].reduce((sum, stat3) => sum + stat3.size, 0), lastAccessedAt: new Date(repoLastAccessed), lastModifiedAt: new Date(repoLastModified) }; } async function scanRefsDir(refsPath, refsByHash) { const refFiles = await readdir(refsPath, { withFileTypes: true }); for (const refFile of refFiles) { const refFilePath = join(refsPath, refFile.name); if (refFile.isDirectory()) continue; const commitHash = await readFile(refFilePath, "utf-8"); const refName = refFile.name; if (!refsByHash.has(commitHash)) { refsByHash.set(commitHash, []); } refsByHash.get(commitHash)?.push(refName); } } async function scanSnapshotDir(revisionPath, cachedFiles, blobStats) { const files = await readdir(revisionPath, { withFileTypes: true }); for (const file of files) { if (file.isDirectory()) continue; const filePath = join(revisionPath, file.name); const blobPath = await realpath(filePath); const blobStat = await getBlobStat(blobPath, blobStats); cachedFiles.push({ path: filePath, blob: { path: blobPath, size: blobStat.size, lastAccessedAt: new Date(blobStat.atimeMs), lastModifiedAt: new Date(blobStat.mtimeMs) } }); } } async function getBlobStat(blobPath, blobStats) { const blob = blobStats.get(blobPath); if (!blob) { const statResult = await lstat(blobPath); blobStats.set(blobPath, statResult); return statResult; } return blob; } function parseRepoType(type) { switch (type) { case "models": return "model"; case "datasets": return "dataset"; case "spaces": return "space"; default: throw new TypeError(`Invalid repo type: ${type}`); } } // src/consts.ts var HUB_URL = "https://huggingface.co"; // src/error.ts async function createApiError(response, opts) { const error = new HubApiError(response.url, response.status, response.headers.get("X-Request-Id") ?? opts?.requestId); error.message = `Api error with status ${error.statusCode}${opts?.message ? `. ${opts.message}` : ""}`; const trailer = [`URL: ${error.url}`, error.requestId ? `Request ID: ${error.requestId}` : void 0].filter(Boolean).join(". "); if (response.headers.get("Content-Type")?.startsWith("application/json")) { const json = await response.json(); error.message = json.error || json.message || error.message; if (json.error_description) { error.message = error.message ? error.message + `: ${json.error_description}` : json.error_description; } error.data = json; } else { error.data = { message: await response.text() }; } error.message += `. 
${trailer}`; throw error; } var HubApiError = class extends Error { statusCode; url; requestId; data; constructor(url, statusCode, requestId, message) { super(message); this.statusCode = statusCode; this.requestId = requestId; this.url = url; } }; var InvalidApiResponseFormatError = class extends Error { }; // src/utils/checkCredentials.ts function checkAccessToken(accessToken) { if (!accessToken.startsWith("hf_")) { throw new TypeError("Your access token must start with 'hf_'"); } } function checkCredentials(params) { if (params.accessToken) { checkAccessToken(params.accessToken); return params.accessToken; } if (params.credentials?.accessToken) { checkAccessToken(params.credentials.accessToken); return params.credentials.accessToken; } } // src/utils/toRepoId.ts function toRepoId(repo) { if (typeof repo !== "string") { return repo; } if (repo.startsWith("model/") || repo.startsWith("models/")) { throw new TypeError( "A repo designation for a model should not start with 'models/', directly specify the model namespace / name" ); } if (repo.startsWith("space/")) { throw new TypeError("Spaces should start with 'spaces/', plural, not 'space/'"); } if (repo.startsWith("dataset/")) { throw new TypeError("Datasets should start with 'dataset/', plural, not 'dataset/'"); } const slashes = repo.split("/").length - 1; if (repo.startsWith("spaces/")) { if (slashes !== 2) { throw new TypeError("Space Id must include namespace and name of the space"); } return { type: "space", name: repo.slice("spaces/".length) }; } if (repo.startsWith("datasets/")) { if (slashes > 2) { throw new TypeError("Too many slashes in repo designation: " + repo); } return { type: "dataset", name: repo.slice("datasets/".length) }; } if (slashes > 1) { throw new TypeError("Too many slashes in repo designation: " + repo); } return { type: "model", name: repo }; } // src/lib/check-repo-access.ts async function checkRepoAccess(params) { const accessToken = params && checkCredentials(params); const repoId = toRepoId(params.repo); const response = await (params.fetch || fetch)(`${params?.hubUrl || HUB_URL}/api/${repoId.type}s/${repoId.name}`, { headers: { ...accessToken ? 
{ Authorization: `Bearer ${accessToken}` } : {} } }); if (!response.ok) { throw await createApiError(response); } } // src/utils/eventToGenerator.ts async function* eventToGenerator(cb) { const promises = []; function addPromise() { let resolve3; let reject; const p = new Promise((res, rej) => { resolve3 = res; reject = rej; }); promises.push({ p, resolve: resolve3, reject }); } addPromise(); const callbackRes = Promise.resolve().then( () => cb( (y) => { addPromise(); promises.at(-2)?.resolve({ done: false, value: y }); }, (r) => { addPromise(); promises.at(-2)?.resolve({ done: true, value: r }); }, (err) => promises.shift()?.reject(err) ) ).catch((err) => promises.shift()?.reject(err)); while (1) { const p = promises[0]; if (!p) { throw new Error("Logic error in eventGenerator, promises should never be empty"); } const result = await p.p; promises.shift(); if (result.done) { await callbackRes; return result.value; } yield result.value; } throw new Error("Unreachable"); } // src/utils/hexFromBytes.ts function hexFromBytes(arr) { if (globalThis.Buffer) { return globalThis.Buffer.from(arr).toString("hex"); } else { const bin = []; arr.forEach((byte) => { bin.push(byte.toString(16).padStart(2, "0")); }); return bin.join(""); } } // src/utils/isBackend.ts var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined"; var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope"; var isBackend = !isBrowser && !isWebWorker; // src/utils/isFrontend.ts var isFrontend = !isBackend; // src/utils/sha256.ts async function getWebWorkerCode() { const sha256Module = await import("./sha256-wrapper-DYTB3MXW.mjs"); return URL.createObjectURL(new Blob([sha256Module.createSHA256WorkerCode()])); } var pendingWorkers = []; var runningWorkers = /* @__PURE__ */ new Set(); var resolve; var waitPromise = new Promise((r) => { resolve = r; }); async function getWorker(poolSize) { { const worker2 = pendingWorkers.pop(); if (worker2) { runningWorkers.add(worker2); return worker2; } } if (!poolSize) { const worker2 = new Worker(await getWebWorkerCode()); runningWorkers.add(worker2); return worker2; } if (poolSize <= 0) { throw new TypeError("Invalid webworker pool size: " + poolSize); } while (runningWorkers.size >= poolSize) { await waitPromise; } const worker = new Worker(await getWebWorkerCode()); runningWorkers.add(worker); return worker; } async function freeWorker(worker, poolSize) { if (!poolSize) { return destroyWorker(worker); } runningWorkers.delete(worker); pendingWorkers.push(worker); const r = resolve; waitPromise = new Promise((r2) => { resolve = r2; }); r(); } function destroyWorker(worker) { runningWorkers.delete(worker); worker.terminate(); const r = resolve; waitPromise = new Promise((r2) => { resolve = r2; }); r(); } async function* sha256(buffer, opts) { yield 0; const maxCryptoSize = typeof opts?.useWebWorker === "object" && opts?.useWebWorker.minSize !== void 0 ? opts.useWebWorker.minSize : 1e7; if (buffer.size < maxCryptoSize && globalThis.crypto?.subtle) { const res = hexFromBytes( new Uint8Array( await globalThis.crypto.subtle.digest("SHA-256", buffer instanceof Blob ? await buffer.arrayBuffer() : buffer) ) ); yield 1; return res; } if (isFrontend) { if (opts?.useWebWorker) { try { const poolSize = typeof opts?.useWebWorker === "object" ? 
opts.useWebWorker.poolSize : void 0; const worker = await getWorker(poolSize); let messageHandler; let errorHandler; const cleanup = () => { worker.removeEventListener("message", messageHandler); worker.removeEventListener("error", errorHandler); }; return yield* eventToGenerator((yieldCallback, returnCallback, rejectCallback) => { messageHandler = (event) => { if (event.data.sha256) { cleanup(); freeWorker(worker, poolSize); returnCallback(event.data.sha256); } else if (event.data.progress) { yieldCallback(event.data.progress); try { opts.abortSignal?.throwIfAborted(); } catch (err) { cleanup(); destroyWorker(worker); rejectCallback(err); } } else { cleanup(); destroyWorker(worker); rejectCallback(event); } }; errorHandler = (event) => { cleanup(); destroyWorker(worker); rejectCallback(event.error); }; if (opts?.abortSignal) { try { opts.abortSignal?.throwIfAborted(); } catch (err) { cleanup(); destroyWorker(worker); rejectCallback(opts.abortSignal.reason ?? new DOMException("Aborted", "AbortError")); return; } const abortListener = () => { cleanup(); destroyWorker(worker); rejectCallback(opts.abortSignal?.reason ?? new DOMException("Aborted", "AbortError")); opts.abortSignal?.removeEventListener("abort", abortListener); }; opts.abortSignal.addEventListener("abort", abortListener); } worker.addEventListener("message", messageHandler); worker.addEventListener("error", errorHandler); worker.postMessage({ file: buffer }); }); } catch (err) { console.warn("Failed to use web worker for sha256", err); } } if (!wasmModule) { wasmModule = await import("./sha256-wrapper-DYTB3MXW.mjs"); } const sha2562 = await wasmModule.createSHA256(); sha2562.init(); const reader = buffer.stream().getReader(); const total = buffer.size; let bytesDone = 0; while (true) { const { done, value } = await reader.read(); if (done) { break; } sha2562.update(value); bytesDone += value.length; yield bytesDone / total; opts?.abortSignal?.throwIfAborted(); } return sha2562.digest("hex"); } if (!cryptoModule) { cryptoModule = await import("./sha256-node-2YU2V4BH.mjs"); } return yield* cryptoModule.sha256Node(buffer, { abortSignal: opts?.abortSignal }); } var cryptoModule; var wasmModule; // src/utils/range.ts function range(n, b) { return b ? 
Array(b - n).fill(0).map((_, i) => n + i) : Array(n).fill(0).map((_, i) => i); } // src/utils/chunk.ts function chunk(arr, chunkSize) { if (isNaN(chunkSize) || chunkSize < 1) { throw new RangeError("Invalid chunk size: " + chunkSize); } if (!arr.length) { return []; } if (arr.length <= chunkSize) { return [arr]; } return range(Math.ceil(arr.length / chunkSize)).map((i) => { return arr.slice(i * chunkSize, (i + 1) * chunkSize); }); } // src/utils/promisesQueue.ts async function promisesQueue(factories, concurrency) { const results = []; const executing = /* @__PURE__ */ new Set(); let index = 0; for (const factory of factories) { const closureIndex = index++; const e = factory().then((r) => { results[closureIndex] = r; executing.delete(e); }); executing.add(e); if (executing.size >= concurrency) { await Promise.race(executing); } } await Promise.all(executing); return results; } // src/utils/promisesQueueStreaming.ts async function promisesQueueStreaming(factories, concurrency) { const executing = []; for await (const factory of factories) { const e = factory().then(() => { executing.splice(executing.indexOf(e), 1); }); executing.push(e); if (executing.length >= concurrency) { await Promise.race(executing); } } await Promise.all(executing); } // src/utils/WebBlob.ts var WebBlob = class extends Blob { static async create(url, opts) { const customFetch = opts?.fetch ?? fetch; const response = await customFetch(url, { method: "HEAD", ...opts?.accessToken && { headers: { Authorization: `Bearer ${opts.accessToken}` } } }); const size = Number(response.headers.get("content-length")); const contentType = response.headers.get("content-type") || ""; const supportRange = response.headers.get("accept-ranges") === "bytes"; if (!supportRange || size < (opts?.cacheBelow ?? 1e6)) { return await (await customFetch(url)).blob(); } return new WebBlob(url, 0, size, contentType, true, customFetch, opts?.accessToken); } url; start; end; contentType; full; fetch; accessToken; constructor(url, start, end, contentType, full, customFetch, accessToken) { super([]); this.url = url; this.start = start; this.end = end; this.contentType = contentType; this.full = full; this.fetch = customFetch; this.accessToken = accessToken; } get size() { return this.end - this.start; } get type() { return this.contentType; } slice(start = 0, end = this.size) { if (start < 0 || end < 0) { new TypeError("Unsupported negative start/end on WebBlob.slice"); } const slice = new WebBlob( this.url, this.start + start, Math.min(this.start + end, this.end), this.contentType, start === 0 && end === this.size ? this.full : false, this.fetch, this.accessToken ); return slice; } async arrayBuffer() { const result = await this.fetchRange(); return result.arrayBuffer(); } async text() { const result = await this.fetchRange(); return result.text(); } stream() { const stream = new TransformStream(); this.fetchRange().then((response) => response.body?.pipeThrough(stream)).catch((error) => stream.writable.abort(error.message)); return stream.readable; } fetchRange() { const fetch2 = this.fetch; if (this.full) { return fetch2(this.url, { ...this.accessToken && { headers: { Authorization: `Bearer ${this.accessToken}` } } }).then((resp) => resp.ok ? resp : createApiError(resp)); } return fetch2(this.url, { headers: { Range: `bytes=${this.start}-${this.end - 1}`, ...this.accessToken && { Authorization: `Bearer ${this.accessToken}` } } }).then((resp) => resp.ok ? 
resp : createApiError(resp)); } }; // src/utils/base64FromBytes.ts function base64FromBytes(arr) { if (globalThis.Buffer) { return globalThis.Buffer.from(arr).toString("base64"); } else { const bin = []; arr.forEach((byte) => { bin.push(String.fromCharCode(byte)); }); return globalThis.btoa(bin.join("")); } } // src/utils/createBlobs.ts async function createBlobs(url, destPath, opts) { if (url.protocol === "http:" || url.protocol === "https:") { const blob = await WebBlob.create(url, { fetch: opts?.fetch, accessToken: opts?.accessToken }); return [{ path: destPath, blob }]; } if (isFrontend) { throw new TypeError(`Unsupported URL protocol "${url.protocol}"`); } if (url.protocol === "file:") { const { FileBlob } = await import("./FileBlob-GRW5ZULE.mjs"); const { subPaths } = await import("./sub-paths-WNN3FV5L.mjs"); const paths = await subPaths(url, opts?.maxFolderDepth); if (paths.length === 1 && paths[0].relativePath === ".") { const blob = await FileBlob.create(url); return [{ path: destPath, blob }]; } return Promise.all( paths.map(async (path2) => ({ path: `${destPath}/${path2.relativePath}`.replace(/\/[.]$/, "").replaceAll("//", "/").replace(/^[.]?\//, ""), blob: await FileBlob.create(new URL(path2.path)) })) ); } throw new TypeError(`Unsupported URL protocol "${url.protocol}"`); } // src/lib/commit.ts var CONCURRENT_SHAS = 5; var CONCURRENT_LFS_UPLOADS = 5; var MULTIPART_PARALLEL_UPLOAD = 5; function isFileOperation(op) { const ret = op.operation === "addOrUpdate"; if (ret && !(op.content instanceof Blob)) { throw new TypeError("Precondition failed: op.content should be a Blob"); } return ret; } async function* commitIter(params) { const accessToken = checkCredentials(params); const repoId = toRepoId(params.repo); yield { event: "phase", phase: "preuploading" }; const lfsShas = /* @__PURE__ */ new Map(); const abortController = new AbortController(); const abortSignal = abortController.signal; if (!abortSignal.throwIfAborted) { abortSignal.throwIfAborted = () => { if (abortSignal.aborted) { throw new DOMException("Aborted", "AbortError"); } }; } if (params.abortSignal) { params.abortSignal.addEventListener("abort", () => abortController.abort()); } try { const allOperations = (await Promise.all( params.operations.map(async (operation) => { if (operation.operation !== "addOrUpdate") { return operation; } if (!(operation.content instanceof URL)) { return { ...operation, content: operation.content }; } const lazyBlobs = await createBlobs(operation.content, operation.path, { fetch: params.fetch, maxFolderDepth: params.maxFolderDepth }); abortSignal?.throwIfAborted(); return lazyBlobs.map((blob) => ({ ...operation, content: blob.blob, path: blob.path })); }) )).flat(1); const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content; for (const operations of chunk(allOperations.filter(isFileOperation), 100)) { const payload = { gitAttributes: gitAttributes && await gitAttributes.text(), files: await Promise.all( operations.map(async (operation) => ({ path: operation.path, size: operation.content.size, sample: base64FromBytes(new Uint8Array(await operation.content.slice(0, 512).arrayBuffer())) })) ) }; abortSignal?.throwIfAborted(); const res = await (params.fetch ?? fetch)( `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/preupload/${encodeURIComponent( params.branch ?? "main" )}` + (params.isPullRequest ? 
"?create_pr=1" : ""), { method: "POST", headers: { ...accessToken && { Authorization: `Bearer ${accessToken}` }, "Content-Type": "application/json" }, body: JSON.stringify(payload), signal: abortSignal } ); if (!res.ok) { throw await createApiError(res); } const json = await res.json(); for (const file of json.files) { if (file.uploadMode === "lfs") { lfsShas.set(file.path, null); } } } yield { event: "phase", phase: "uploadingLargeFiles" }; for (const operations of chunk( allOperations.filter(isFileOperation).filter((op) => lfsShas.has(op.path)), 100 )) { const shas = yield* eventToGenerator((yieldCallback, returnCallback, rejectCallack) => { return promisesQueue( operations.map((op) => async () => { const iterator = sha256(op.content, { useWebWorker: params.useWebWorkers, abortSignal }); let res2; do { res2 = await iterator.next(); if (!res2.done) { yieldCallback({ event: "fileProgress", path: op.path, progress: res2.value, state: "hashing" }); } } while (!res2.done); const sha = res2.value; lfsShas.set(op.path, res2.value); return sha; }), CONCURRENT_SHAS ).then(returnCallback, rejectCallack); }); abortSignal?.throwIfAborted(); const payload = { operation: "upload", // multipart is a custom protocol for HF transfers: ["basic", "multipart"], hash_algo: "sha_256", ...!params.isPullRequest && { ref: { name: params.branch ?? "main" } }, objects: operations.map((op, i) => ({ oid: shas[i], size: op.content.size })) }; const res = await (params.fetch ?? fetch)( `${params.hubUrl ?? HUB_URL}/${repoId.type === "model" ? "" : repoId.type + "s/"}${repoId.name}.git/info/lfs/objects/batch`, { method: "POST", headers: { ...accessToken && { Authorization: `Bearer ${accessToken}` }, Accept: "application/vnd.git-lfs+json", "Content-Type": "application/vnd.git-lfs+json" }, body: JSON.stringify(payload), signal: abortSignal } ); if (!res.ok) { throw await createApiError(res); } const json = await res.json(); const batchRequestId = res.headers.get("X-Request-Id") || void 0; const shaToOperation = new Map(operations.map((op, i) => [shas[i], op])); yield* eventToGenerator((yieldCallback, returnCallback, rejectCallback) => { return promisesQueueStreaming( json.objects.map((obj) => async () => { const op = shaToOperation.get(obj.oid); if (!op) { throw new InvalidApiResponseFormatError("Unrequested object ID in response"); } abortSignal?.throwIfAborted(); if (obj.error) { const errorMessage = `Error while doing LFS batch call for ${operations[shas.indexOf(obj.oid)].path}: ${obj.error.message}${batchRequestId ? 
` - Request ID: ${batchRequestId}` : ""}`; throw new HubApiError(res.url, obj.error.code, batchRequestId, errorMessage); } if (!obj.actions?.upload) { yieldCallback({ event: "fileProgress", path: op.path, progress: 1, state: "uploading" }); return; } yieldCallback({ event: "fileProgress", path: op.path, progress: 0, state: "uploading" }); const content = op.content; const header = obj.actions.upload.header; if (header?.chunk_size) { const chunkSize = parseInt(header.chunk_size); const completionUrl = obj.actions.upload.href; const parts = Object.keys(header).filter((key) => /^[0-9]+$/.test(key)); if (parts.length !== Math.ceil(content.size / chunkSize)) { throw new Error("Invalid server response to upload large LFS file, wrong number of parts"); } const completeReq = { oid: obj.oid, parts: parts.map((part) => ({ partNumber: +part, etag: "" })) }; const progressCallback = (progress) => yieldCallback({ event: "fileProgress", path: op.path, progress, state: "uploading" }); await promisesQueueStreaming( parts.map((part) => async () => { abortSignal?.throwIfAborted(); const index = parseInt(part) - 1; const slice = content.slice(index * chunkSize, (index + 1) * chunkSize); const res3 = await (params.fetch ?? fetch)(header[part], { method: "PUT", /** Unfortunately, browsers don't support our inherited version of Blob in fetch calls */ body: slice instanceof WebBlob && isFrontend ? await slice.arrayBuffer() : slice, signal: abortSignal, ...{ progressHint: { path: op.path, part: index, numParts: parts.length, progressCallback } // eslint-disable-next-line @typescript-eslint/no-explicit-any } }); if (!res3.ok) { throw await createApiError(res3, { requestId: batchRequestId, message: `Error while uploading part ${part} of ${operations[shas.indexOf(obj.oid)].path} to LFS storage` }); } const eTag = res3.headers.get("ETag"); if (!eTag) { throw new Error("Cannot get ETag of part during multipart upload"); } completeReq.parts[Number(part) - 1].etag = eTag; }), MULTIPART_PARALLEL_UPLOAD ); abortSignal?.throwIfAborted(); const res2 = await (params.fetch ?? fetch)(completionUrl, { method: "POST", body: JSON.stringify(completeReq), headers: { Accept: "application/vnd.git-lfs+json", "Content-Type": "application/vnd.git-lfs+json" }, signal: abortSignal }); if (!res2.ok) { throw await createApiError(res2, { requestId: batchRequestId, message: `Error completing multipart upload of ${operations[shas.indexOf(obj.oid)].path} to LFS storage` }); } yieldCallback({ event: "fileProgress", path: op.path, progress: 1, state: "uploading" }); } else { const res2 = await (params.fetch ?? fetch)(obj.actions.upload.href, { method: "PUT", headers: { ...batchRequestId ? { "X-Request-Id": batchRequestId } : void 0 }, /** Unfortunately, browsers don't support our inherited version of Blob in fetch calls */ body: content instanceof WebBlob && isFrontend ? 
await content.arrayBuffer() : content, signal: abortSignal, ...{ progressHint: { path: op.path, progressCallback: (progress) => yieldCallback({ event: "fileProgress", path: op.path, progress, state: "uploading" }) } // eslint-disable-next-line @typescript-eslint/no-explicit-any } }); if (!res2.ok) { throw await createApiError(res2, { requestId: batchRequestId, message: `Error while uploading ${operations[shas.indexOf(obj.oid)].path} to LFS storage` }); } yieldCallback({ event: "fileProgress", path: op.path, progress: 1, state: "uploading" }); } }), CONCURRENT_LFS_UPLOADS ).then(returnCallback, rejectCallback); }); } abortSignal?.throwIfAborted(); yield { event: "phase", phase: "committing" }; return yield* eventToGenerator( async (yieldCallback, returnCallback, rejectCallback) => (params.fetch ?? fetch)( `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commit/${encodeURIComponent( params.branch ?? "main" )}` + (params.isPullRequest ? "?create_pr=1" : ""), { method: "POST", headers: { ...accessToken && { Authorization: `Bearer ${accessToken}` }, "Content-Type": "application/x-ndjson" }, body: [ { key: "header", value: { summary: params.title, description: params.description, parentCommit: params.parentCommit } }, ...await Promise.all( allOperations.map((operation) => { if (isFileOperation(operation)) { const sha = lfsShas.get(operation.path); if (sha) { return { key: "lfsFile", value: { path: operation.path, algo: "sha256", size: operation.content.size, oid: sha } }; } } return convertOperationToNdJson(operation); }) ) ].map((x) => JSON.stringify(x)).join("\n"), signal: abortSignal, ...{ progressHint: { progressCallback: (progress) => { for (const op of allOperations) { if (isFileOperation(op) && !lfsShas.has(op.path)) { yieldCallback({ event: "fileProgress", path: op.path, progress, state: "uploading" }); } } } } // eslint-disable-next-line @typescript-eslint/no-explicit-any } } ).then(async (res) => { if (!res.ok) { throw await createApiError(res); } const json = await res.json(); returnCallback({ pullRequestUrl: json.pullRequestUrl, commit: { oid: json.commitOid, url: json.commitUrl }, hookOutput: json.hookOutput }); }).catch(rejectCallback) ); } catch (err) { abortController.abort(); throw err; } } async function commit(params) { const iterator = commitIter(params); let res = await iterator.next(); while (!res.done) { res = await iterator.next(); } return res.value; } async function convertOperationToNdJson(operation) { switch (operation.operation) { case "addOrUpdate": { return { key: "file", value: { content: base64FromBytes(new Uint8Array(await operation.content.arrayBuffer())), path: operation.path, encoding: "base64" } }; } case "delete": { return { key: "deletedFile", value: { path: operation.path } }; } default: throw new TypeError("Unknown operation: " + operation.operation); } } // src/lib/count-commits.ts async function countCommits(params) { const accessToken = checkCredentials(params); const repoId = toRepoId(params.repo); const url = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${params.revision ?? "main"}?limit=1`; const res = await (params.fetch ?? fetch)(url, { headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {} }); if (!res.ok) { throw await createApiError(res); } return parseInt(res.headers.get("x-total-count") ?? 
"0", 10); } // src/lib/create-repo.ts async function createRepo(params) { const accessToken = checkCredentials(params); const repoId = toRepoId(params.repo); const [namespace, repoName] = repoId.name.split("/"); if (!namespace || !repoName) { throw new TypeError( `"${repoId.name}" is not a fully qualified repo name. It should be of the form "{namespace}/{repoName}".` ); } const res = await (params.fetch ?? fetch)(`${params.hubUrl ?? HUB_URL}/api/repos/create`, { method: "POST", body: JSON.stringify({ name: repoName, private: params.private, organization: namespace, license: params.license, ...repoId.type === "space" ? { type: "space", sdk: "static" } : { type: repoId.type }, files: params.files ? await Promise.all( params.files.map(async (file) => ({ encoding: "base64", path: file.path, content: base64FromBytes( new Uint8Array(file.content instanceof Blob ? await file.content.arrayBuffer() : file.content) ) })) ) : void 0 }), headers: { Authorization: `Bearer ${accessToken}`, "Content-Type": "application/json" } }); if (!res.ok) { throw await createApiError(res); } const output = await res.json(); return { repoUrl: output.url }; } // src/lib/create-branch.ts async function createBranch(params) { const repoId = toRepoId(params.repo); const res = await (params.fetch ?? fetch)( `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/branch/${encodeURIComponent(params.branch)}`, { method: "POST", headers: { "Content-Type": "application/json", ...params.accessToken && { Authorization: `Bearer ${params.accessToken}` } }, body: JSON.stringify({ startingPoint: params.revision, ...params.empty && { emptyBranch: true }, overwrite: params.overwrite }) } ); if (!res.ok) { throw await createApiError(res); } } // src/lib/create-collection.ts async function createCollection(params) { const accessToken = checkCredentials(params); const res = await (params.fetch ?? fetch)(`${params.hubUrl ?? HUB_URL}/api/collections`, { method: "POST", body: JSON.stringify(params.collection), headers: { Authorization: `Bearer ${accessToken}`, "Content-Type": "application/json" } }); if (!res.ok) { throw await createApiError(res); } const output = await res.json(); return { slug: output.slug }; } // src/utils/parseLinkHeader.ts function parseLinkHeader(header) { const regex = /<(https?:[/][/][^>]+)>;\s+rel="([^"]+)"/g; return Object.fromEntries([...header.matchAll(regex)].map(([, url, rel]) => [rel, url])); } // src/utils/pick.ts function pick(o, props) { return Object.assign( {}, ...props.map((prop) => { if (o[prop] !== void 0) { return { [prop]: o[prop] }; } }) ); } // src/lib/list-datasets.ts var DATASET_EXPAND_KEYS = [ "private", "downloads", "gated", "likes", "lastModified" ]; var DATASET_EXPANDABLE_KEYS = [ "author", "cardData", "citation", "createdAt", "disabled", "description", "downloads", "downloadsAllTime", "gated", "gitalyUid", "lastModified", "likes", "paperswithcode_id", "private", // "siblings", "sha", "tags" ]; async function* listDatasets(params) { const accessToken = params && checkCredentials(params); let totalToFetch = params?.limit ?? Infinity; const search = new URLSearchParams([ ...Object.entries({ limit: String(Math.min(totalToFetch, 500)), ...params?.search?.owner ? { author: params.search.owner } : void 0, ...params?.search?.query ? { search: params.search.query } : void 0 }), ...params?.search?.tags?.map((tag) => ["filter", tag]) ?? [], ...DATASET_EXPAND_KEYS.map((val) => ["expand", val]), ...params?.additionalFields?.map((val) => ["expand", val]) ?? 
[] ]).toString(); let url = `${params?.hubUrl || HUB_URL}/api/datasets` + (search ? "?" + search : ""); while (url) { const res = await (params?.fetch ?? fetch)(url, { headers: { accept: "application/json", ...accessToken ? { Authorization: `Bearer ${accessToken}` } : void 0 } }); if (!res.ok) { throw await createApiError(res); } const items = await res.json(); for (const item of items) { yield { ...params?.additionalFields && pick(item, params.additionalFields), id: item._id, name: item.id, private: item.private, downloads: item.downloads, likes: item.likes, gated: item.gated, updatedAt: new Date(item.lastModified) }; totalToFetch--; if (totalToFetch <= 0) { return; } } const linkHeader = res.headers.get("Link"); url = linkHeader ? parseLinkHeader(linkHeader).next : void 0; } } // src/lib/dataset-info.ts async function datasetInfo(params) { const accessToken = params && checkCredentials(params); const search = new URLSearchParams([ ...DATASET_EXPAND_KEYS.map((val) => ["expand", val]), ...params?.additionalFields?.map((val) => ["expand", val]) ?? [] ]).toString(); const response = await (params.fetch || fetch)( `${params?.hubUrl || HUB_URL}/api/datasets/${params.name}/revision/${encodeURIComponent( params.revision ?? "HEAD" )}?${search.toString()}`, { headers: { ...accessToken ? { Authorization: `Bearer ${accessToken}` } : {}, Accepts: "application/json" } } ); if (!response.ok) { throw await createApiError(response); } const data = await response.json(); return { ...params?.additionalFields && pick(data, params.additionalFields), id: data._id, name: data.id, private: data.private, downloads: data.downloads, likes: data.likes, gated: data.gated, updatedAt: new Date(data.lastModified) }; } // src/lib/delete-branch.ts async function deleteBranch(params) { const repoId = toRepoId(params.repo); const res = await (params.fetch ?? fetch)( `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/branch/${encodeURIComponent(params.branch)}`, { method: "DELETE", headers: { ...params.accessToken && { Authorization: `Bearer ${params.accessToken}` } } } ); if (!res.ok) { throw await createApiError(res); } } // src/lib/delete-file.ts function deleteFile(params) { return commit({ ...params.accessToken ? { accessToken: params.accessToken } : { credentials: params.credentials }, repo: params.repo, operations: [ { operation: "delete", path: params.path } ], title: params.commitTitle ?? `Delete ${params.path}`, description: params.commitDescription, hubUrl: params.hubUrl, branch: params.branch, isPullRequest: params.isPullRequest, parentCommit: params.parentCommit, fetch: params.fetch }); } // src/lib/delete-files.ts function deleteFiles(params) { return commit({ ...params.accessToken ? { accessToken: params.accessToken } : { credentials: params.credentials }, repo: params.repo, operations: params.paths.map((path2) => ({ operation: "delete", path: path2 })), title: params.commitTitle ?? `Deletes ${params.paths.length} files`, description: params.commitDescription, hubUrl: params.hubUrl, branch: params.branch, isPullRequest: params.isPullRequest, parentCommit: params.parentCommit, fetch: params.fetch }); } // src/lib/delete-repo.ts async function deleteRepo(params) { const accessToken = checkCredentials(params); const repoId = toRepoId(params.repo); const [namespace, repoName] = repoId.name.split("/"); const res = await (params.fetch ?? fetch)(`${params.hubUrl ?? 
HUB_URL}/api/repos/delete`, { method: "DELETE", body: JSON.stringify({ name: repoName, organization: namespace, type: repoId.type }), headers: { Authorization: `Bearer ${accessToken}`, "Content-Type": "application/json" } }); if (!res.ok) { throw await createApiError(res); } } // src/lib/delete-collection.ts async function deleteCollection(params) { const accessToken = checkCredentials(params); const res = await (params.fetch ?? fetch)(`${params.hubUrl ?? HUB_URL}/api/collections/${params.slug}`, { method: "DELETE", headers: { Authorization: `Bearer ${accessToken}`, "Content-Type": "application/json" } }); if (!res.ok) { throw await createApiError(res); } } // src/vendor/lz4js/util.ts function readU64(b, n) { let x = 0; x |= b[n++] << 0; x |= b[n++] << 8; x |= b[n++] << 16; x |= b[n++] << 24; x |= b[n++] << 32; x |= b[n++] << 40; x |= b[n++] << 48; x |= b[n++] << 56; return x; } function readU32(b, n) { let x = 0; x |= b[n++] << 0; x |= b[n++] << 8; x |= b[n++] << 16; x |= b[n++] << 24; return x; } // src/vendor/lz4js/index.ts var minMatch = 4; var hashSize = 1 << 16; var mlBits = 4; var mlMask = (1 << mlBits) - 1; var runBits = 4; var runMask = (1 << runBits) - 1; var blockBuf = makeBuffer(5 << 20); var hashTable = makeHashTable(); var magicNum = 407708164; var fdContentChksum = 4; var fdContentSize = 8; var fdBlockChksum = 16; var fdVersion = 64; var fdVersionMask = 192; var bsUncompressed = 2147483648; var bsShift = 4; var bsMask = 7; var bsMap = { 4: 65536, 5: 262144, 6: 1048576, 7: 4194304 }; function makeHashTable() { try { return new Uint32Array(hashSize); } catch (error) { const hashTable2 = new Array(hashSize); for (let i = 0; i < hashSize; i++) { hashTable2[i] = 0; } return hashTable2; } } function makeBuffer(size) { return new Uint8Array(size); } function sliceArray(array, start, end) { return array.slice(start, end); } function decompressBound(src) { let sIndex = 0; if (readU32(src, sIndex) !== magicNum) { throw new Error("invalid magic number"); } sIndex += 4; const descriptor = src[sIndex++]; if ((descriptor & fdVersionMask) !== fdVersion) { throw new Error("incompatible descriptor version " + (descriptor & fdVersionMask)); } const useBlockSum = (descriptor & fdBlockChksum) !== 0; const useContentSize = (descriptor & fdContentSize) !== 0; const bsIdx = src[sIndex++] >> bsShift & bsMask; if (bsMap[bsIdx] === void 0) { throw new Error("invalid block size " + bsIdx); } const maxBlockSize = bsMap[bsIdx]; if (useContentSize) { return readU64(src, sIndex); } sIndex++; let maxSize = 0; while (true) { let blockSize = readU32(src, sIndex); sIndex += 4; if (blockSize & bsUncompressed) { blockSize &= ~bsUncompressed; maxSize += blockSize; } else if (blockSize > 0) { maxSize += maxBlockSize; } if (blockSize === 0) { return maxSize; } if (useBlockSum) { sIndex += 4; } sIndex += blockSize; } } function decompressBlock(src, dst, sIndex, sLength, dIndex) { let mLength, mOffset, sEnd, n, i; const hasCopyWithin = dst.copyWithin !== void 0 && dst.fill !== void 0; sEnd = sIndex + sLength; while (sIndex < sEnd) { const token = src[sIndex++]; let literalCount = token >> 4; if (literalCount > 0) { if (literalCount === 15) { while (true) { literalCount += src[sIndex]; if (src[sIndex++] !== 255) { break; } } } for (n = sIndex + literalCount; sIndex < n; ) { dst[dIndex++] = src[sIndex++]; } } if (sIndex >= sEnd) { break; } mLength = token & 15; mOffset = src[sIndex++] | src[sIndex++] << 8; if (mLength === 15) { while (true) { mLength += src[sIndex]; if (src[sIndex++] !== 255) { break; } } } mLength += 
minMatch; if (hasCopyWithin && mOffset === 1) { dst.fill(dst[dIndex - 1] | 0, dIndex, dIndex + mLength); dIndex += mLength; } else if (hasCopyWithin && mOffset > mLength && mLength > 31) { dst.copyWithin(dIndex, dIndex - mOffset, dIndex - mOffset + mLength); dIndex += mLength; } else { for (i = dIndex - mOffset, n = i + mLength; i < n; ) { dst[dIndex++] = dst[i++] | 0; } } } return dIndex; } function decompressFrame(src, dst) { let useBlockSum, useContentSum, useContentSize, descriptor; let sIndex = 0; let dIndex = 0; if (readU32(src, sIndex) !== magicNum) { throw new Error("invalid magic number"); } sIndex += 4; descriptor = src[sIndex++]; if ((descriptor & fdVersionMask) !== fdVersion) { throw new Error("incompatible descriptor version"); } useBlockSum = (descriptor & fdBlockChksum) !== 0; useContentSum = (descriptor & fdContentChksum) !== 0; useContentSize = (descriptor & fdContentSize) !== 0; const bsIdx = src[sIndex++] >> bsShift & bsMask; if (bsMap[bsIdx] === void 0) { throw new Error("invalid block size"); } if (useContentSize) { s