@huggingface/hub
Version:
Utilities to interact with the Hugging Face hub
1,546 lines (1,510 loc) • 103 kB
JavaScript
// src/lib/cache-management.ts
import { homedir } from "os";
import { join, basename } from "path";
import { stat, readdir, readFile, realpath, lstat } from "fs/promises";
/**
 * Compute the fallback cache root: the `.cache` folder inside the current
 * user's home directory.
 *
 * @returns {string} `<home directory>/.cache`
 */
function getDefaultHome() {
  const userHome = homedir();
  return join(userHome, ".cache");
}
/**
 * Compute the default hub cache directory.
 *
 * Uses `$HF_HOME/hub` when `HF_HOME` is set. Otherwise uses
 * `<cache root>/huggingface/hub`, where the cache root is `XDG_CACHE_HOME`
 * if set, else the value returned by `getDefaultHome()`.
 *
 * @returns {string} Path of the default hub cache directory.
 */
function getDefaultCachePath() {
  let hfHome = process.env["HF_HOME"];
  if (hfHome == null) {
    const xdgCacheHome = process.env["XDG_CACHE_HOME"];
    const cacheRoot = xdgCacheHome == null ? getDefaultHome() : xdgCacheHome;
    hfHome = join(cacheRoot, "huggingface");
  }
  return join(hfHome, "hub");
}
/**
 * Resolve the hub cache directory from the `HUGGINGFACE_HUB_CACHE`
 * environment variable, falling back to `getDefaultCachePath()` when it is
 * not set.
 *
 * @returns {string} Path of the hub cache directory.
 */
function getHuggingFaceHubCache() {
  const fromEnv = process.env["HUGGINGFACE_HUB_CACHE"];
  if (fromEnv != null) {
    return fromEnv;
  }
  return getDefaultCachePath();
}
/**
 * Resolve the hub cache directory, giving priority to the `HF_HUB_CACHE`
 * environment variable and otherwise deferring to `getHuggingFaceHubCache()`.
 *
 * @returns {string} Path of the hub cache directory.
 */
function getHFHubCachePath() {
  const fromEnv = process.env["HF_HUB_CACHE"];
  if (fromEnv != null) {
    return fromEnv;
  }
  return getHuggingFaceHubCache();
}
// Entry names skipped while listing the `snapshots` folder of a cached repo.
var FILES_TO_IGNORE = [".DS_Store"];
// Separator between the repo-type prefix and the repo-id segments in a cache folder name.
var REPO_ID_SEPARATOR = "--";
/**
 * Build the cache folder name of a repo: the pluralised type followed by each
 * `/`-separated segment of the name, all joined with `REPO_ID_SEPARATOR`.
 *
 * @param {{ name: string, type: string }} repoId Repo designation.
 * @returns {string} Folder name, e.g. `models--user--repo` for a model `user/repo`.
 */
function getRepoFolderName({ name, type }) {
  const pluralType = `${type}s`;
  const segments = [pluralType].concat(name.split("/"));
  return segments.join(REPO_ID_SEPARATOR);
}
/**
 * Scan a hub cache directory and describe every cached repo found in it.
 *
 * Each sub-directory (other than `.locks`) is handed to `scanCachedRepo`. A
 * repo that fails to scan does not abort the scan: its error is collected in
 * `warnings` instead.
 *
 * @param {string} [cacheDir] Directory to scan; defaults to `getHFHubCachePath()`.
 * @returns {Promise<{ repos: object[], size: number, warnings: unknown[] }>}
 *   The scanned repos, the sum of their sizes, and the errors collected.
 * @throws {Error} When the cache path exists but is not a directory.
 */
async function scanCacheDir(cacheDir = void 0) {
  const root = cacheDir ? cacheDir : getHFHubCachePath();
  const rootStat = await stat(root);
  if (!rootStat.isDirectory()) {
    throw new Error(
      `Scan cache expects a directory but found a file: ${root}. Please use \`cacheDir\` argument or set \`HF_HUB_CACHE\` environment variable.`
    );
  }

  const repos = [];
  const warnings = [];
  const entries = await readdir(root);
  for (const entry of entries) {
    // The lock folder is not a repo.
    if (entry === ".locks") {
      continue;
    }
    const entryPath = join(root, entry);
    const entryStat = await stat(entryPath);
    if (entryStat.isDirectory()) {
      try {
        repos.push(await scanCachedRepo(entryPath));
      } catch (err) {
        warnings.push(err);
      }
    }
  }

  let totalSize = 0;
  for (const repo of repos) {
    totalSize = totalSize + repo.size;
  }
  return { repos, size: totalSize, warnings };
}
/**
 * Scan one cached repo folder (`<type>s--<namespace>--<name>`).
 *
 * Reads the `refs` folder to map commit hashes to ref names, then walks every
 * revision folder under `snapshots`, collecting its files and the stats of the
 * blobs they resolve to.
 *
 * @param {string} repoPath Path of the cached repo folder.
 * @returns {Promise<object>} Repo id, path, blob count, revisions, total blob
 *   size and last accessed / modified dates.
 * @throws {Error} When the folder name has no separator, `snapshots` is not a
 *   directory, a snapshot entry is a file, or a ref points to a commit that
 *   has no snapshot folder.
 * @throws {TypeError} When the type prefix is rejected by `parseRepoType`.
 */
async function scanCachedRepo(repoPath) {
  const folderName = basename(repoPath);
  if (!folderName.includes(REPO_ID_SEPARATOR)) {
    throw new Error(`Repo path is not a valid HuggingFace cache directory: ${folderName}`);
  }
  const [typePrefix, ...idSegments] = folderName.split(REPO_ID_SEPARATOR);
  const repoType = parseRepoType(typePrefix);
  const repoId = idSegments.join("/");

  const snapshotsPath = join(repoPath, "snapshots");
  const refsPath = join(repoPath, "refs");
  if (!(await stat(snapshotsPath)).isDirectory()) {
    throw new Error(`Snapshots dir doesn't exist in cached repo ${snapshotsPath}`);
  }

  // commit hash -> names of the refs pointing at it
  const refsByHash = /* @__PURE__ */ new Map();
  if ((await stat(refsPath)).isDirectory()) {
    await scanRefsDir(refsPath, refsByHash);
  }

  const revisions = [];
  // blob path -> stat result, shared by all revisions so each blob is counted once
  const blobStats = /* @__PURE__ */ new Map();
  const snapshotEntries = await readdir(snapshotsPath);
  for (const commitOid of snapshotEntries) {
    if (FILES_TO_IGNORE.includes(commitOid)) {
      continue;
    }
    const revisionPath = join(snapshotsPath, commitOid);
    const revisionStat = await stat(revisionPath);
    if (!revisionStat.isDirectory()) {
      throw new Error(`Snapshots folder corrupted. Found a file: ${revisionPath}`);
    }

    const files = [];
    await scanSnapshotDir(revisionPath, files, blobStats);

    // An empty revision falls back to the folder's own modification time.
    let lastModifiedMs = revisionStat.mtimeMs;
    if (files.length > 0) {
      lastModifiedMs = Math.max(...files.map((file) => file.blob.lastModifiedAt.getTime()));
    }
    let revisionSize = 0;
    for (const file of files) {
      revisionSize = revisionSize + file.blob.size;
    }

    revisions.push({
      commitOid,
      files,
      refs: refsByHash.get(commitOid) || [],
      size: revisionSize,
      path: revisionPath,
      lastModifiedAt: new Date(lastModifiedMs)
    });
    // Whatever remains in the map afterwards is a ref with no snapshot.
    refsByHash.delete(commitOid);
  }

  if (refsByHash.size > 0) {
    throw new Error(
      `Reference(s) refer to missing commit hashes: ${JSON.stringify(Object.fromEntries(refsByHash))} (${repoPath})`
    );
  }

  const repoStats = await stat(repoPath);
  const blobs = [...blobStats.values()];
  const hasBlobs = blobStats.size > 0;
  const lastAccessedMs = hasBlobs ? Math.max(...blobs.map((blob) => blob.atimeMs)) : repoStats.atimeMs;
  const lastModifiedMs = hasBlobs ? Math.max(...blobs.map((blob) => blob.mtimeMs)) : repoStats.mtimeMs;
  let repoSize = 0;
  for (const blob of blobs) {
    repoSize = repoSize + blob.size;
  }

  return {
    id: {
      name: repoId,
      type: repoType
    },
    path: repoPath,
    filesCount: blobStats.size,
    revisions,
    size: repoSize,
    lastAccessedAt: new Date(lastAccessedMs),
    lastModifiedAt: new Date(lastModifiedMs)
  };
}
async function scanRefsDir(refsPath, refsByHash) {
const refFiles = await readdir(refsPath, { withFileTypes: true });
for (const refFile of refFiles) {
const refFilePath = join(refsPath, refFile.name);
if (refFile.isDirectory())
continue;
const commitHash = await readFile(refFilePath, "utf-8");
const refName = refFile.name;
if (!refsByHash.has(commitHash)) {
refsByHash.set(commitHash, []);
}
refsByHash.get(commitHash)?.push(refName);
}
}
/**
 * List the files directly inside one revision folder and record, for each, the
 * blob its path resolves to together with that blob's size and timestamps.
 * Sub-directories are skipped.
 *
 * @param {string} revisionPath Path of the revision (snapshot) folder.
 * @param {object[]} cachedFiles Array filled in place with one entry per file.
 * @param {Map<string, object>} blobStats Stat cache keyed by blob path, passed
 *   on to `getBlobStat`.
 * @returns {Promise<void>}
 */
async function scanSnapshotDir(revisionPath, cachedFiles, blobStats) {
  const entries = await readdir(revisionPath, { withFileTypes: true });
  for (const entry of entries) {
    if (!entry.isDirectory()) {
      const entryPath = join(revisionPath, entry.name);
      // Resolve the entry to the real path of the blob it points to.
      const resolvedBlobPath = await realpath(entryPath);
      const { size, atimeMs, mtimeMs } = await getBlobStat(resolvedBlobPath, blobStats);
      cachedFiles.push({
        path: entryPath,
        blob: {
          path: resolvedBlobPath,
          size,
          lastAccessedAt: new Date(atimeMs),
          lastModifiedAt: new Date(mtimeMs)
        }
      });
    }
  }
}
async function getBlobStat(blobPath, blobStats) {
const blob = blobStats.get(blobPath);
if (!blob) {
const statResult = await lstat(blobPath);
blobStats.set(blobPath, statResult);
return statResult;
}
return blob;
}
function parseRepoType(type) {
switch (type) {
case "models":
return "model";
case "datasets":
return "dataset";
case "spaces":
return "space";
default:
throw new TypeError(`Invalid repo type: ${type}`);
}
}
// src/consts.ts
// Base URL used for hub requests when the caller does not pass `hubUrl`.
var HUB_URL = "https://huggingface.co";
// src/error.ts
/**
 * Build a `HubApiError` from a failed HTTP response and throw it.
 *
 * The message comes from the JSON body (`error`, then `message`, optionally
 * followed by `error_description`) when the response is JSON, otherwise from a
 * generic status message. The URL and request id are always appended. The
 * parsed body (or `{ message: <text> }`) is stored on `error.data`.
 *
 * @param {Response} response The failed response; its body is consumed.
 * @param {{ requestId?: string, message?: string }} [opts] Fallback request id
 *   and extra text for the generic message.
 * @returns {Promise<never>} Never resolves normally.
 * @throws {HubApiError} Always.
 */
async function createApiError(response, opts) {
  const requestId = response.headers.get("X-Request-Id") ?? opts?.requestId;
  const error = new HubApiError(response.url, response.status, requestId);
  const extraMessage = opts?.message ? `. ${opts.message}` : "";
  error.message = `Api error with status ${error.statusCode}${extraMessage}`;

  const trailerParts = [`URL: ${error.url}`];
  if (error.requestId) {
    trailerParts.push(`Request ID: ${error.requestId}`);
  }
  const trailer = trailerParts.join(". ");

  const contentType = response.headers.get("Content-Type");
  if (contentType?.startsWith("application/json")) {
    const json = await response.json();
    error.message = json.error || json.message || error.message;
    if (json.error_description) {
      error.message = error.message ? error.message + `: ${json.error_description}` : json.error_description;
    }
    error.data = json;
  } else {
    const text = await response.text();
    error.data = { message: text };
  }

  error.message += `. ${trailer}`;
  throw error;
}
var HubApiError = class extends Error {
statusCode;
url;
requestId;
data;
constructor(url, statusCode, requestId, message) {
super(message);
this.statusCode = statusCode;
this.requestId = requestId;
this.url = url;
}
};
// Error type thrown in this file when an API response does not have the expected content.
var InvalidApiResponseFormatError = class extends Error {
};
// src/utils/checkCredentials.ts
function checkAccessToken(accessToken) {
if (!accessToken.startsWith("hf_")) {
throw new TypeError("Your access token must start with 'hf_'");
}
}
/**
 * Extract and validate the access token from call parameters.
 *
 * `params.accessToken` takes precedence over `params.credentials.accessToken`.
 *
 * @param {{ accessToken?: string, credentials?: { accessToken?: string } }} params
 * @returns {string | undefined} The validated token, or `undefined` when none is given.
 * @throws {TypeError} When the chosen token fails `checkAccessToken`.
 */
function checkCredentials(params) {
  const token = params.accessToken || params.credentials?.accessToken;
  if (!token) {
    return undefined;
  }
  checkAccessToken(token);
  return token;
}
// src/utils/toRepoId.ts
function toRepoId(repo) {
if (typeof repo !== "string") {
return repo;
}
if (repo.startsWith("model/") || repo.startsWith("models/")) {
throw new TypeError(
"A repo designation for a model should not start with 'models/', directly specify the model namespace / name"
);
}
if (repo.startsWith("space/")) {
throw new TypeError("Spaces should start with 'spaces/', plural, not 'space/'");
}
if (repo.startsWith("dataset/")) {
throw new TypeError("Datasets should start with 'dataset/', plural, not 'dataset/'");
}
const slashes = repo.split("/").length - 1;
if (repo.startsWith("spaces/")) {
if (slashes !== 2) {
throw new TypeError("Space Id must include namespace and name of the space");
}
return {
type: "space",
name: repo.slice("spaces/".length)
};
}
if (repo.startsWith("datasets/")) {
if (slashes > 2) {
throw new TypeError("Too many slashes in repo designation: " + repo);
}
return {
type: "dataset",
name: repo.slice("datasets/".length)
};
}
if (slashes > 1) {
throw new TypeError("Too many slashes in repo designation: " + repo);
}
return {
type: "model",
name: repo
};
}
// src/lib/check-repo-access.ts
/**
 * Check that a repo is reachable with the given credentials by requesting its
 * `/api/<type>s/<name>` endpoint.
 *
 * @param {object} params `repo`, plus optional `accessToken` / `credentials`,
 *   `hubUrl` and `fetch`.
 * @returns {Promise<void>} Resolves when the response is OK.
 * @throws {HubApiError} When the response is not OK (via `createApiError`).
 */
async function checkRepoAccess(params) {
  const accessToken = params && checkCredentials(params);
  const { type, name } = toRepoId(params.repo);
  const doFetch = params.fetch || fetch;
  const baseUrl = params?.hubUrl || HUB_URL;
  const headers = accessToken ? { Authorization: `Bearer ${accessToken}` } : {};
  const response = await doFetch(`${baseUrl}/api/${type}s/${name}`, { headers });
  if (response.ok) {
    return;
  }
  throw await createApiError(response);
}
// src/utils/range.ts
function range(n, b) {
return b ? Array(b - n).fill(0).map((_, i) => n + i) : Array(n).fill(0).map((_, i) => i);
}
// src/utils/chunk.ts
function chunk(arr, chunkSize) {
if (isNaN(chunkSize) || chunkSize < 1) {
throw new RangeError("Invalid chunk size: " + chunkSize);
}
if (!arr.length) {
return [];
}
if (arr.length <= chunkSize) {
return [arr];
}
return range(Math.ceil(arr.length / chunkSize)).map((i) => {
return arr.slice(i * chunkSize, (i + 1) * chunkSize);
});
}
// src/utils/promisesQueue.ts
async function promisesQueue(factories, concurrency) {
const results = [];
const executing = /* @__PURE__ */ new Set();
let index = 0;
for (const factory of factories) {
const closureIndex = index++;
const e = factory().then((r) => {
results[closureIndex] = r;
executing.delete(e);
});
executing.add(e);
if (executing.size >= concurrency) {
await Promise.race(executing);
}
}
await Promise.all(executing);
return results;
}
// src/utils/promisesQueueStreaming.ts
async function promisesQueueStreaming(factories, concurrency) {
const executing = [];
for await (const factory of factories) {
const e = factory().then(() => {
executing.splice(executing.indexOf(e), 1);
});
executing.push(e);
if (executing.length >= concurrency) {
await Promise.race(executing);
}
}
await Promise.all(executing);
}
// src/utils/eventToGenerator.ts
/**
 * Adapt a callback-driven producer into an async generator.
 *
 * `cb` is called with three callbacks:
 *  - `yieldCallback(value)`  makes the generator yield `value`;
 *  - `returnCallback(value)` makes the generator finish and return `value`;
 *  - `rejectCallback(err)`   makes the generator throw `err`.
 *
 * Values are buffered in a queue of deferred promises, so the producer may
 * call `yieldCallback` several times before the consumer reads anything.
 *
 * @param cb Producer function. It may be async; an error it throws or rejects
 *   with is forwarded to the generator.
 * @returns An async generator yielding the values passed to `yieldCallback`
 *   and returning the value passed to `returnCallback`.
 */
async function* eventToGenerator(cb) {
// Queue of deferred results; the consumer always awaits the first entry.
const promises = [];
// Append a new unsettled deferred ({ p, resolve, reject }) to the queue.
function addPromise() {
let resolve3;
let reject;
const p = new Promise((res, rej) => {
resolve3 = res;
reject = rej;
});
promises.push({ p, resolve: resolve3, reject });
}
// Start with one pending deferred so the consumer loop has something to await.
addPromise();
// `cb` is invoked from a promise callback rather than synchronously.
const callbackRes = Promise.resolve().then(
() => cb(
(y) => {
// Queue the next deferred first, then settle the one that was last before it.
addPromise();
promises.at(-2)?.resolve({ done: false, value: y });
},
(r) => {
addPromise();
promises.at(-2)?.resolve({ done: true, value: r });
},
// A rejection is delivered through the deferred at the head of the queue.
// NOTE(review): if that head deferred is already resolved but not yet consumed,
// this reject call has no effect on it — confirm this is acceptable.
(err) => promises.shift()?.reject(err)
)
).catch((err) => promises.shift()?.reject(err));
while (1) {
const p = promises[0];
if (!p) {
throw new Error("Logic error in eventGenerator, promises should never be empty");
}
const result = await p.p;
promises.shift();
if (result.done) {
// Let the producer callback settle before finishing the generator.
await callbackRes;
return result.value;
}
yield result.value;
}
throw new Error("Unreachable");
}
// src/utils/hexFromBytes.ts
/**
 * Encode bytes as a lowercase hexadecimal string.
 *
 * Uses `Buffer` when it exists on `globalThis`; otherwise formats each byte
 * as two hex digits.
 *
 * @param {Uint8Array} arr Bytes to encode.
 * @returns {string} Hex string, two characters per byte.
 */
function hexFromBytes(arr) {
  if (globalThis.Buffer) {
    return globalThis.Buffer.from(arr).toString("hex");
  }
  let hex = "";
  arr.forEach((byte) => {
    hex += byte.toString(16).padStart(2, "0");
  });
  return hex;
}
// src/utils/isBackend.ts
// True when both a global `window` and `window.document` are defined.
var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
// True when the global `self` was constructed by `DedicatedWorkerGlobalScope`.
var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
// True when the code runs neither in a browser page nor in a dedicated web worker.
var isBackend = !isBrowser && !isWebWorker;
// src/utils/isFrontend.ts
// Negation of `isBackend`.
var isFrontend = !isBackend;
// src/utils/sha256.ts
/**
 * Load the SHA-256 wrapper module and expose its worker source code as an
 * object URL that can be passed to `new Worker(...)`.
 *
 * @returns {Promise<string>} Object URL of a `Blob` holding the worker code.
 */
async function getWebWorkerCode() {
  const wrapperModule = await import("./sha256-wrapper-DYTB3MXW.mjs");
  const workerSource = wrapperModule.createSHA256WorkerCode();
  const workerBlob = new Blob([workerSource]);
  return URL.createObjectURL(workerBlob);
}
// Idle workers that `getWorker` can hand out again.
var pendingWorkers = [];
// Workers currently handed out by `getWorker`.
var runningWorkers = /* @__PURE__ */ new Set();
// Resolver of the current `waitPromise`.
var resolve;
// Promise awaited by `getWorker` while the pool is full; `freeWorker` and
// `destroyWorker` replace it and resolve the previous one.
var waitPromise = new Promise((r) => {
resolve = r;
});
/**
 * Obtain a SHA-256 web worker.
 *
 * An idle worker from `pendingWorkers` is reused when one exists. Otherwise a
 * new worker is created; when `poolSize` is given, creation waits until fewer
 * than `poolSize` workers are running.
 *
 * @param {number} [poolSize] Maximum number of running workers; a falsy value
 *   means no limit.
 * @returns {Promise<Worker>} A worker, already registered in `runningWorkers`.
 * @throws {TypeError} When `poolSize` is negative.
 */
async function getWorker(poolSize) {
  const idleWorker = pendingWorkers.pop();
  if (idleWorker) {
    runningWorkers.add(idleWorker);
    return idleWorker;
  }
  if (poolSize) {
    if (poolSize <= 0) {
      throw new TypeError("Invalid webworker pool size: " + poolSize);
    }
    // Block until a running worker is freed or destroyed.
    while (runningWorkers.size >= poolSize) {
      await waitPromise;
    }
  }
  const createdWorker = new Worker(await getWebWorkerCode());
  runningWorkers.add(createdWorker);
  return createdWorker;
}
/**
 * Release a worker after use.
 *
 * Without a pool size the worker is destroyed. With one, it is moved from
 * `runningWorkers` to `pendingWorkers` for reuse and callers waiting in
 * `getWorker` are woken up.
 *
 * @param {Worker} worker Worker to release.
 * @param {number} [poolSize] Pool size the worker was obtained with.
 * @returns {Promise<void>}
 */
async function freeWorker(worker, poolSize) {
  if (poolSize) {
    runningWorkers.delete(worker);
    pendingWorkers.push(worker);
    // Install a fresh wait promise before waking the current waiters.
    const wakeWaiters = resolve;
    waitPromise = new Promise((nextResolve) => {
      resolve = nextResolve;
    });
    wakeWaiters();
    return;
  }
  return destroyWorker(worker);
}
/**
 * Terminate a worker, remove it from `runningWorkers` and wake up callers
 * waiting in `getWorker`.
 *
 * @param {Worker} worker Worker to terminate.
 */
function destroyWorker(worker) {
  runningWorkers.delete(worker);
  worker.terminate();
  // Install a fresh wait promise before waking the current waiters.
  const wakeWaiters = resolve;
  waitPromise = new Promise((nextResolve) => {
    resolve = nextResolve;
  });
  wakeWaiters();
}
/**
 * Compute the SHA-256 digest of a blob, reporting progress along the way.
 *
 * The generator yields progress values (it starts with 0) and returns the
 * digest as a hex string. Strategy, in order:
 *  1. blobs smaller than the size threshold are hashed with
 *     `crypto.subtle.digest` when available;
 *  2. on the frontend, a web worker is used when `opts.useWebWorker` is set,
 *     otherwise (or if the worker fails) the blob is streamed through the
 *     dynamically imported wrapper module;
 *  3. otherwise the dynamically imported node module does the hashing.
 *
 * @param buffer Blob to hash.
 * @param opts Optional `useWebWorker` (boolean, or object with `minSize` /
 *   `poolSize`) and `abortSignal`.
 */
async function* sha256(buffer, opts) {
yield 0;
// Size threshold below which `crypto.subtle` is used: `useWebWorker.minSize` when given, else 1e7.
const maxCryptoSize = typeof opts?.useWebWorker === "object" && opts?.useWebWorker.minSize !== void 0 ? opts.useWebWorker.minSize : 1e7;
if (buffer.size < maxCryptoSize && globalThis.crypto?.subtle) {
const res = hexFromBytes(
new Uint8Array(
await globalThis.crypto.subtle.digest("SHA-256", buffer instanceof Blob ? await buffer.arrayBuffer() : buffer)
)
);
yield 1;
return res;
}
if (isFrontend) {
if (opts?.useWebWorker) {
try {
const poolSize = typeof opts?.useWebWorker === "object" ? opts.useWebWorker.poolSize : void 0;
const worker = await getWorker(poolSize);
// Bridge the worker's message events into yielded progress values and a returned digest.
// NOTE(review): these listeners are never removed in the visible code, while `freeWorker`
// can put the worker back in the pool for reuse — confirm that stale listeners are harmless.
return yield* eventToGenerator((yieldCallback, returnCallback, rejectCallack) => {
worker.addEventListener("message", (event) => {
if (event.data.sha256) {
freeWorker(worker, poolSize);
returnCallback(event.data.sha256);
} else if (event.data.progress) {
yieldCallback(event.data.progress);
// Abort is checked on each progress message; an aborted run destroys the worker.
try {
opts.abortSignal?.throwIfAborted();
} catch (err) {
destroyWorker(worker);
rejectCallack(err);
}
} else {
// A message carrying neither a digest nor progress is treated as a failure.
destroyWorker(worker);
rejectCallack(event);
}
});
worker.addEventListener("error", (event) => {
destroyWorker(worker);
rejectCallack(event.error);
});
worker.postMessage({ file: buffer });
});
} catch (err) {
// Any failure on the worker path is logged, then execution falls through to the streaming path below.
console.warn("Failed to use web worker for sha256", err);
}
}
// The wrapper module is imported once and kept in `wasmModule`.
if (!wasmModule) {
wasmModule = await import("./sha256-wrapper-DYTB3MXW.mjs");
}
const sha2562 = await wasmModule.createSHA256();
sha2562.init();
const reader = buffer.stream().getReader();
const total = buffer.size;
let bytesDone = 0;
// Feed the hasher chunk by chunk, yielding the fraction of bytes processed.
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
sha2562.update(value);
bytesDone += value.length;
yield bytesDone / total;
opts?.abortSignal?.throwIfAborted();
}
return sha2562.digest("hex");
}
// Not on the frontend: the node module is imported once and kept in `cryptoModule`.
if (!cryptoModule) {
cryptoModule = await import("./sha256-node-2YU2V4BH.mjs");
}
return yield* cryptoModule.sha256Node(buffer, { abortSignal: opts?.abortSignal });
}
// Holds the dynamically imported "./sha256-node-2YU2V4BH.mjs" module once `sha256` has loaded it.
var cryptoModule;
// Holds the dynamically imported "./sha256-wrapper-DYTB3MXW.mjs" module once `sha256` has loaded it.
var wasmModule;
// src/utils/WebBlob.ts
var WebBlob = class extends Blob {
static async create(url, opts) {
const customFetch = opts?.fetch ?? fetch;
const response = await customFetch(url, { method: "HEAD" });
const size = Number(response.headers.get("content-length"));
const contentType = response.headers.get("content-type") || "";
const supportRange = response.headers.get("accept-ranges") === "bytes";
if (!supportRange || size < (opts?.cacheBelow ?? 1e6)) {
return await (await customFetch(url)).blob();
}
return new WebBlob(url, 0, size, contentType, true, customFetch);
}
url;
start;
end;
contentType;
full;
fetch;
constructor(url, start, end, contentType, full, customFetch) {
super([]);
this.url = url;
this.start = start;
this.end = end;
this.contentType = contentType;
this.full = full;
this.fetch = customFetch;
}
get size() {
return this.end - this.start;
}
get type() {
return this.contentType;
}
slice(start = 0, end = this.size) {
if (start < 0 || end < 0) {
new TypeError("Unsupported negative start/end on WebBlob.slice");
}
const slice = new WebBlob(
this.url,
this.start + start,
Math.min(this.start + end, this.end),
this.contentType,
start === 0 && end === this.size ? this.full : false,
this.fetch
);
return slice;
}
async arrayBuffer() {
const result = await this.fetchRange();
return result.arrayBuffer();
}
async text() {
const result = await this.fetchRange();
return result.text();
}
stream() {
const stream = new TransformStream();
this.fetchRange().then((response) => response.body?.pipeThrough(stream)).catch((error) => stream.writable.abort(error.message));
return stream.readable;
}
fetchRange() {
const fetch2 = this.fetch;
if (this.full) {
return fetch2(this.url);
}
return fetch2(this.url, {
headers: {
Range: `bytes=${this.start}-${this.end - 1}`
}
});
}
};
// src/utils/createBlob.ts
/**
 * Create a lazily-read blob for a URL.
 *
 * `http:` and `https:` URLs are handled by `WebBlob.create`. `file:` URLs are
 * handled by the dynamically imported `FileBlob`, but only outside the
 * frontend.
 *
 * @param {URL} url URL of the content.
 * @param {{ fetch?: typeof fetch }} [opts] Custom fetch function for web URLs.
 * @returns {Promise<Blob>} The blob.
 * @throws {TypeError} For any other protocol, and for `file:` on the frontend.
 */
async function createBlob(url, opts) {
  const { protocol } = url;
  if (protocol === "http:" || protocol === "https:") {
    return WebBlob.create(url, { fetch: opts?.fetch });
  }
  if (!isFrontend && protocol === "file:") {
    const { FileBlob } = await import("./FileBlob-GRW5ZULE.mjs");
    return FileBlob.create(url);
  }
  throw new TypeError(`Unsupported URL protocol "${protocol}"`);
}
// src/utils/base64FromBytes.ts
/**
 * Encode bytes as a base64 string.
 *
 * Uses `Buffer` when it exists on `globalThis`; otherwise builds a binary
 * string and encodes it with `btoa`.
 *
 * @param {Uint8Array} arr Bytes to encode.
 * @returns {string} Base64 representation.
 */
function base64FromBytes(arr) {
  if (globalThis.Buffer) {
    return globalThis.Buffer.from(arr).toString("base64");
  }
  let binary = "";
  arr.forEach((byte) => {
    binary += String.fromCharCode(byte);
  });
  return globalThis.btoa(binary);
}
// src/lib/commit.ts
// Concurrency limit for hashing files during a commit.
var CONCURRENT_SHAS = 5;
// Concurrency limit for LFS object uploads during a commit.
var CONCURRENT_LFS_UPLOADS = 5;
// Concurrency limit for the part uploads of one multipart LFS upload.
var MULTIPART_PARALLEL_UPLOAD = 5;
function isFileOperation(op) {
const ret = op.operation === "addOrUpdate";
if (ret && !(op.content instanceof Blob)) {
throw new TypeError("Precondition failed: op.content should be a Blob");
}
return ret;
}
/**
 * Create a commit on a hub repo, reporting progress as an async generator.
 *
 * Phases, each announced with a `{ event: "phase", phase }` item:
 *  1. "preuploading": asks the hub, per batch of up to 100 files, which files
 *     must go through LFS;
 *  2. "uploadingLargeFiles": hashes those files, requests upload instructions
 *     from the LFS batch endpoint and uploads them (single PUT or multipart);
 *  3. "committing": posts the commit as newline-delimited JSON.
 *
 * `{ event: "fileProgress", path, progress, state }` items are yielded while
 * hashing and uploading.
 *
 * @param params Commit parameters. Fields read here: `repo`, `operations`,
 *   `title`, `description`, `parentCommit`, `branch`, `isPullRequest`, `hubUrl`,
 *   `fetch`, `abortSignal`, `useWebWorkers`, plus the credentials read by
 *   `checkCredentials`.
 * @returns The generator's return value is
 *   `{ pullRequestUrl, commit: { oid, url }, hookOutput }`, taken from the
 *   commit response.
 * @throws Errors from failed requests (built by `createApiError`), a
 *   `HubApiError` for an LFS object error, an `InvalidApiResponseFormatError`
 *   for an unrequested LFS object, or the abort error. Any error also aborts
 *   the internal abort controller before being rethrown.
 */
async function* commitIter(params) {
const accessToken = checkCredentials(params);
const repoId = toRepoId(params.repo);
yield { event: "phase", phase: "preuploading" };
// path -> sha256; set to null once the hub says the file uses LFS, then to the digest once hashed.
const lfsShas = /* @__PURE__ */ new Map();
// Internal controller, so that a failure in one request can cancel the others.
const abortController = new AbortController();
const abortSignal = abortController.signal;
// Provide `throwIfAborted` on the signal when the runtime does not.
if (!abortSignal.throwIfAborted) {
abortSignal.throwIfAborted = () => {
if (abortSignal.aborted) {
throw new DOMException("Aborted", "AbortError");
}
};
}
// Forward an abort of the caller's signal to the internal controller.
if (params.abortSignal) {
params.abortSignal.addEventListener("abort", () => abortController.abort());
}
try {
// Replace URL contents by blobs created with `createBlob`; other operations pass through.
const allOperations = await Promise.all(
params.operations.map(async (operation) => {
if (operation.operation !== "addOrUpdate") {
return operation;
}
if (!(operation.content instanceof URL)) {
return { ...operation, content: operation.content };
}
const lazyBlob = await createBlob(operation.content, { fetch: params.fetch });
abortSignal?.throwIfAborted();
return {
...operation,
content: lazyBlob
};
})
);
// Content of a `.gitattributes` file included in this commit, if any; sent with each preupload request.
const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content;
// Preupload: send path, size and a base64 sample of the first 512 bytes of each file.
for (const operations of chunk(allOperations.filter(isFileOperation), 100)) {
const payload = {
gitAttributes: gitAttributes && await gitAttributes.text(),
files: await Promise.all(
operations.map(async (operation) => ({
path: operation.path,
size: operation.content.size,
sample: base64FromBytes(new Uint8Array(await operation.content.slice(0, 512).arrayBuffer()))
}))
)
};
abortSignal?.throwIfAborted();
const res = await (params.fetch ?? fetch)(
`${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/preupload/${encodeURIComponent(
params.branch ?? "main"
)}` + (params.isPullRequest ? "?create_pr=1" : ""),
{
method: "POST",
headers: {
...accessToken && { Authorization: `Bearer ${accessToken}` },
"Content-Type": "application/json"
},
body: JSON.stringify(payload),
signal: abortSignal
}
);
if (!res.ok) {
throw await createApiError(res);
}
const json = await res.json();
// Remember which paths the hub wants uploaded through LFS.
for (const file of json.files) {
if (file.uploadMode === "lfs") {
lfsShas.set(file.path, null);
}
}
}
yield { event: "phase", phase: "uploadingLargeFiles" };
// LFS files are handled in batches of up to 100.
for (const operations of chunk(
allOperations.filter(isFileOperation).filter((op) => lfsShas.has(op.path)),
100
)) {
// Hash the batch with bounded concurrency, forwarding hashing progress as events.
const shas = yield* eventToGenerator((yieldCallback, returnCallback, rejectCallack) => {
return promisesQueue(
operations.map((op) => async () => {
const iterator = sha256(op.content, { useWebWorker: params.useWebWorkers, abortSignal });
let res2;
do {
res2 = await iterator.next();
if (!res2.done) {
yieldCallback({ event: "fileProgress", path: op.path, progress: res2.value, state: "hashing" });
}
} while (!res2.done);
const sha = res2.value;
lfsShas.set(op.path, res2.value);
return sha;
}),
CONCURRENT_SHAS
).then(returnCallback, rejectCallack);
});
abortSignal?.throwIfAborted();
// LFS batch request; the `ref` field is omitted for pull requests.
const payload = {
operation: "upload",
// multipart is a custom protocol for HF
transfers: ["basic", "multipart"],
hash_algo: "sha_256",
...!params.isPullRequest && {
ref: {
name: params.branch ?? "main"
}
},
objects: operations.map((op, i) => ({
oid: shas[i],
size: op.content.size
}))
};
const res = await (params.fetch ?? fetch)(
`${params.hubUrl ?? HUB_URL}/${repoId.type === "model" ? "" : repoId.type + "s/"}${repoId.name}.git/info/lfs/objects/batch`,
{
method: "POST",
headers: {
...accessToken && { Authorization: `Bearer ${accessToken}` },
Accept: "application/vnd.git-lfs+json",
"Content-Type": "application/vnd.git-lfs+json"
},
body: JSON.stringify(payload),
signal: abortSignal
}
);
if (!res.ok) {
throw await createApiError(res);
}
const json = await res.json();
const batchRequestId = res.headers.get("X-Request-Id") || void 0;
// sha -> operation, to match the objects of the response with the files of this batch.
const shaToOperation = new Map(operations.map((op, i) => [shas[i], op]));
// Upload the objects with bounded concurrency, forwarding upload progress as events.
yield* eventToGenerator((yieldCallback, returnCallback, rejectCallback) => {
return promisesQueueStreaming(
json.objects.map((obj) => async () => {
const op = shaToOperation.get(obj.oid);
if (!op) {
throw new InvalidApiResponseFormatError("Unrequested object ID in response");
}
abortSignal?.throwIfAborted();
if (obj.error) {
const errorMessage = `Error while doing LFS batch call for ${operations[shas.indexOf(obj.oid)].path}: ${obj.error.message}${batchRequestId ? ` - Request ID: ${batchRequestId}` : ""}`;
throw new HubApiError(res.url, obj.error.code, batchRequestId, errorMessage);
}
// No upload action in the response: nothing is sent for this object and it is reported
// as complete (presumably the server already has it — confirm).
if (!obj.actions?.upload) {
yieldCallback({
event: "fileProgress",
path: op.path,
progress: 1,
state: "uploading"
});
return;
}
yieldCallback({
event: "fileProgress",
path: op.path,
progress: 0,
state: "uploading"
});
const content = op.content;
const header = obj.actions.upload.header;
// A `chunk_size` header selects the multipart protocol; otherwise a single PUT is used.
if (header?.chunk_size) {
const chunkSize = parseInt(header.chunk_size);
const completionUrl = obj.actions.upload.href;
// The numeric keys of the header are the part numbers; their values are the part upload URLs.
const parts = Object.keys(header).filter((key) => /^[0-9]+$/.test(key));
if (parts.length !== Math.ceil(content.size / chunkSize)) {
throw new Error("Invalid server response to upload large LFS file, wrong number of parts");
}
// Completion request body; each part's etag is filled in after its upload.
const completeReq = {
oid: obj.oid,
parts: parts.map((part) => ({
partNumber: +part,
etag: ""
}))
};
const progressCallback = (progress) => yieldCallback({ event: "fileProgress", path: op.path, progress, state: "uploading" });
await promisesQueueStreaming(
parts.map((part) => async () => {
abortSignal?.throwIfAborted();
// Part numbers are 1-based, byte offsets are 0-based.
const index = parseInt(part) - 1;
const slice = content.slice(index * chunkSize, (index + 1) * chunkSize);
const res3 = await (params.fetch ?? fetch)(header[part], {
method: "PUT",
/** Unfortunately, browsers don't support our inherited version of Blob in fetch calls */
body: slice instanceof WebBlob && isFrontend ? await slice.arrayBuffer() : slice,
signal: abortSignal,
...{
progressHint: {
path: op.path,
part: index,
numParts: parts.length,
progressCallback
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}
});
if (!res3.ok) {
throw await createApiError(res3, {
requestId: batchRequestId,
message: `Error while uploading part ${part} of ${operations[shas.indexOf(obj.oid)].path} to LFS storage`
});
}
const eTag = res3.headers.get("ETag");
if (!eTag) {
throw new Error("Cannot get ETag of part during multipart upload");
}
completeReq.parts[Number(part) - 1].etag = eTag;
}),
MULTIPART_PARALLEL_UPLOAD
);
abortSignal?.throwIfAborted();
// All parts are uploaded: send the list of etags to the completion URL.
const res2 = await (params.fetch ?? fetch)(completionUrl, {
method: "POST",
body: JSON.stringify(completeReq),
headers: {
Accept: "application/vnd.git-lfs+json",
"Content-Type": "application/vnd.git-lfs+json"
},
signal: abortSignal
});
if (!res2.ok) {
throw await createApiError(res2, {
requestId: batchRequestId,
message: `Error completing multipart upload of ${operations[shas.indexOf(obj.oid)].path} to LFS storage`
});
}
yieldCallback({
event: "fileProgress",
path: op.path,
progress: 1,
state: "uploading"
});
} else {
// Single-request upload of the whole content.
const res2 = await (params.fetch ?? fetch)(obj.actions.upload.href, {
method: "PUT",
headers: {
...batchRequestId ? { "X-Request-Id": batchRequestId } : void 0
},
/** Unfortunately, browsers don't support our inherited version of Blob in fetch calls */
body: content instanceof WebBlob && isFrontend ? await content.arrayBuffer() : content,
signal: abortSignal,
...{
progressHint: {
path: op.path,
progressCallback: (progress) => yieldCallback({
event: "fileProgress",
path: op.path,
progress,
state: "uploading"
})
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}
});
if (!res2.ok) {
throw await createApiError(res2, {
requestId: batchRequestId,
message: `Error while uploading ${operations[shas.indexOf(obj.oid)].path} to LFS storage`
});
}
yieldCallback({
event: "fileProgress",
path: op.path,
progress: 1,
state: "uploading"
});
}
}),
CONCURRENT_LFS_UPLOADS
).then(returnCallback, rejectCallback);
});
}
abortSignal?.throwIfAborted();
yield { event: "phase", phase: "committing" };
// Commit request body: one JSON document per line, a "header" entry followed by one entry
// per operation. Files with a recorded sha become "lfsFile" entries; every other operation
// is converted by `convertOperationToNdJson`.
return yield* eventToGenerator(
async (yieldCallback, returnCallback, rejectCallback) => (params.fetch ?? fetch)(
`${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commit/${encodeURIComponent(
params.branch ?? "main"
)}` + (params.isPullRequest ? "?create_pr=1" : ""),
{
method: "POST",
headers: {
...accessToken && { Authorization: `Bearer ${accessToken}` },
"Content-Type": "application/x-ndjson"
},
body: [
{
key: "header",
value: {
summary: params.title,
description: params.description,
parentCommit: params.parentCommit
}
},
...await Promise.all(
allOperations.map((operation) => {
if (isFileOperation(operation)) {
const sha = lfsShas.get(operation.path);
if (sha) {
return {
key: "lfsFile",
value: {
path: operation.path,
algo: "sha256",
size: operation.content.size,
oid: sha
}
};
}
}
return convertOperationToNdJson(operation);
})
)
].map((x) => JSON.stringify(x)).join("\n"),
signal: abortSignal,
...{
progressHint: {
progressCallback: (progress) => {
// The same progress value is reported for every file sent inline, i.e. not through LFS.
for (const op of allOperations) {
if (isFileOperation(op) && !lfsShas.has(op.path)) {
yieldCallback({
event: "fileProgress",
path: op.path,
progress,
state: "uploading"
});
}
}
}
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}
}
).then(async (res) => {
if (!res.ok) {
throw await createApiError(res);
}
const json = await res.json();
returnCallback({
pullRequestUrl: json.pullRequestUrl,
commit: {
oid: json.commitOid,
url: json.commitUrl
},
hookOutput: json.hookOutput
});
}).catch(rejectCallback)
);
} catch (err) {
// Cancel whatever is still in flight before propagating the failure.
abortController.abort();
throw err;
}
}
/**
 * Create a commit and wait for it to finish, ignoring progress events.
 *
 * Drives `commitIter(params)` to completion and resolves with its return
 * value.
 *
 * @param {object} params Same parameters as `commitIter`.
 * @returns {Promise<object>} The value returned by the `commitIter` generator.
 */
async function commit(params) {
  const iterator = commitIter(params);
  for (;;) {
    const step = await iterator.next();
    if (step.done) {
      return step.value;
    }
  }
}
async function convertOperationToNdJson(operation) {
switch (operation.operation) {
case "addOrUpdate": {
return {
key: "file",
value: {
content: base64FromBytes(new Uint8Array(await operation.content.arrayBuffer())),
path: operation.path,
encoding: "base64"
}
};
}
case "delete": {
return {
key: "deletedFile",
value: {
path: operation.path
}
};
}
default:
throw new TypeError("Unknown operation: " + operation.operation);
}
}
// src/lib/count-commits.ts
/**
 * Count the commits reachable from a revision of a repo.
 *
 * Requests a single commit (`limit=1`) and reads the total from the
 * `x-total-count` response header.
 *
 * @param {object} params `repo`, optional `revision` (default `"main"`),
 *   `hubUrl`, `fetch`, and credentials.
 * @returns {Promise<number>} The commit count; 0 when the header is missing.
 * @throws {HubApiError} When the response is not OK (via `createApiError`).
 */
async function countCommits(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);
  // Fixed: the revision is now URL-encoded, as in the other revision/branch URLs of this
  // file, so that a revision containing "/" stays a single path segment.
  const revision = encodeURIComponent(params.revision ?? "main");
  const url = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${revision}?limit=1`;
  const res = await (params.fetch ?? fetch)(url, {
    headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {}
  });
  if (!res.ok) {
    throw await createApiError(res);
  }
  return parseInt(res.headers.get("x-total-count") ?? "0", 10);
}
// src/lib/create-repo.ts
/**
 * Create a repo on the hub.
 *
 * @param {object} params `repo` (must be `namespace/name`), optional
 *   `private`, `license`, `files` (each `{ path, content }` with `content` a
 *   `Blob` or an array buffer), `hubUrl`, `fetch`, and credentials.
 * @returns {Promise<{ repoUrl: string }>} URL of the created repo.
 * @throws {TypeError} When the repo name is not of the form `namespace/name`.
 * @throws {HubApiError} When the response is not OK (via `createApiError`).
 */
async function createRepo(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);
  const [namespace, repoName] = repoId.name.split("/");
  if (!namespace || !repoName) {
    throw new TypeError(
      `"${repoId.name}" is not a fully qualified repo name. It should be of the form "{namespace}/{repoName}".`
    );
  }

  // Spaces are created with the "static" SDK.
  const typeFields = repoId.type === "space" ? { type: "space", sdk: "static" } : { type: repoId.type };

  let files = void 0;
  if (params.files) {
    files = await Promise.all(
      params.files.map(async (file) => {
        const raw = file.content instanceof Blob ? await file.content.arrayBuffer() : file.content;
        return {
          encoding: "base64",
          path: file.path,
          content: base64FromBytes(new Uint8Array(raw))
        };
      })
    );
  }

  const requestBody = JSON.stringify({
    name: repoName,
    private: params.private,
    organization: namespace,
    license: params.license,
    ...typeFields,
    files
  });
  const doFetch = params.fetch ?? fetch;
  const res = await doFetch(`${params.hubUrl ?? HUB_URL}/api/repos/create`, {
    method: "POST",
    body: requestBody,
    headers: {
      Authorization: `Bearer ${accessToken}`,
      "Content-Type": "application/json"
    }
  });
  if (!res.ok) {
    throw await createApiError(res);
  }
  const { url } = await res.json();
  return { repoUrl: url };
}
// src/utils/pick.ts
/**
 * Copy a selection of properties into a new object, leaving out those whose
 * value is `undefined`.
 *
 * @param {object} o Source object.
 * @param {string[]} props Names of the properties to copy.
 * @returns {object} New object holding the selected, defined properties.
 */
function pick(o, props) {
  const picked = {};
  for (const prop of props) {
    const value = o[prop];
    if (value !== void 0) {
      picked[prop] = value;
    }
  }
  return picked;
}
// src/utils/parseLinkHeader.ts
/**
 * Parse an HTTP `Link` header into a map from relation name to URL.
 *
 * Only entries of the form `<http(s)://...>; rel="name"` are recognised.
 *
 * @param {string} header Value of the `Link` header.
 * @returns {Record<string, string>} URL for each `rel` found.
 */
function parseLinkHeader(header) {
  const linkPattern = /<(https?:[/][/][^>]+)>;\s+rel="([^"]+)"/g;
  const relUrlPairs = [];
  for (const match of header.matchAll(linkPattern)) {
    relUrlPairs.push([match[2], match[1]]);
  }
  return Object.fromEntries(relUrlPairs);
}
// src/lib/list-datasets.ts
// Fields always requested, through `expand` query parameters, by `listDatasets` and `datasetInfo`.
var DATASET_EXPAND_KEYS = [
"private",
"downloads",
"gated",
"likes",
"lastModified"
];
// List of dataset field names; presumably the values accepted in `additionalFields` (not referenced in the visible code — confirm).
var DATASET_EXPANDABLE_KEYS = [
"author",
"cardData",
"citation",
"createdAt",
"disabled",
"description",
"downloads",
"downloadsAllTime",
"gated",
"gitalyUid",
"lastModified",
"likes",
"paperswithcode_id",
"private",
// "siblings",
"sha",
"tags"
];
/**
 * List datasets of the hub as an async generator, following pagination.
 *
 * Pages are requested with at most 500 items each. The next page URL is read
 * from the `Link` response header.
 *
 * @param {object} [params] Optional `search` (`owner`, `query`, `tags`),
 *   `limit`, `additionalFields`, `hubUrl`, `fetch`, and credentials.
 * @yields {object} One entry per dataset: `id`, `name`, `private`,
 *   `downloads`, `likes`, `gated`, `updatedAt`, plus the requested additional
 *   fields.
 * @throws {HubApiError} When a response is not OK (via `createApiError`).
 */
async function* listDatasets(params) {
  const accessToken = params && checkCredentials(params);
  let remaining = params?.limit ?? Infinity;

  const query = new URLSearchParams();
  query.append("limit", String(Math.min(remaining, 500)));
  if (params?.search?.owner) {
    query.append("author", params.search.owner);
  }
  if (params?.search?.query) {
    query.append("search", params.search.query);
  }
  for (const tag of params?.search?.tags ?? []) {
    query.append("filter", tag);
  }
  for (const key of DATASET_EXPAND_KEYS) {
    query.append("expand", key);
  }
  for (const key of params?.additionalFields ?? []) {
    query.append("expand", key);
  }
  const search = query.toString();

  let url = `${params?.hubUrl || HUB_URL}/api/datasets` + (search ? "?" + search : "");
  while (url) {
    const doFetch = params?.fetch ?? fetch;
    const res = await doFetch(url, {
      headers: {
        accept: "application/json",
        ...accessToken ? { Authorization: `Bearer ${accessToken}` } : void 0
      }
    });
    if (!res.ok) {
      throw await createApiError(res);
    }
    const page = await res.json();
    for (const item of page) {
      const extraFields = params?.additionalFields && pick(item, params.additionalFields);
      yield {
        ...extraFields,
        id: item._id,
        name: item.id,
        private: item.private,
        downloads: item.downloads,
        likes: item.likes,
        gated: item.gated,
        updatedAt: new Date(item.lastModified)
      };
      remaining -= 1;
      if (remaining <= 0) {
        return;
      }
    }
    // Follow the `next` relation of the Link header, if any.
    const linkHeader = res.headers.get("Link");
    url = linkHeader ? parseLinkHeader(linkHeader).next : void 0;
  }
}
// src/lib/dataset-info.ts
/**
 * Fetch information about one dataset at a given revision.
 *
 * @param {object} params `name` (dataset id), optional `revision` (default
 *   `"HEAD"`), `additionalFields`, `hubUrl`, `fetch`, and credentials.
 * @returns {Promise<object>} `id`, `name`, `private`, `downloads`, `likes`,
 *   `gated`, `updatedAt`, plus the requested additional fields.
 * @throws {HubApiError} When the response is not OK (via `createApiError`).
 */
async function datasetInfo(params) {
  const accessToken = params && checkCredentials(params);
  const search = new URLSearchParams([
    ...DATASET_EXPAND_KEYS.map((val) => ["expand", val]),
    ...params?.additionalFields?.map((val) => ["expand", val]) ?? []
  ]).toString();
  const response = await (params.fetch || fetch)(
    `${params?.hubUrl || HUB_URL}/api/datasets/${params.name}/revision/${encodeURIComponent(
      params.revision ?? "HEAD"
    )}?${search.toString()}`,
    {
      headers: {
        ...accessToken ? { Authorization: `Bearer ${accessToken}` } : {},
        // Fixed: the content-negotiation header is "Accept"; "Accepts" is not an HTTP header.
        Accept: "application/json"
      }
    }
  );
  if (!response.ok) {
    throw await createApiError(response);
  }
  const data = await response.json();
  return {
    ...params?.additionalFields && pick(data, params.additionalFields),
    id: data._id,
    name: data.id,
    private: data.private,
    downloads: data.downloads,
    likes: data.likes,
    gated: data.gated,
    updatedAt: new Date(data.lastModified)
  };
}
// src/lib/delete-file.ts
/**
 * Deletes a single file from a repo by delegating to `commit` with one
 * `"delete"` operation.
 *
 * @param params Fields read here: `accessToken` or `credentials`, `repo`,
 *   `path`, `commitTitle` (defaults to `Delete <path>`), `commitDescription`,
 *   `hubUrl`, `branch`, `isPullRequest`, `parentCommit` and `fetch`.
 * @returns Whatever `commit` returns.
 */
function deleteFile(params) {
  // Forward exactly one authentication field: the token when set, otherwise credentials.
  const auth = params.accessToken
    ? { accessToken: params.accessToken }
    : { credentials: params.credentials };
  const removal = { operation: "delete", path: params.path };
  const title = params.commitTitle ?? `Delete ${params.path}`;
  return commit({
    ...auth,
    repo: params.repo,
    operations: [removal],
    title,
    description: params.commitDescription,
    hubUrl: params.hubUrl,
    branch: params.branch,
    isPullRequest: params.isPullRequest,
    parentCommit: params.parentCommit,
    fetch: params.fetch
  });
}
// src/lib/delete-files.ts
/**
 * Deletes several files from a repo in one call to `commit`, using one
 * `"delete"` operation per entry of `params.paths`.
 *
 * @param params Fields read here: `accessToken` or `credentials`, `repo`,
 *   `paths`, `commitTitle` (defaults to `Deletes <n> files`),
 *   `commitDescription`, `hubUrl`, `branch`, `isPullRequest`, `parentCommit`
 *   and `fetch`.
 * @returns Whatever `commit` returns.
 */
function deleteFiles(params) {
  // Forward exactly one authentication field: the token when set, otherwise credentials.
  const auth = params.accessToken
    ? { accessToken: params.accessToken }
    : { credentials: params.credentials };
  const removals = params.paths.map((filePath) => {
    return { operation: "delete", path: filePath };
  });
  const title = params.commitTitle ?? `Deletes ${params.paths.length} files`;
  return commit({
    ...auth,
    repo: params.repo,
    operations: removals,
    title,
    description: params.commitDescription,
    hubUrl: params.hubUrl,
    branch: params.branch,
    isPullRequest: params.isPullRequest,
    parentCommit: params.parentCommit,
    fetch: params.fetch
  });
}
// src/lib/delete-repo.ts
/**
 * Deletes a repo by sending a `DELETE` request to `{hubUrl}/api/repos/delete`.
 *
 * The repo name is split on `/`: the first segment is sent as `organization`
 * and the second as `name`, together with the repo `type`.
 *
 * @param params Fields read here: `repo`, `hubUrl` and `fetch`. The object is
 *   also passed to `checkCredentials` to obtain the access token.
 * @returns Nothing on success.
 * @throws The error produced by `createApiError` when the response is not OK.
 */
async function deleteRepo(params) {
  const token = checkCredentials(params);
  const { name: fullName, type } = toRepoId(params.repo);
  const segments = fullName.split("/");
  const payload = JSON.stringify({
    name: segments[1],
    organization: segments[0],
    type
  });
  const endpoint = `${params.hubUrl ?? HUB_URL}/api/repos/delete`;
  const res = await (params.fetch ?? fetch)(endpoint, {
    method: "DELETE",
    body: payload,
    headers: {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json"
    }
  });
  if (res.ok) {
    return;
  }
  throw await createApiError(res);
}
// src/lib/download-file.ts
/**
 * Requests a file of a repo and returns the raw fetch response.
 *
 * The URL is `{hubUrl}/{type prefix}{repo name}/{raw|resolve}/{revision}/{path}`,
 * where the type prefix is empty for models and `<type>s/` otherwise.
 *
 * @param params Fields read here: `repo`, `path`, `revision` (defaults to
 *   `"main"`), `raw`, `range` (a two-element array sent as `Range: bytes=a-b`),
 *   `hubUrl` and `fetch`. The object is also passed to `checkCredentials`.
 * @returns The response, or `null` when the server answers 404 with the
 *   `X-Error-Code: EntryNotFound` header.
 * @throws The error produced by `createApiError` for any other non-OK response.
 */
async function downloadFile(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);

  const typePrefix = repoId.type === "model" ? "" : `${repoId.type}s/`;
  const endpoint = params.raw ? "raw" : "resolve";
  const revision = encodeURIComponent(params.revision ?? "main");
  const url = `${params.hubUrl ?? HUB_URL}/${typePrefix}${repoId.name}/${endpoint}/${revision}/${params.path}`;

  // Each header is only present when its source value is set.
  const headers = {
    ...(accessToken && { Authorization: `Bearer ${accessToken}` }),
    ...(params.range && { Range: `bytes=${params.range[0]}-${params.range[1]}` })
  };
  const resp = await (params.fetch ?? fetch)(url, { headers });

  const entryMissing = resp.status === 404 && resp.headers.get("X-Error-Code") === "EntryNotFound";
  if (entryMissing) {
    return null;
  }
  if (!resp.ok) {
    throw await createApiError(resp);
  }
  return resp;
}
// src/lib/download-file-to-cache-dir.ts
import { dirname as dirname2, join as join2 } from "path";
import { writeFile, rename, lstat as lstat2, mkdir, stat as stat2 } from "fs/promises";
// src/lib/paths-info.ts
/**
 * Queries the Hub's `paths-info` endpoint for a list of paths in a repo.
 *
 * Sends `POST {hubUrl}/api/{type}s/{name}/paths-info/{revision}` with a JSON
 * body of `{ paths, expand }`.
 *
 * @param params Fields read here: `repo`, `paths`, `expand`, `revision`
 *   (defaults to `"main"`), `hubUrl` and `fetch`. The object is also passed to
 *   `checkCredentials` to obtain the access token.
 * @returns One object per response item with `path`, `lfs`, `type`, `oid`,
 *   `size`, `securityFileStatus` and `lastCommit` (`date` converted to a
 *   `Date`; `undefined` when the item has no `lastCommit`).
 * @throws The error produced by `createApiError` when the response is not OK,
 *   or an `Error` when the response body is not an array.
 */
async function pathsInfo(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);
  const baseUrl = params.hubUrl ?? HUB_URL;
  const revision = encodeURIComponent(params.revision ?? "main");
  const endpoint = `${baseUrl}/api/${repoId.type}s/${repoId.name}/paths-info/${revision}`;

  const resp = await (params.fetch ?? fetch)(endpoint, {
    method: "POST",
    headers: {
      ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}),
      Accept: "application/json",
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ paths: params.paths, expand: params.expand })
  });
  if (!resp.ok) {
    throw await createApiError(resp);
  }

  const payload = await resp.json();
  if (!Array.isArray(payload)) {
    throw new Error("malformed response: expected array");
  }

  return payload.map((entry) => {
    const last = entry.lastCommit;
    return {
      path: entry.path,
      lfs: entry.lfs,
      type: entry.type,
      oid: entry.oid,
      size: entry.size,
      // expand fields
      securityFileStatus: entry.securityFileStatus,
      lastCommit: last ? { date: new Date(last.date), title: last.title, id: last.id } : void 0
    };
  });
}
// src/utils/symlink.ts
import * as fs from "fs/promises";
import * as path from "path";
import * as os from "os";
/**
 * Expands a leading `~` in a path to the current user's home directory.
 *
 * Only a bare `~`, or `~` immediately followed by a path separator (`/` or
 * `\`), is expanded. Anything else, such as `~otheruser/x` or a file literally
 * named `~backup`, is returned untouched instead of having the home directory
 * glued in front of it.
 *
 * @param {string} path2 Path that may start with `~`.
 * @returns {string} The path with the leading `~` replaced by `os.homedir()`,
 *   or the input unchanged.
 */
function expandUser(path2) {
  const isHomeShorthand = path2 === "~" || path2.startsWith("~/") || path2.startsWith("~\\");
  if (!isHomeShorthand) {
    return path2;
  }
  // Concatenate rather than String.replace: replace() would interpret `$&`-style
  // sequences if the home directory happened to contain a `$`.
  return os.homedir() + path2.slice(1);
}
/**
 * Makes `params.finalPath` point at `params.sourcePath`.
 *
 * Both paths go through `expandUser` and `path.resolve`. Anything already at
 * the destination is removed first (failures ignored). A symlink whose target
 * is relative to the destination's directory is then created; if that fails
 * for any reason, the source file is copied to the destination instead.
 *
 * @param params Object with `sourcePath` and `finalPath`.
 * @returns Nothing; may reject if the fallback copy fails.
 */
async function createSymlink(params) {
  const source = path.resolve(expandUser(params.sourcePath));
  const destination = path.resolve(expandUser(params.finalPath));

  try {
    await fs.rm(destination);
  } catch {
    // Best effort: a failed removal (for instance nothing to remove) is ignored.
  }

  try {
    const linkTarget = path.relative(path.dirname(destination), source);
    await fs.symlink(linkTarget, destination);
  } catch {
    console.info(`Symlink not supported. Copying file from ${source} to ${destination}`);
    await fs.copyFile(source, destination);
  }
}
// src/lib/download-file-to-cache-dir.ts
// Matches a string made of exactly 40 lowercase hexadecimal characters.
var REGEX_COMMIT_HASH = /^[0-9a-f]{40}$/;
/**
 * Builds the path `<storageFolder>/snapshots/<revision>/<relativeFilename>`.
 *
 * @param {string} storageFolder Folder that contains the `snapshots` directory.
 * @param {string} revision Name of the sub-folder under `snapshots`.
 * @param {string} relativeFilename File path relative to that sub-folder.
 * @returns {string} The joined (and normalized) path.
 */
function getFilePointer(storageFolder, revision, relativeFilename) {
  return join2(storageFolder, "snapshots", revision, relativeFilename);
}
/**
 * Reports whether something exists at a path.
 *
 * @param {string} path2 Path to probe.
 * @param {boolean} [followSymlinks] When truthy the check uses `stat`,
 *   otherwise `lstat`.
 * @returns {Promise<boolean>} `true` when the chosen call succeeds, `false`
 *   when it fails for any reason.
 */
async function exists(path2, followSymlinks) {
  const probe = followSymlinks ? stat2 : lstat2;
  try {
    await probe(path2);
  } catch {
    return false;
  }
  return true;
}
/**
 * Downloads one file of a repo into the local cache directory and returns the
 * path of its pointer inside `snapshots/`.
 *
 * Layout used under `<cacheDir>/<repo folder name>`:
 *   - `blobs/<etag>` holds the content, where the etag is the entry's LFS oid
 *     when `lfs` is set and its `oid` otherwise;
 *   - `snapshots/<commit>/<path>` is the pointer made by `createSymlink`.
 *
 * Steps: return early if the pointer already exists; otherwise look the file
 * up with `pathsInfo`, reuse the blob if it is already on disk, or download it
 * to `<blob>.incomplete`, rename it into place and create the pointer.
 *
 * @param params Fields read here: `repo`, `path`, `revision` (defaults to
 *   `"main"`) and `cacheDir` (defaults to `getHFHubCachePath()`). The whole
 *   object is also forwarded to `pathsInfo` and `downloadFile`.
 * @returns The pointer path.
 * @throws `Error` when path info cannot be obtained, when the commit for the
 *   snapshot folder cannot be determined, or when the download response is
 *   missing, not OK or has no body.
 */
async function downloadFileToCacheDir(params) {
  const revision = params.revision ?? "main";
  const cacheDir = params.cacheDir ?? getHFHubCachePath();
  const repoId = toRepoId(params.repo);
  const storageFolder = join2(cacheDir, getRepoFolderName(repoId));

  let commitHash;
  if (REGEX_COMMIT_HASH.test(revision)) {
    // The revision already looks like a full commit hash: if its pointer is on
    // disk we can answer without any network call.
    commitHash = revision;
    const pinnedPointer = getFilePointer(storageFolder, revision, params.path);
    if (await exists(pinnedPointer, true)) {
      return pinnedPointer;
    }
  }

  const pathsInformation = await pathsInfo({
    ...params,
    paths: [params.path],
    revision,
    expand: true
  });
  if (!pathsInformation || pathsInformation.length !== 1) {
    throw new Error(`cannot get path info for ${params.path}`);
  }
  const fileInfo = pathsInformation[0];
  const etag = fileInfo.lfs ? fileInfo.lfs.oid : fileInfo.oid;

  // Snapshot folder: the caller's commit hash, else the file's last commit id.
  const snapshotCommit = commitHash ?? fileInfo.lastCommit?.id;
  if (!snapshotCommit) {
    throw new Error(`cannot determine the commit of ${params.path}`);
  }

  const pointerPath = getFilePointer(storageFolder, snapshotCommit, params.path);
  const blobPath = join2(storageFolder, "blobs", etag);
  if (await exists(pointerPath, true)) {
    return pointerPath;
  }

  await mkdir(dirname2(blobPath), { recursive: true });
  await mkdir(dirname2(pointerPath), { recursive: true });

  if (await exists(blobPath, false)) {
    // Content already cached (for example under another snapshot): only link it.
    await createSymlink({ sourcePath: blobPath, finalPath: pointerPath });
    return pointerPath;
  }

  const incomplete = `${blobPath}.incomplete`;
  console.debug(`Downloading ${params.path} to ${incomplete}`);
  const response = await downloadFile({
    ...params,
    // Download from the same revision that was resolved by pathsInfo above.
    // Passing `commitHash` here overrode the caller's branch/tag with
    // `undefined`, which made downloadFile fall back to "main".
    revision
  });
  if (!response || !response.ok || !response.body) {
    throw new Error(`invalid response for file ${params.path}`);
  }

  // Write under a temporary name, then rename, so a partial download never
  // sits at the final blob path.
  await writeFile(incomplete, response.body);
  await rename(incomplete, blobPath);
  await createSymlink({ sourcePath: blobPath, finalPath: pointerPath });
  return pointerPath;
}
// src/lib/file-download-info.ts
/**
 * Retrieves the ETag, size and (when applicable) redirect target of a file by
 * issuing a `GET` request for its first byte (`Range: bytes=0-0`).
 *
 * @param params Fields read here: `repo`, `path`, `revision` (defaults to
 *   `"main"`), `raw`, `noContentDisposition`, `hubUrl` and `fetch`. The object
 *   is also passed to `checkCredentials` to obtain the access token.
 * @returns `null` when the server answers 404 with `X-Error-Code:
 *   EntryNotFound`; otherwise `{ etag, size, downloadLink }`, where `size` is
 *   parsed from the part of `content-range` after `/` and `downloadLink` is
 *   the final response URL if its hostname differs from the hub's, else `null`.
 * @throws The error produced by `createApiError` for other non-OK responses,
 *   or `InvalidApiResponseFormatError` when the ETag or size is missing/invalid.
 */
async function fileDownloadInfo(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);
  const hubUrl = params.hubUrl ?? HUB_URL;

  const typePrefix = repoId.type === "model" ? "" : `${repoId.type}s/`;
  const endpoint = params.raw ? "raw" : "resolve";
  const revision = encodeURIComponent(params.revision ?? "main");
  const query = params.noContentDisposition ? "?noContentDisposition=1" : "";
  const url = `${hubUrl}/${typePrefix}${repoId.name}/${endpoint}/${revision}/${params.path}${query}`;

  const resp = await (params.fetch ?? fetch)(url, {
    method: "GET",
    headers: {
      ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}),
      Range: "bytes=0-0"
    }
  });

  if (resp.status === 404 && resp.headers.get("X-Error-Code") === "EntryNotFound") {
    return null;
  }
  if (!resp.ok) {
    throw await createApiError(resp);
  }

  const etag = resp.headers.get("ETag");
  if (!etag) {
    throw new InvalidApiResponseFormatError("Expected ETag");
  }

  const contentRange = resp.headers.get("content-range");
  if (!contentRange) {
    throw new InvalidApiResponseFormatError("Expected size information");
  }
  // The total size is whatever follows the "/" in the content-range value.
  const size = parseInt(contentRange.split("/")[1]);
  if (isNaN(size)) {
    throw new InvalidApiResponseFormatError("Invalid file size received");
  }

  const servedFromOtherHost = new URL(resp.url).hostname !== new URL(hubUrl).hostname;
  return {
    etag,
    size,
    downloadLink: servedFromOtherHost ? resp.url : null
  };
}
// src/lib/file-exists.ts
/**
 * Checks whether a file exists in a repo by sending a `HEAD` request to its
 * `raw` URL.
 *
 * @param params Fields read here: `repo`, `path`, `revision` (defaults to
 *   `"main"`), `hubUrl` and `fetch`. The object is also passed to
 *   `checkCredentials` to obtain the access token.
 * @returns `false` on a 404 response, `true` on any OK response.
 * @throws The error produced by `createApiError` for other non-OK responses.
 */
async function fileExists(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);
  const baseUrl = params.hubUrl ?? HUB_URL;

  const typePrefix = repoId.type === "model" ? "" : `${repoId.type}s/`;
  const revision = encodeURIComponent(params.revision ?? "main");
  const url = `${baseUrl}/${typePrefix}${repoId.name}/raw/${revision}/${params.path}`;

  const headers = accessToken ? { Authorization: `Bearer ${accessToken}` } : {};
  const resp = await (params.fetch ?? fetch)(url, { method: "HEAD", headers });

  if (resp.status === 404) {
    return false;
  }
  if (resp.ok) {
    return true;
  }
  throw await createApiError(resp);
}
// src/lib/list-commits.ts
/**
 * Async generator that lists the commits of a repo at a given revision.
 *
 * Pages are fetched one at a time from
 * `{hubUrl}/api/{type}s/{name}/commits/{revision}?limit={batchSize}`; the URL
 * of the following page is read from the `Link` response header (through
 * `parseLinkHeader`) and iteration ends when none is advertised.
 *
 * @param params Fields read here: `repo`, `revision` (defaults to `"main"`),
 *   `batchSize` (defaults to 100), `hubUrl` and `fetch`. The object is also
 *   passed to `checkCredentials` to obtain the access token.
 * @yields Objects with `oid`, `title`, `message`, `authors` (each
 *   `{ username, avatarUrl }`) and `date` (a `Date`).
 * @throws The error produced by `createApiError` when a response is not OK.
 */
async function* listCommits(params) {
  const accessToken = checkCredentials(params);
  const repoId = toRepoId(params.repo);
  // Encode the revision like the other endpoints in this file do, so that
  // revisions containing "/" (e.g. "refs/pr/1") stay a single path segment.
  const revision = encodeURIComponent(params.revision ?? "main");
  let url = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${revision}?limit=${params.batchSize ?? 100}`;
  while (url) {
    const res = await (params.fetch ?? fetch)(url, {
      headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {}
    });
    if (!res.ok) {
      throw await createApiError(res);
    }
    const resJson = await res.json();
    for (const commit2 of resJson) {
      yield {
        oid: commit2.id,
        title: commit2.title,
        message: commit2.message,
        authors: commit2.authors.map((author) => ({
          username: author.user,
          avatarUrl: author.avatar
        })),
        date: new Date(commit2.date)
      };
    }
    const linkHeader = res.headers.get("Link");
    url = linkHeader ? parseLinkHeader(linkHeader).next : void 0;
  }
}
// src/lib/list-files.ts
async function* listFiles(params) {
const accessToken = checkCredentials(params);
const repoId = toRepoId(params.repo);
let url = `${params.hubUrl || HUB_URL}/api/${repoId.type}s/${repoId.name}/tree/${params.revision || "main"}${params.path ? "/" + params.path : ""}?recursive=${!!params.recursive}&expand=${!!params.expand}`;
while (url) {
const res = await (params.fetch ?? fetch)(url, {
headers: {
accept: "application/json",
...accessToken ? { Authorization: `Bearer ${accessToken}` } : void 0
}
});
if (!res.ok) {
throw await createApiError(res);
}
const items = await res