UNPKG

node-llama-cpp

Version:

Run AI models locally on your machine with Node.js bindings for llama.cpp. Enforce a JSON schema on the model output at the generation level.

303 lines 14.5 kB
import filenamify from "filenamify";
import { normalizeGgufDownloadUrl } from "../gguf/utils/normalizeGgufDownloadUrl.js";
import { getFilenameForBinarySplitGgufPartUrls, resolveBinarySplitGgufPartUrls } from "../gguf/utils/resolveBinarySplitGgufPartUrls.js";
import { createSplitPartFilename, getGgufSplitPartsInfo } from "../gguf/utils/resolveSplitGgufParts.js";
import { ggufQuantNames } from "../gguf/utils/ggufQuantNames.js";
import { isUrl } from "./isUrl.js";
import { resolveModelFileAccessTokensTryHeaders } from "./modelFileAccesTokens.js";
import { parseModelFileName } from "./parseModelFileName.js";

const defaultHuggingFaceBranch = "main";
const defaultHuggingFaceFileQuantization = "Q4_K_M";
export const genericFilePartNumber = "{:\n{number}\n:}";

// Order matters: the "//" variants must be tested before their shorter "scheme:" counterparts.
const huggingFaceUriPrefixes = [
    "hf://",
    "huggingface://",
    "hf:",
    "huggingface:",
    "hf.co/",
    "huggingface.co/"
];

/**
 * Tells whether a URL hostname is one of the Hugging Face hostnames this module recognizes.
 * @param {string} hostname
 * @returns {boolean}
 */
function isHuggingFaceHostname(hostname) {
    return hostname === "huggingface.co" || hostname === "hf.co";
}

/**
 * Parse a model URI (`hf:`, `hf://`, `huggingface:`, `huggingface://`, `hf.co/`, `huggingface.co/`)
 * or a Hugging Face URL into a parsed model URI object.
 *
 * @param {string} urlOrUri - the URI or URL to parse
 * @param {boolean} [convertUrlToSupportedUri] - when `true`, a Hugging Face `/resolve/<branch>/<file>`
 * download URL is also converted into a parsed URI
 * @returns {object | null} a `"resolved"` or `"unresolved"` parsed URI, or `null` when the input
 * is not recognized
 * @throws {Error} when the input looks like a Hugging Face URI but is missing required parts
 */
export function parseModelUri(urlOrUri, convertUrlToSupportedUri = false) {
    const matchedPrefix = huggingFaceUriPrefixes.find((prefix) => urlOrUri.startsWith(prefix));
    if (matchedPrefix != null)
        return parseHuggingFaceUriContent(urlOrUri.slice(matchedPrefix.length), urlOrUri);

    if (!isUrl(urlOrUri))
        return null;

    // a bare `https://huggingface.co/<user>/<model>` page URL
    const pageUrl = new URL(urlOrUri);
    if (isHuggingFaceHostname(pageUrl.hostname)) {
        const pathnameParts = pageUrl.pathname.split("/");
        const slashes = pathnameParts.length - 1;
        const [, user, model] = pathnameParts;

        if (slashes === 2 && user != null && model != null)
            return parseHuggingFaceUriContent(
                `${decodeURIComponent(user)}/${decodeURIComponent(model)}`,
                urlOrUri
            );
    }

    if (!convertUrlToSupportedUri)
        return null;

    // a `https://huggingface.co/<user>/<model>/resolve/<branch>/<file path>` download URL
    const downloadUrl = new URL(normalizeGgufDownloadUrl(urlOrUri));
    if (!isHuggingFaceHostname(downloadUrl.hostname))
        return null;

    const [, user, model, resolve, branch, ...pathParts] = downloadUrl.pathname.split("/");
    const filePath = pathParts.join("/");
    if (user == null || model == null || resolve !== "resolve" || branch == null || filePath === "")
        return null;

    const decodedFilePath = filePath
        .split("/")
        .map((part) => decodeURIComponent(part))
        .join("/");
    const branchSuffix = branch !== defaultHuggingFaceBranch
        ? `#${decodeURIComponent(branch)}`
        : "";

    return parseHuggingFaceUriContent(
        `${decodeURIComponent(user)}/${decodeURIComponent(model)}/${decodedFilePath}${branchSuffix}`,
        urlOrUri
    );
}

/**
 * Check whether the given value can be parsed as a model URI.
 * Any error thrown while parsing is treated as "not a model URI".
 *
 * @param {string} modelUri
 * @returns {boolean}
 */
export function isModelUri(modelUri) {
    try {
        return parseModelUri(modelUri) != null;
    } catch {
        return false;
    }
}

/**
 * Turn an `"unresolved"` parsed model URI into a `"resolved"` one by fetching the Hugging Face
 * manifest for the requested tag and deriving the file name and download URL from it.
 *
 * @param {object | null | undefined} modelUri - a value returned from `parseModelUri`
 * @param {object} [options]
 * @param {object} [options.tokens] - passed to `resolveModelFileAccessTokensTryHeaders`
 * @param {AbortSignal} [options.signal] - passed to `fetch`
 * @param {string} [options.authorizationHeader] - sent as the `Authorization` header
 * @returns {Promise<object | null | undefined>} the input itself when it is nullish or already
 * resolved, otherwise a new `"resolved"` URI object
 * @throws {Error} when the URI's resolve type is not `"hf"` or the manifest cannot be fetched
 */
export async function resolveParsedModelUri(modelUri, { tokens, signal, authorizationHeader } = {}) {
    if (modelUri == null)
        return modelUri;

    if (modelUri.type === "resolved")
        return modelUri;

    const { resolveDetails } = modelUri;
    if (resolveDetails.type !== "hf")
        throw new Error(`Unsupported model URI type: ${resolveDetails.type}`);

    // an empty tag is intentionally treated the same as a missing one
    const modelTag = resolveDetails.tag || "latest";
    const manifest = await fetchHuggingFaceModelManifest({
        user: resolveDetails.user,
        model: resolveDetails.model,
        modelTag,
        fullUri: modelUri.uri,
        tokens,
        signal,
        authorizationHeader
    });
    const filename = manifest.rfilename;
    const splitPartsInfo = getGgufSplitPartsInfo(filename);

    function resolveQuantizationText() {
        if (modelTag.toLowerCase() !== "latest")
            return modelTag.toUpperCase();

        if (filename.toLowerCase().includes(defaultHuggingFaceFileQuantization.toLowerCase()))
            return defaultHuggingFaceFileQuantization;

        const parsedQuantization = parseModelFileName(filename).quantization;
        if (parsedQuantization != null && ggufQuantNames.has(parsedQuantization))
            return parsedQuantization;

        return "";
    }

    const quantizationText = resolveQuantizationText();
    const quantizationSuffix = quantizationText !== ""
        ? `.${quantizationText}`
        : "";
    const resolvedBaseFilename = `${modelUri.baseFilename}${quantizationSuffix}.gguf`;
    const resolvedFilename = splitPartsInfo != null
        ? createSplitPartFilename(resolvedBaseFilename, splitPartsInfo.part, splitPartsInfo.parts)
        : resolvedBaseFilename;

    const encodedFilePath = filename
        .split("/")
        .map((item) => encodeURIComponent(item))
        .join("/");
    const resolvedUrl = normalizeGgufDownloadUrl([
        "https://huggingface.co/", encodeURIComponent(resolveDetails.user),
        "/", encodeURIComponent(resolveDetails.model),
        "/resolve/", encodeURIComponent(defaultHuggingFaceBranch),
        "/", encodedFilePath,
        "?download=true"
    ].join(""));

    return {
        type: "resolved",
        uri: modelUri.uri,
        filePrefix: modelUri.filePrefix,
        filename: resolvedFilename,
        fullFilename: `${modelUri.filePrefix}${resolvedFilename}`,
        resolvedUrl
    };
}

/**
 * Read the authorization header from a plain headers object, accepting either
 * the `Authorization` or the `authorization` key.
 *
 * @param {Record<string, string> | null | undefined} headers
 * @returns {string | undefined}
 */
export function getAuthorizationHeader(headers) {
    return headers?.["Authorization"] || headers?.["authorization"];
}

/**
 * Fetch the manifest of a Hugging Face model tag and return its `ggufFile` entry.
 * The request is first attempted without token headers and then with the headers produced by
 * `resolveModelFileAccessTokensTryHeaders`; a 5xx, 429 or 401 response moves on to the next
 * set of headers while there is one left.
 *
 * @returns {Promise<object>} the `ggufFile` object of the manifest (it has an `rfilename` field)
 * @throws {Error} when the request fails, the tag does not exist, or the response is not a valid manifest
 */
async function fetchHuggingFaceModelManifest({ user, model, modelTag, fullUri, tokens, signal, authorizationHeader }) {
    const manifestUrl = [
        "https://huggingface.co/v2/", encodeURIComponent(user),
        "/", encodeURIComponent(model),
        "/manifests/", encodeURIComponent(modelTag)
    ].join("");

    // NOTE(review): the return shape of `resolveModelFileAccessTokensTryHeaders` is not visible from
    // this file. Flattening one level keeps a single headers object as-is and unpacks a list of
    // headers objects into separate attempts, so both shapes end up as real request headers.
    const headersToTry = [
        {},
        await resolveModelFileAccessTokensTryHeaders(manifestUrl, tokens)
    ].flat();

    while (headersToTry.length > 0) {
        const headers = headersToTry.shift();
        const hasMoreHeadersToTry = headersToTry.length > 0;

        let response;
        try {
            response = await fetch(manifestUrl, {
                headers: {
                    ...(authorizationHeader != null ? { "Authorization": authorizationHeader } : {}),
                    ...headers,

                    // we need this to get the `ggufFile` field in the response
                    // https://github.com/ggml-org/llama.cpp/pull/11195
                    "User-Agent": "llama-cpp"
                },
                signal
            });
        } catch (err) {
            throw new Error(
                `Failed to fetch manifest for resolving URI ${JSON.stringify(fullUri)}: ${err}`,
                { cause: err }
            );
        }

        const { status } = response;
        if ((status >= 500 || status === 429 || status === 401) && hasMoreHeadersToTry)
            continue;

        if (status === 400 || status === 404)
            throw new Error(`Cannot get quantization "${modelTag}" for model "hf:${user}/${model}" or it does not exist`);

        if (!response.ok)
            throw new Error(`Failed to fetch manifest for ${JSON.stringify(fullUri)}: ${status} ${response.statusText}`);

        let json;
        try {
            json = await response.json();
        } catch (err) {
            throw new Error(`Invalid manifest response for ${JSON.stringify(fullUri)}`, { cause: err });
        }

        // same message as for an unparsable body, so callers see one error for any invalid manifest
        if (json?.ggufFile?.rfilename == null)
            throw new Error(`Invalid manifest response for ${JSON.stringify(fullUri)}`);

        return json.ggufFile;
    }

    throw new Error(`Failed to fetch manifest for ${JSON.stringify(fullUri)}: no more headers to try`);
}

/**
 * Parse the part of a Hugging Face URI that follows the scheme prefix.
 *
 * `<user>/<model>` and `<user>/<model>:<tag>` produce an `"unresolved"` URI that needs a manifest
 * lookup; `<user>/<model>/<file path>[#<branch>]` produces a `"resolved"` URI with a download URL.
 *
 * @param {string} uri - the URI content without its scheme prefix
 * @param {string} fullUri - the original input, used in error messages
 * @returns {object}
 * @throws {Error} when the user, the model or the file path is missing
 */
function parseHuggingFaceUriContent(uri, fullUri) {
    const [user, model, ...pathParts] = uri.split("/");
    let rest = pathParts.join("/");

    if (user != null && model != null && (rest === "" || model.includes(":"))) {
        // everything after the first ":" is the tag, including any further ":" characters
        const [actualModel, ...tagSegments] = model.split(":");
        const actualTag = tagSegments.join(":").trimEnd();
        const upperCasedTag = actualTag.toUpperCase();
        const assumedQuant = ggufQuantNames.has(upperCasedTag)
            ? upperCasedTag
            : undefined;
        const resolvedTag = assumedQuant ?? actualTag;

        if (actualModel == null || actualModel === "" || user === "")
            throw new Error(`Invalid Hugging Face URI: ${fullUri}`);

        const baseFilename = actualModel.toLowerCase().endsWith("-gguf")
            ? filenamify(actualModel.slice(0, -"-gguf".length))
            : filenamify(actualModel);
        const filePrefix = buildHuggingFaceFilePrefix(
            user, actualModel, defaultHuggingFaceBranch, [], `${baseFilename}.gguf`
        );
        const filenameBase = `${filePrefix}${baseFilename}`;
        const splitPartSuffix = `-00001-of-${genericFilePartNumber}.gguf`;

        let taggedQuant;
        if (assumedQuant != null)
            taggedQuant = assumedQuant;
        else if (resolvedTag !== "" && resolvedTag !== "latest")
            taggedQuant = resolvedTag.toUpperCase();

        const possibleFullFilenames = taggedQuant != null
            ? [
                `${filenameBase}.${taggedQuant}.gguf`,
                `${filenameBase}.${taggedQuant}${splitPartSuffix}`
            ]
            : [
                `${filenameBase}.${defaultHuggingFaceFileQuantization}.gguf`,
                `${filenameBase}.${defaultHuggingFaceFileQuantization}${splitPartSuffix}`,
                `${filenameBase}.gguf`,
                `${filenameBase}${splitPartSuffix}`
            ];

        return {
            type: "unresolved",
            uri: `hf:${user}/${actualModel}${resolvedTag !== "" ? `:${resolvedTag}` : ""}`,
            filePrefix,
            baseFilename,
            possibleFullFilenames,
            resolveDetails: {
                type: "hf",
                user,
                model: actualModel,
                tag: resolvedTag
            }
        };
    }

    let branch = defaultHuggingFaceBranch;
    const hashIndex = rest.indexOf("#");
    if (hashIndex >= 0) {
        branch = rest.slice(hashIndex + "#".length);
        rest = rest.slice(0, hashIndex);

        // a trailing "#" without a branch name falls back to the default branch
        if (branch === "")
            branch = defaultHuggingFaceBranch;
    }

    const filePathParts = rest.split("/");
    const filePath = filePathParts
        .map((part) => encodeURIComponent(part))
        .join("/");

    if (!user || !model || filePath === "")
        throw new Error(`Invalid Hugging Face URI: ${fullUri}`);

    const resolvedUrl = normalizeGgufDownloadUrl([
        "https://huggingface.co/", encodeURIComponent(user),
        "/", encodeURIComponent(model),
        "/resolve/", encodeURIComponent(branch),
        "/", filePath,
        "?download=true"
    ].join(""));
    const filename = resolveModelFilenameFromUrl(resolvedUrl);
    const filePrefix = buildHuggingFaceFilePrefix(user, model, branch, filePathParts.slice(0, -1), filename);

    return {
        type: "resolved",
        uri: `hf:${user}/${model}/${filePathParts.join("/")}${branch !== defaultHuggingFaceBranch ? `#${branch}` : ""}`,
        resolvedUrl,
        filePrefix,
        filename,
        fullFilename: `${filePrefix}${filename}`
    };
}

/**
 * Build the `hf_<user>_[<model>_][<branch>_][<folders>_]` prefix that is put before a model file name.
 * The model name is left out when the file name already starts with it (and the branch is the
 * default one), and the last folder is left out when it equals the file name.
 *
 * @param {string} user
 * @param {string} model
 * @param {string} branch
 * @param {string[]} pathParts - the folders leading to the file inside the repository
 * @param {string} filename
 * @returns {string}
 */
function buildHuggingFaceFilePrefix(user, model, branch, pathParts, filename) {
    const isDefaultBranch = branch === defaultHuggingFaceBranch;
    const segments = ["hf", filenamify(user)];

    if (!isDefaultBranch || !doesFilenameMatchExactModelName(filename, model))
        segments.push(filenamify(model));

    if (!isDefaultBranch)
        segments.push(filenamify(branch));

    if (pathParts.length > 0) {
        const folders = doesFilenameMatchExactFolderName(filename, pathParts.at(-1))
            ? pathParts.slice(0, -1)
            : pathParts;

        if (folders.length > 0)
            segments.push(filenamify(folders.join("__")));
    }

    return `${segments.join("_")}_`;
}

/**
 * Derive a local file name from a model download URL.
 * Anything in the URL path after the first ".gguf" is dropped; when the path contains no ".gguf"
 * at all, the last path segment is used as-is.
 *
 * @param {string} modelUrl
 * @returns {string}
 */
function resolveModelFilenameFromUrl(modelUrl) {
    const binarySplitPartUrls = resolveBinarySplitGgufPartUrls(modelUrl);
    if (binarySplitPartUrls instanceof Array)
        return getFilenameForBinarySplitGgufPartUrls(binarySplitPartUrls);

    const { pathname } = new URL(modelUrl);
    const ggufIndex = pathname.toLowerCase().indexOf(".gguf");

    // without this guard, a missing ".gguf" (index -1) would slice the path down to its first 4 characters
    const pathUpToGguf = ggufIndex >= 0
        ? pathname.slice(0, ggufIndex + ".gguf".length)
        : pathname;
    const filename = decodeURIComponent(pathUpToGguf.split("/").pop());

    return filenamify(filename);
}

/**
 * Check whether a `.gguf` file name starts with the name of a `-gguf` suffixed model repository
 * (case-insensitive), either directly or after removing the split-part match from the file name.
 *
 * @param {string} filename
 * @param {string} modelName
 * @returns {boolean}
 */
function doesFilenameMatchExactModelName(filename, modelName) {
    if (!modelName.toLowerCase().endsWith("-gguf") || !filename.toLowerCase().endsWith(".gguf"))
        return false;

    const expectedStart = modelName.slice(0, -"-gguf".length).toLowerCase();
    const filenameWithoutGguf = filename.slice(0, -".gguf".length);
    if (filenameWithoutGguf.toLowerCase().startsWith(expectedStart))
        return true;

    const splitPartsInfo = getGgufSplitPartsInfo(filename);
    if (splitPartsInfo == null)
        return false;

    const filenameWithoutSplitParts = filename.slice(0, filename.length - splitPartsInfo.matchLength);
    return filenameWithoutSplitParts.toLowerCase().startsWith(expectedStart);
}

/**
 * Check whether a `.gguf` file name equals the given folder name (case-insensitive), either
 * without its `.gguf` extension or after removing the split-part match from the file name.
 *
 * @param {string} filename
 * @param {string} folderName
 * @returns {boolean}
 */
function doesFilenameMatchExactFolderName(filename, folderName) {
    if (!filename.toLowerCase().endsWith(".gguf"))
        return false;

    const expectedName = folderName.toLowerCase();
    const filenameWithoutGguf = filename.slice(0, -".gguf".length);
    if (expectedName === filenameWithoutGguf.toLowerCase())
        return true;

    const splitPartsInfo = getGgufSplitPartsInfo(filename);
    if (splitPartsInfo == null)
        return false;

    const filenameWithoutSplitParts = filename.slice(0, filename.length - splitPartsInfo.matchLength);
    return expectedName === filenameWithoutSplitParts.toLowerCase();
}
//# sourceMappingURL=parseModelUri.js.map