UNPKG

node-llama-cpp

Version:

Run AI models locally on your machine with node.js bindings for llama.cpp. Enforce a JSON schema on the model output on the generation level

468 lines 21.8 kB
import path from "path";
import process from "process";
import chalk from "chalk";
import fs from "fs-extra";
import stripAnsi from "strip-ansi";
import logSymbols from "log-symbols";
import { getReadableContextSize } from "../../utils/getReadableContextSize.js";
import { arrowChar } from "../../consts.js";
import { getGgufSplitPartsInfo } from "../../gguf/utils/resolveSplitGgufParts.js";
import { withProgressLog } from "../../utils/withProgressLog.js";
import { GgufInsights } from "../../gguf/insights/GgufInsights.js";
import { readGgufFileInfo } from "../../gguf/readGgufFileInfo.js";
import { getPrettyBuildGpuName } from "../../bindings/consts.js";
import { isUrl } from "../../utils/isUrl.js";
import { isModelUri, parseModelUri } from "../../utils/parseModelUri.js";
import { resolveModelRecommendationFileOptions } from "./resolveModelRecommendationFileOptions.js";
import { getReadablePath } from "./getReadablePath.js";
import { basicChooseFromListConsoleInteraction } from "./basicChooseFromListConsoleInteraction.js";
import { splitAnsiToLines } from "./splitAnsiToLines.js";
import { consolePromptQuestion } from "./consolePromptQuestion.js";
import { renderInfoLine } from "./printInfoLine.js";
import { renderModelCompatibilityPercentageWithColors } from "./renderModelCompatibilityPercentageWithColors.js";
import { toBytes } from "./toBytes.js";

// How long to wait (in milliseconds) after the title is rendered before re-reading the VRAM state
const vramStateUpdateInterval = 1000;

/**
 * Show an interactive console list for choosing a model and resolve with the choice.
 *
 * The list contains an action for typing a URI/path, the `.gguf` files found in
 * `modelsDirectory` (when `allowLocalModels` is enabled and the directory exists),
 * and the entries from the lazily imported `recommendedModels` list.
 *
 * @param {object} options
 * @param {object} options.llama - used to read the VRAM state and GPU name, and passed to `GgufInsights.from`
 * @param {string | null | undefined} options.modelsDirectory - directory scanned for local `.gguf` files
 * @param {boolean} [options.allowLocalModels=true] - list local files and accept file paths in the prompt
 * @param {boolean} [options.downloadIntent=true] - adds a "(select to download)" hint to the recommended models header
 * @param {boolean} [options.flashAttention=false] - forwarded to the compatibility scoring and shown in the title
 * @param {boolean | undefined} options.useMmap - forwarded to the compatibility scoring
 * @returns {Promise<string>} the path of the selected local model, the URI of the selected
 * recommended model, or the URI/path the user typed
 */
export async function interactivelyAskForModel({ llama, modelsDirectory, allowLocalModels = true, downloadIntent = true, flashAttention = false, useMmap }) {
    let localModelFileOptions = [];
    const recommendedModelOptions = [];
    const activeInteractionController = new AbortController();
    let scheduledTitleRerenderTimeout = undefined;
    let vramState = await llama.getVramState();
    const canUseGpu = vramState.total > 0;

    if (allowLocalModels && modelsDirectory != null && await fs.pathExists(modelsDirectory)) {
        // only list standalone files and the first part of split files
        const ggufFileNames = (await fs.readdir(modelsDirectory))
            .filter((fileName) => {
                if (!fileName.endsWith(".gguf"))
                    return false;

                const partsInfo = getGgufSplitPartsInfo(fileName);
                return partsInfo == null || partsInfo.part === 1;
            });
        let readItems = 0;
        const renderProgress = () => ("(" + String(readItems)
            .padStart(String(ggufFileNames.length).length, "0") + "/" + ggufFileNames.length + ")");

        if (ggufFileNames.length > 0)
            await withProgressLog({
                loadingText: "Reading local models directory",
                failText: "Failed to read local models directory",
                successText: "Read local models directory",
                noSuccessLiveStatus: true,
                initialProgressBarText: renderProgress()
            }, async (progressUpdater) => {
                localModelFileOptions = await Promise.all(ggufFileNames.map(async (fileName) => {
                    const filePath = path.join(modelsDirectory, fileName);
                    let ggufInsights = undefined;

                    try {
                        const ggufFileInfo = await readGgufFileInfo(filePath, {
                            sourceType: "filesystem",
                            signal: activeInteractionController.signal
                        });
                        ggufInsights = await GgufInsights.from(ggufFileInfo, llama);
                    }
                    catch (err) {
                        // best-effort: a file whose metadata cannot be read is still listed,
                        // and is rendered with a "Cannot read metadata" badge
                    }

                    readItems++;
                    progressUpdater.setProgress(readItems / ggufFileNames.length, renderProgress());

                    const compatibilityScore = await ggufInsights?.configurationResolver.scoreModelConfigurationCompatibility({
                        flashAttention: flashAttention && ggufInsights?.flashAttentionSupported,
                        useMmap
                    });

                    return {
                        type: "localModel",
                        title: fileName,
                        path: filePath,
                        addedDate: (await fs.stat(filePath)).birthtimeMs,
                        ggufInsights: ggufInsights,
                        compatibilityScore: compatibilityScore?.compatibilityScore,
                        compatibilityBonusScore: compatibilityScore?.bonusScore,
                        compatibilityContextSize: compatibilityScore?.resolvedValues.contextSize
                    };
                }));
                localModelFileOptions.sort(compareLocalModelOptions);
            });
    }

    try {
        // if this file gets very big, we don't want to load it on every CLI usage
        const { recommendedModels } = await import("../recommendedModels.js");

        for (const recommendedModel of recommendedModels) {
            const potentialUris = resolveModelRecommendationFileOptions(recommendedModel);

            if (potentialUris.length > 0)
                recommendedModelOptions.push({
                    type: "recommendedModel",
                    title: recommendedModel.name,
                    potentialUris,
                    description: recommendedModel.description
                });
        }
    }
    catch (err) {
        // best-effort: without the recommendations list, the section is simply omitted
    }

    let initialFocusIndex = 3; // first model option
    const options = [
        {
            type: "action",
            text: allowLocalModels
                ? "Enter a model URI or file path..."
                : "Enter a model URI...",
            key: "getPath"
        },
        ...((localModelFileOptions.length === 0 || modelsDirectory == null)
            ? []
            : [
                {
                    type: "separator",
                    text: () => " " + chalk.gray("-".repeat(4))
                },
                {
                    type: "separator",
                    text: " " + chalk.bold("Downloaded models") + " " + chalk.dim(`(${getReadablePath(modelsDirectory)})`)
                },
                ...localModelFileOptions
            ]),
        ...(recommendedModelOptions.length === 0
            ? []
            : [
                {
                    type: "separator",
                    text: () => " " + chalk.gray("-".repeat(4))
                },
                {
                    type: "separator",
                    text: " " + chalk.bold("Recommended models") + (downloadIntent
                        ? (" " + chalk.dim("(select to download)"))
                        : "")
                },
                ...recommendedModelOptions
            ])
    ];

    try {
        while (true) {
            const minWidth = Math.min(80 + (flashAttention ? 26 : 0), process.stdout.columns - 1);
            const selectedItem = await basicChooseFromListConsoleInteraction({
                title(item, rerender) {
                    const title = chalk.bold("Select a model:") + " ";
                    const vramStateText = renderVramStateText(vramState, llama, flashAttention);
                    const pad = Math.max(0, minWidth - (stripAnsi(title).length + stripAnsi(vramStateText).length));

                    // every title render schedules a single follow-up check of the VRAM state
                    clearTimeout(scheduledTitleRerenderTimeout);
                    scheduledTitleRerenderTimeout = setTimeout(async () => {
                        let newVramState;
                        try {
                            newVramState = await llama.getVramState();
                        }
                        catch (err) {
                            // the refresh is best-effort; a rejection here would otherwise be unhandled
                            return;
                        }

                        // don't rerender after the whole interaction has finished
                        if (activeInteractionController.signal.aborted)
                            return;

                        if (vramState.used !== newVramState.used || vramState.total !== newVramState.total) {
                            vramState = newVramState;
                            rerender();
                        }
                    }, vramStateUpdateInterval);

                    return [
                        title,
                        " ".repeat(pad),
                        vramStateText
                    ].join("");
                },
                footer(item) {
                    if (item.type !== "recommendedModel" || item.description == null)
                        return undefined;

                    const leftPad = 3;
                    const maxWidth = Math.max(1, process.stdout.columns - 2 - leftPad);
                    const lines = splitAnsiToLines(item.description, maxWidth);

                    return " \n" +
                        " ".repeat(leftPad) + chalk.bold.gray("Model description") + "\n" +
                        lines.map((line) => (" ".repeat(leftPad) + line))
                            .join("\n") + "\n" +
                        splitAnsiToLines(renderRecommendedModelTechnicalInfo(item.selectedUri, maxWidth, canUseGpu), maxWidth)
                            .map((line) => (" ".repeat(leftPad) + line))
                            .join("\n");
                },
                items: options,
                renderItem(item, focused, rerender) {
                    return renderSelectionItem(item, focused, rerender, activeInteractionController.signal, llama, flashAttention, useMmap);
                },
                canFocusItem(item) {
                    return item.type === "recommendedModel" || item.type === "localModel" || item.type === "action";
                },
                canSelectItem(item) {
                    // a recommended model can only be selected after one of its URIs was resolved
                    if (item.type === "recommendedModel")
                        return item.selectedUri != null;

                    return item.type === "localModel" || item.type === "action";
                },
                initialFocusIndex: Math.min(initialFocusIndex, options.length - 1),
                aboveItemsPadding: 1,
                belowItemsPadding: 1,
                renderSummaryOnExit(item) {
                    if (item != null && (item.type === "localModel" || item.type === "recommendedModel")) {
                        const modelTitle = item.title instanceof Function
                            ? item.title()
                            : item.title;
                        return logSymbols.success + " Selected model " + chalk.blue(modelTitle);
                    }

                    return "";
                },
                exitOnCtrlC: true
            });

            // the list is closed now, so a pending VRAM refresh must not rerender it
            clearTimeout(scheduledTitleRerenderTimeout);

            if (selectedItem == null || selectedItem.type === "separator")
                continue;
            else if (selectedItem.type === "localModel")
                return selectedItem.path;
            else if (selectedItem.type === "recommendedModel" && selectedItem.selectedUri != null)
                return selectedItem.selectedUri.uri;
            else if (selectedItem.type === "action") {
                if (selectedItem.key === "getPath") {
                    // when returning from the prompt, focus the action item again
                    initialFocusIndex = 0;
                    const selectedModelUriOrPath = await askForModelUriOrPath(allowLocalModels);

                    if (selectedModelUriOrPath == null)
                        continue;

                    return selectedModelUriOrPath;
                }
            }
        }
    }
    finally {
        clearTimeout(scheduledTitleRerenderTimeout);
        activeInteractionController.abort();
    }
}

/**
 * Comparator for the local model options.
 * Options without a compatibility score come first (newest first among themselves);
 * scored options are ordered by descending score, and equal scores by descending bonus score.
 */
function compareLocalModelOptions(a, b) {
    if (a.compatibilityScore == null && b.compatibilityScore == null)
        return b.addedDate - a.addedDate;
    else if (a.compatibilityScore == null)
        return -1;
    else if (b.compatibilityScore == null)
        return 1;
    else if (b.compatibilityScore === a.compatibilityScore &&
        b.compatibilityBonusScore != null && a.compatibilityBonusScore != null)
        return b.compatibilityBonusScore - a.compatibilityBonusScore;

    return b.compatibilityScore - a.compatibilityScore;
}

/**
 * Render the badges shown at the right side of the list title:
 * "No GPU" when the total VRAM is 0, otherwise the GPU name, the VRAM usage
 * and (when enabled) a flash attention badge.
 */
function renderVramStateText(vramState, llama, flashAttention) {
    if (vramState.total === 0)
        return chalk.bgGray(" " + "No GPU" + " ");

    // percentage floored to 2 decimal places
    const usagePercentage = Math.floor((vramState.used / vramState.total) * 100 * 100) / 100;
    const gpuBadge = chalk.bgGray(" " + chalk.yellow("GPU:") + " " + getPrettyBuildGpuName(llama.gpu) + " ");
    const vramBadge = chalk.bgGray(" " + chalk.yellow("VRAM usage:") + " " +
        (String(usagePercentage) + "%") + " " +
        chalk.dim("(" + toBytes(vramState.used) + "/" + toBytes(vramState.total) + ")") + " ");
    const flashAttentionBadge = !flashAttention
        ? ""
        : (" " + chalk.bgGray(" " + chalk.yellow("Flash attention:") + " " + "enabled" + " "));

    return gpuBadge + " " + vramBadge + flashAttentionBadge;
}

/**
 * Prompt for a model URL, model URI or (when `allowLocalModels` is enabled) an existing file path.
 * The input is validated in that order; the first check that accepts it wins.
 * @returns whatever `consolePromptQuestion` resolves with; the caller treats a nullish result as "go back to the list"
 */
async function askForModelUriOrPath(allowLocalModels) {
    return await consolePromptQuestion(allowLocalModels
        ? chalk.bold("Enter a model URI or file path: ")
        : chalk.bold("Enter a model URI: "), {
        exitOnCtrlC: false,
        async validate(input) {
            if (isUrl(input, false)) {
                try {
                    new URL(input);
                }
                catch (err) {
                    return "Invalid URL";
                }

                return null;
            }

            try {
                if (parseModelUri(input) != null)
                    return null;
            }
            catch (err) {
                return err instanceof Error
                    ? (err.message || "Invalid model URI")
                    : "Invalid model URI";
            }

            if (!allowLocalModels)
                return "Only URIs are allowed";

            try {
                if (await fs.pathExists(input))
                    return null;

                return "File does not exist";
            }
            catch (err) {
                return "Invalid path";
            }
        },
        renderSummaryOnExit(item) {
            if (item == null)
                return "";

            if (isUrl(item, false))
                return logSymbols.success + " Entered model URL " + chalk.blue(item);
            else if (isModelUri(item))
                return logSymbols.success + " Entered model URI " + chalk.blue(item);

            return logSymbols.success + " Entered model path " + chalk.blue(item);
        }
    });
}

/**
 * Render a single list item according to its `type`.
 * Rendering a recommended model for the first time also kicks off the background
 * resolution of its best file (see `selectFileForModelRecommendation`), which calls
 * `rerender` when it finishes.
 */
function renderSelectionItem(item, focused, rerender, abortSignal, llama, flashAttention, useMmap) {
    if (item.type === "localModel") {
        let modelText = item.title instanceof Function
            ? item.title()
            : item.title;

        if (item.ggufInsights != null)
            modelText += " " + renderModelCompatibility(item.ggufInsights, item.compatibilityScore, item.compatibilityContextSize);
        else
            modelText += " " + chalk.bgGray.yellow(" Cannot read metadata ");

        return renderSelectableItem(modelText, focused);
    }
    else if (item.type === "recommendedModel") {
        let modelText = item.title instanceof Function
            ? item.title()
            : item.title;

        if (item.selectedUri == null) {
            if (item.uriSelectionLoadingState == null) {
                item.uriSelectionLoadingState = "loading";
                // intentionally not awaited; the function handles its own errors and rerenders when done
                void selectFileForModelRecommendation({
                    recommendedModelOption: item,
                    abortSignal,
                    rerenderOption: rerender,
                    llama,
                    flashAttention,
                    useMmap
                });
            }

            if (item.uriSelectionLoadingState === "loading")
                modelText += " " + chalk.bgGray.yellow(" Loading info ");
            else if (item.uriSelectionLoadingState === "done")
                // loading finished, but no URI was selected
                modelText += " " + chalk.bgGray.yellow(" Failed to load info ");
        }
        else
            modelText += " " + renderModelCompatibility(item.selectedUri.ggufInsights, item.selectedUri.compatibilityScore.compatibilityScore, item.selectedUri.compatibilityScore.resolvedValues.contextSize);

        return renderSelectableItem(modelText, focused);
    }
    else if (item.type === "separator") {
        return item.text instanceof Function
            ? item.text()
            : item.text;
    }
    else if (item.type === "action") {
        const actionText = item.text instanceof Function
            ? item.text()
            : item.text;
        return renderSelectableItem(actionText, focused);
    }

    return "";
}

/** Prefix a focusable item with a cyan arrow when it's focused, or with a bullet otherwise. */
function renderSelectableItem(text, focused) {
    if (focused)
        return " " + chalk.cyan(arrowChar) + " " + chalk.cyan(text);

    return " * " + text;
}

/**
 * Render the badges shown next to a model title: the compatibility percentage
 * (with the resolved context size, when available) and the model size.
 */
function renderModelCompatibility(ggufInsights, compatibilityScore, compatibilityContextSize) {
    const info = [];

    if (compatibilityScore != null)
        info.push(renderModelCompatibilityPercentageWithColors(compatibilityScore * 100) + chalk.whiteBright(" compatibility") + (compatibilityContextSize == null
            ? ""
            : (chalk.gray(" | ") + chalk.yellow(getReadableContextSize(compatibilityContextSize)) + chalk.whiteBright(" context"))));

    info.push(chalk.yellow("Size:") + " " + chalk.whiteBright(toBytes(ggufInsights.modelSize)));

    return info
        .map((item) => chalk.bgGray(" " + item + " "))
        .join(" ");
}

/**
 * Render the "Model info" and "Resolved config" lines shown in the footer of a recommended model,
 * or a "Loading info" badge while no URI has been selected for it yet.
 */
function renderRecommendedModelTechnicalInfo(modelSelectedUri, maxWidth, canUseGpu) {
    if (modelSelectedUri == null)
        return " \n" + chalk.bgGray.yellow(" Loading info ") + "\n ";

    const ggufInsights = modelSelectedUri.ggufInsights;
    const compatibilityScore = modelSelectedUri.compatibilityScore;
    // pad both line titles to the same width
    const longestTitle = Math.max("Model info".length, "Resolved config".length) + 1;

    return " \n" + [
        renderInfoLine({
            title: "Model info",
            padTitle: longestTitle,
            separateLines: false,
            maxWidth,
            info: [{
                title: "Size",
                value: toBytes(ggufInsights.modelSize)
            }, {
                show: ggufInsights.trainContextSize != null,
                title: "Train context size",
                value: () => getReadableContextSize(ggufInsights.trainContextSize ?? 0)
            }]
        }),
        renderInfoLine({
            title: "Resolved config",
            padTitle: longestTitle,
            separateLines: false,
            maxWidth,
            info: [{
                title: "",
                value: renderModelCompatibilityPercentageWithColors(compatibilityScore.compatibilityScore * 100) + " compatibility"
            }, {
                show: ggufInsights.trainContextSize != null,
                title: "Context size",
                value: getReadableContextSize(compatibilityScore.resolvedValues.contextSize)
            }, {
                show: canUseGpu,
                title: "GPU layers",
                value: () => (compatibilityScore.resolvedValues.gpuLayers + "/" + ggufInsights.totalLayers + " " +
                    chalk.dim(`(${Math.floor((compatibilityScore.resolvedValues.gpuLayers / ggufInsights.totalLayers) * 100)}%)`))
            }, {
                show: canUseGpu,
                title: "VRAM usage",
                value: () => toBytes(compatibilityScore.resolvedValues.totalVramUsage)
            }, {
                title: "RAM usage",
                value: () => toBytes(compatibilityScore.resolvedValues.totalRamUsage)
            }]
        })
    ].join("\n");
}

/**
 * Go over the potential URIs of a recommended model in order, score each one and store the
 * best-scoring one on `recommendedModelOption.selectedUri` (stops early on a score of exactly 1).
 * Sets `uriSelectionLoadingState` to "done" and calls `rerenderOption` when finished,
 * unless `abortSignal` was aborted in the meantime.
 */
async function selectFileForModelRecommendation({ recommendedModelOption, llama, abortSignal, rerenderOption, flashAttention, useMmap }) {
    try {
        let bestScore = undefined;
        let bestScoreSelectedUri = undefined;

        for (const potentialUri of recommendedModelOption.potentialUris) {
            if (abortSignal.aborted)
                return;

            try {
                const ggufFileInfo = await readGgufFileInfo(potentialUri, {
                    sourceType: "network",
                    signal: abortSignal
                });
                const ggufInsights = await GgufInsights.from(ggufFileInfo, llama);

                if (abortSignal.aborted)
                    return;

                const compatibilityScore = await ggufInsights.configurationResolver.scoreModelConfigurationCompatibility({
                    flashAttention,
                    useMmap
                });

                if (bestScore == null || compatibilityScore.compatibilityScore > bestScore) {
                    bestScore = compatibilityScore.compatibilityScore;
                    bestScoreSelectedUri = {
                        uri: potentialUri,
                        ggufInsights,
                        compatibilityScore
                    };

                    if (bestScore === 1)
                        break;
                }
            }
            catch (err) {
                // best-effort: a URI that fails to load or score is skipped, the next one is tried
            }
        }

        recommendedModelOption.selectedUri = bestScoreSelectedUri;
        recommendedModelOption.uriSelectionLoadingState = "done";
        rerenderOption();
    }
    catch (err) {
        recommendedModelOption.uriSelectionLoadingState = "done";
        rerenderOption();
    }
}
//# sourceMappingURL=interactivelyAskForModel.js.map