UNPKG

@huggingface/gguf

Version:

a GGUF parser that works on remotely hosted files

github.com/huggingface/huggingface.js

huggingface/huggingface.js

179 lines (177 loc) • 6.63 kB

JavaScript

#!/usr/bin/env node
import {
  GGMLQuantizationType,
  GGML_QUANT_SIZES,
  ggufAllShards
} from "./chunk-V6DSHS26.mjs";

// src/cli.ts

/** Reverse lookup table: GGMLQuantizationType value -> enum member name (used for display). */
const mapDtypeToName = Object.fromEntries(
  Object.entries(GGMLQuantizationType).map(([name, value]) => [value, name])
);

/** Maximum number of characters of a metadata value shown before it is truncated with "...". */
const MAX_METADATA_VALUE_LEN = 50;

/**
 * Print the usage text to stderr and terminate the process.
 *
 * @param {number} exitCode - Process exit code (0 for an explicit --help, non-zero for usage errors).
 */
function showHelp(exitCode) {
  console.error("Usage: gguf-view [--help|-h] [--show-tensor] [--context|-c N] <path/to/gguf>");
  console.error(" --help, -h Show this help message");
  console.error(" --show-tensor Show tensor information");
  console.error(" --context, -c N Number of tokens in context (default: 4096)");
  process.exit(exitCode);
}

/**
 * Render one metadata value as a short, single-line string for the key/value table.
 *
 * Arrays and strings are JSON-encoded, everything else goes through `String()`.
 * The result is cut to `maxLen` characters followed by "..." when longer.
 *
 * @param {unknown} value - Metadata value as returned by the parser.
 * @param {number} [maxLen=MAX_METADATA_VALUE_LEN] - Truncation threshold in characters.
 * @returns {string} Display string.
 */
function formatMetadataValue(value, maxLen = MAX_METADATA_VALUE_LEN) {
  let strVal;
  if (Array.isArray(value)) {
    // JSON.stringify throws on BigInt; tensor shapes in this file are BigInt, so
    // metadata arrays may presumably contain them as well. Render them as digits.
    strVal = JSON.stringify(value, (_key, v) => (typeof v === "bigint" ? v.toString() : v));
  } else if (value instanceof String || typeof value === "string") {
    strVal = JSON.stringify(value);
  } else {
    // String() instead of value.toString() so null/undefined do not throw.
    strVal = String(value);
  }
  return strVal.length > maxLen ? strVal.slice(0, maxLen) + "..." : strVal;
}

/**
 * CLI entry point: parses argv, loads every shard of the GGUF file, then prints
 * the metadata table, a memory usage estimation and (optionally) the tensor table.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let ggufPath = "";
  let showTensors = false;
  let nCtx = 4096;

  for (let i = 2; i < process.argv.length; i++) {
    const arg = process.argv[i];
    if (arg === "--help" || arg === "-h") {
      showHelp(0);
    } else if (arg === "--show-tensor") {
      showTensors = true;
    } else if (arg === "--context" || arg === "-c") {
      const rawValue = process.argv[++i];
      nCtx = Number(rawValue);
      // Reject a missing / non-numeric value up front instead of printing "NaN GB" later.
      if (rawValue === undefined || !Number.isInteger(nCtx) || nCtx <= 0) {
        console.error(`Error: ${arg} expects a positive integer, got "${rawValue ?? ""}"`);
        showHelp(1);
      }
    } else {
      ggufPath = arg;
    }
  }

  if (!ggufPath.length) {
    console.error("Error: Missing path to gguf file");
    showHelp(1);
  }

  const { shards } = await ggufAllShards(ggufPath, { allowLocalFile: true });
  // Metadata is taken from the first shard only; tensors are collected from all shards.
  const { metadata } = shards[0];
  // Build a fresh list rather than pushing into shard 0's array (no mutation of the
  // parser's result, and no argument-spread of a potentially very large array).
  const tensorInfos = shards.flatMap((shard) => shard.tensorInfos);

  console.log(`* Dumping ${Object.keys(metadata).length} key/value pair(s)`);
  printTable(
    [
      { name: "Idx", alignRight: true },
      // { name: 'Type' }, // TODO: support this
      { name: "Count", alignRight: true },
      { name: "Value" }
    ],
    Object.entries(metadata).map(([key, value], i) => {
      const count = Array.isArray(value) ? value.length : 1;
      return [(i + 1).toString(), count.toString(), `${key} = ${formatMetadataValue(value)}`];
    })
  );

  console.log();
  console.log(`* Memory usage estimation (with context length of ${nCtx} tokens)`);
  try {
    const kvUsage = calcMemoryUsage(metadata, nCtx);

    let modelWeightInBytes = 0;
    for (const tensorInfo of tensorInfos) {
      const nElem = Number(tensorInfo.shape.reduce((a, b) => a * b, 1n));
      // GGML_QUANT_SIZES is divided by 8 here, i.e. it is treated as bits per element.
      modelWeightInBytes += nElem * (GGML_QUANT_SIZES[tensorInfo.dtype] / 8);
    }

    // Heuristic overhead: KV usage of a 256-token context plus 5% of the weights.
    const overhead = calcMemoryUsage(metadata, 256).totalBytes + modelWeightInBytes * 0.05;
    const totalMemoryUsage = kvUsage.totalBytes + overhead + modelWeightInBytes;

    const toGB = (bytes) => (bytes / 1e9).toFixed(2) + " GB";
    printTable(
      [{ name: "Item" }, { name: "Memory usage", alignRight: true }],
      [
        ["K cache", toGB(kvUsage.totalBytesK)],
        ["V cache", toGB(kvUsage.totalBytesV)],
        ["Weight", toGB(modelWeightInBytes)],
        ["Overhead", toGB(overhead)],
        ["", "---"],
        ["TOTAL", toGB(totalMemoryUsage)]
      ]
    );
  } catch (e) {
    // Estimation is best-effort (e.g. unsupported architectures): report and carry on.
    console.error(`Error: ${e.message}`);
  }

  if (showTensors) {
    console.log();
    console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
    printTable(
      [
        { name: "Idx", alignRight: true },
        { name: "Num Elements", alignRight: true },
        { name: "Shape" },
        { name: "Data Type" },
        { name: "Name" }
      ],
      tensorInfos.map((tensorInfo, i) => {
        // Pad the shape with 1s so that at least four dimensions are always displayed.
        const shape = [1n, 1n, 1n, 1n];
        tensorInfo.shape.forEach((dim, dimIdx) => {
          shape[dimIdx] = dim;
        });
        return [
          (i + 1).toString(),
          shape.reduce((acc, n) => acc * n, 1n).toString(),
          shape.map((n) => n.toString().padStart(6)).join(", "),
          mapDtypeToName[tensorInfo.dtype],
          tensorInfo.name
        ];
      })
    );
  } else {
    console.log();
    console.log(`* Use --show-tensor to display tensor information`);
  }
}

/**
 * Estimate the size of the K and V caches from the model metadata.
 *
 * Reads `general.architecture` and the `<arch>.*` keys for embedding length, head
 * counts, key/value lengths and block count; missing keys fall back to 0 (or to
 * `n_embd / n_head` for the per-head key/value lengths).
 *
 * @param {Record<string, any>} metadata - GGUF metadata key/value map.
 * @param {number} kvSize - Number of cached positions (context length in tokens).
 * @param {number} [kvTypeK=GGMLQuantizationType.F16] - Quantization type of the K cache.
 * @param {number} [kvTypeV=GGMLQuantizationType.F16] - Quantization type of the V cache.
 * @returns {{ totalBytesK: number, totalBytesV: number, totalBytes: number }} Sizes in bytes.
 * @throws {Error} When the architecture name starts with "mamba" or "rwkv".
 */
function calcMemoryUsage(metadata, kvSize, kvTypeK = GGMLQuantizationType.F16, kvTypeV = GGMLQuantizationType.F16) {
  const arch = metadata["general.architecture"] ?? "unknown";
  if (arch.startsWith("mamba") || arch.startsWith("rwkv")) {
    throw new Error(`Memory usage estimation for arch "${arch}" is not supported`);
  }

  const n_embd = metadata[`${arch}.embedding_length`] ?? 0;
  const n_head = metadata[`${arch}.attention.head_count`] ?? 0;
  const n_embd_head_k = metadata[`${arch}.attention.key_length`] ?? n_embd / n_head;
  const n_embd_head_v = metadata[`${arch}.attention.value_length`] ?? n_embd / n_head;
  const n_head_kv = metadata[`${arch}.attention.head_count_kv`] ?? [];
  const n_layer = metadata[`${arch}.block_count`] ?? 0;

  // Per-layer KV head count. Defaults to n_head; a scalar head_count_kv applies to
  // every layer, an array overrides only the layers for which it has a truthy entry.
  const n_head_kv_arr = Array(n_layer).fill(n_head);
  if (Array.isArray(n_head_kv)) {
    for (let i = 0; i < n_layer; i++) {
      if (n_head_kv[i]) {
        n_head_kv_arr[i] = n_head_kv[i];
      }
    }
  } else {
    n_head_kv_arr.fill(n_head_kv);
  }

  let totalElemsK = 0;
  let totalElemsV = 0;
  for (let i = 0; i < n_layer; i++) {
    totalElemsK += n_embd_head_k * n_head_kv_arr[i] * kvSize;
    totalElemsV += n_embd_head_v * n_head_kv_arr[i] * kvSize;
  }

  const totalBytesK = totalElemsK * (GGML_QUANT_SIZES[kvTypeK] / 8);
  const totalBytesV = totalElemsV * (GGML_QUANT_SIZES[kvTypeV] / 8);
  return {
    totalBytesK,
    totalBytesV,
    // Sum the two caches so that each one is sized with its own quantization type
    // (previously the V type was applied to the K elements as well).
    totalBytes: totalBytesK + totalBytesV
  };
}

/**
 * Print a plain-text table to stdout.
 *
 * @param {Array<{ name: string, alignRight?: boolean, maxWidth?: number }>} header - Column definitions.
 * @param {string[][]} rows - Row cells, one string per column; missing cells render as empty.
 * @param {number} [leftPad=2] - Number of spaces printed before every line.
 */
function printTable(header, rows, leftPad = 2) {
  const leftPadStr = " ".repeat(leftPad);

  // Each column is as wide as its longest cell (or its title), capped by maxWidth if given.
  const columnWidths = header.map((h, i) => {
    const maxContentWidth = Math.max(h.name.length, ...rows.map((row) => (row[i] || "").length));
    return h.maxWidth ? Math.min(maxContentWidth, h.maxWidth) : maxContentWidth;
  });

  const headerLine = header.map((h, i) => h.name.padEnd(columnWidths[i])).join(" | ");
  console.log(leftPadStr + headerLine);
  console.log(leftPadStr + columnWidths.map((w) => "-".repeat(w)).join("-|-"));

  for (const row of rows) {
    const line = header
      .map((h, i) => {
        const cell = row[i] || "";
        return h.alignRight ? cell.padStart(columnWidths[i]) : cell.padEnd(columnWidths[i]);
      })
      .join(" | ");
    console.log(leftPadStr + line);
  }
}

// Report failures (unreadable file, network error, ...) as a one-line message with a
// non-zero exit code instead of leaving the promise rejection unhandled.
main().catch((err) => {
  console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
  process.exitCode = 1;
});