/**
 * @huggingface/gguf — a GGUF parser that works on remotely hosted files.
 * Generated TypeScript type declarations (types.d.ts).
 */
import type { TransformerLLM } from "./transformer-llm";
import type { GGMLQuantizationType, GGMLFileQuantizationType } from "@huggingface/tasks";
export { GGMLQuantizationType } from "@huggingface/tasks";
/** Scalar types a GGUF metadata value may hold (64-bit integers surface as `bigint`). */
export type MetadataBaseValue = string | number | bigint | boolean;
/** Any GGUF metadata value: a scalar, or a (possibly nested) array of values. */
export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[];
/** GGUF container format versions this parser understands. */
export type Version = 1 | 2 | 3;
/**
 * Type tag of a metadata value as encoded in a GGUF file.
 * NOTE(review): the numeric values appear to mirror the GGUF on-disk
 * encoding — do not reorder or renumber; confirm against the GGUF spec.
 */
export declare enum GGUFValueType {
    UINT8 = 0,
    INT8 = 1,
    UINT16 = 2,
    INT16 = 3,
    UINT32 = 4,
    INT32 = 5,
    FLOAT32 = 6,
    BOOL = 7,
    STRING = 8,
    ARRAY = 9,
    UINT64 = 10,
    INT64 = 11,
    FLOAT64 = 12
}
/** Model architectures recognized by this parser (possible values of `general.architecture`). */
declare const ARCHITECTURES: readonly ["llama", "deci", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "refact", "bert", "nomic-bert", "jina-bert-v2", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "qwen2vl", "phi2", "phi3", "phimoe", "plamo", "codeshell", "orion", "internlm2", "minicpm", "minicpm3", "gemma", "gemma2", "starcoder2", "mamba", "xverse", "command-r", "cohere2", "dbrx", "olmo", "olmo2", "olmoe", "openelm", "arctic", "deepseek", "deepseek2", "chatglm", "bitnet", "t5", "t5encoder", "jais", "nemotron", "exaone", "rwkv6", "rwkv6qwen2", "granite", "granitemoe", "chameleon", "wavtokenizer-dec", "rwkv", "whisper"];
/** Union of all recognized architecture names, derived from `ARCHITECTURES`. */
export type Architecture = (typeof ARCHITECTURES)[number];
/**
 * `general.*` metadata keys common to all GGUF models.
 *
 * @typeParam TArchitecture - narrows `general.architecture` to a specific
 * architecture so that architecture-specific key sets can be intersected in.
 */
export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
    "general.architecture": TArchitecture;
    "general.name"?: string;
    "general.file_type"?: GGMLFileQuantizationType;
    "general.quantization_version"?: number;
}
/** All architecture-specific metadata shapes known to this package. */
type ModelMetadata = Whisper | RWKV | TransformerLLM;
/** Marker shape for files with no `general.architecture` key (used as the "untyped" branch of `GGUFModelKV`). */
interface NoModelMetadata {
    "general.architecture"?: undefined;
}
/**
 * Common per-architecture numeric keys (`<arch>.context_length`, `<arch>.block_count`,
 * `<arch>.embedding_length`, `<arch>.feed_forward_length`). Whisper is special-cased:
 * its keys are prefixed `encoder.whisper.*` / `decoder.whisper.*`.
 */
export type ModelBase<TArchitecture extends Architecture | `encoder.${Extract<Architecture, "whisper">}` | `decoder.${Extract<Architecture, "whisper">}`> = Record<`${TArchitecture}.context_length` | `${TArchitecture}.block_count` | `${TArchitecture}.embedding_length` | `${TArchitecture}.feed_forward_length`, number>;
/** Value of `tokenizer.ggml.model`: which tokenizer family the vocab belongs to. */
type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
/** `tokenizer.*` metadata keys present when the file embeds a tokenizer. */
interface Tokenizer {
    "tokenizer.ggml.model": TokenizerModel;
    "tokenizer.ggml.tokens": string[];
    "tokenizer.ggml.scores": number[];
    "tokenizer.ggml.token_type": number[];
    "tokenizer.ggml.bos_token_id": number;
    "tokenizer.ggml.eos_token_id": number;
    "tokenizer.ggml.add_bos_token": boolean;
    "tokenizer.chat_template"?: string;
}
/** Marker shape for files without an embedded tokenizer (the other branch of `GGUFModelKV`). */
interface NoTokenizer {
    "tokenizer.ggml.model"?: undefined;
}
/** Metadata shape for RWKV models (`general.architecture === "rwkv"`). */
export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & {
    "rwkv.architecture_version": number;
};
/**
 * Metadata shape for Whisper models: separate encoder/decoder base keys
 * plus whisper-specific attention and mel-bin counts.
 */
export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper"> & {
    "whisper.encoder.mels_count": number;
    "whisper.encoder.attention.head_count": number;
    "whisper.decoder.attention.head_count": number;
};
/** Compile-time options controlling how permissive the `GGUFMetadata` type is. */
export interface GGUFMetadataOptions {
    /**
     * Enable strict type for known GGUF fields.
     *
     * When `false`, `GGUFMetadata` additionally allows arbitrary
     * `string -> MetadataValue` keys beyond the known ones.
     *
     * @default true
     */
    strict: boolean;
}
/**
 * Full metadata of a parsed GGUF file: the fixed header fields
 * (`version`, `tensor_count`, `kv_count` — counts are `bigint` since they
 * are 64-bit on disk) intersected with the known model/tokenizer keys.
 * In non-strict mode, any extra `string -> MetadataValue` key is also allowed.
 */
export type GGUFMetadata<Options extends GGUFMetadataOptions = {
    strict: true;
}> = {
    version: Version;
    tensor_count: bigint;
    kv_count: bigint;
} & GGUFModelKV & (Options extends {
    strict: true;
} ? unknown : Record<string, MetadataValue>);
/** Known key/value metadata: optionally an architecture-specific shape, optionally a tokenizer. */
export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
/** Descriptor of a single tensor in the GGUF tensor-info table. */
export interface GGUFTensorInfo {
    name: string;
    n_dims: number;
    /** Dimension sizes; `bigint` because sizes are 64-bit on disk. */
    shape: bigint[];
    dtype: GGMLQuantizationType;
    /** Byte offset of the tensor data, relative to the start of the data section (not the file) — NOTE(review): confirm against the parser. */
    offset: bigint;
}
/**
 * Result of parsing a GGUF file: its metadata, the tensor-info table, and
 * the absolute byte offset at which the tensor data section begins.
 */
export interface GGUFParseOutput<Options extends GGUFMetadataOptions = {
    strict: true;
}> {
    metadata: GGUFMetadata<Options>;
    tensorInfos: GGUFTensorInfo[];
    tensorDataOffset: bigint;
}
//# sourceMappingURL=types.d.ts.map