// @huggingface/gguf
// Version:
// a GGUF parser that works on remotely hosted files
// 246 lines • 12.4 kB
// TypeScript
/** This file is auto-generated by generate-llm.ts */
import type { ModelBase, GGUFGeneralInfo } from "./types";
/**
 * Base metadata keys shared by every architecture, namespaced under the
 * architecture name (e.g. "llama.vocab_size").
 *
 * Fix: the original mapped all three keys to `number`. Per the GGUF spec,
 * `<arch>.use_parallel_residual` is a boolean and `<arch>.tensor_data_layout`
 * is a string; mapping them to `number` also made the intersection with
 * per-arch declarations (e.g. ArchGptneox's
 * `"gptneox.use_parallel_residual": boolean`) collapse to `never`.
 */
type LLMBase<TArchitecture extends string> = Partial<
	Record<`${TArchitecture}.vocab_size`, number> &
		Record<`${TArchitecture}.use_parallel_residual`, boolean> &
		Record<`${TArchitecture}.tensor_data_layout`, string>
>;
/**
 * Attention metadata keys for one architecture: the head count is required,
 * while KV-head count and key/value head dimensions are optional.
 */
type Attention<TArchitecture extends string> = Record<
	`${TArchitecture}.attention.head_count`,
	number
> &
	Partial<
		Record<
			| `${TArchitecture}.attention.head_count_kv`
			| `${TArchitecture}.attention.key_length`
			| `${TArchitecture}.attention.value_length`,
			number
		>
	>;
/** Allowed values of the `<arch>.rope.scaling.type` metadata key. */
export type TransformerLLMRopeScalingType = "none" | "linear" | "yarn";
// Optional RoPE (rotary position embedding) metadata keys, namespaced per architecture.
type Rope<TArchitecture extends LLMArchitecture> = Partial<Record<`${TArchitecture}.rope.dimension_count` | `${TArchitecture}.rope.freq_base` | `${TArchitecture}.rope.scale_linear` | `${TArchitecture}.rope.scaling.factor` | `${TArchitecture}.rope.scaling.original_context_length`, number> & Record<`${TArchitecture}.rope.scaling.type`, TransformerLLMRopeScalingType> & Record<`${TArchitecture}.rope.finetuned`, boolean>>;
// Optional mixture-of-experts keys (total and active expert counts).
type MOE<TArchitecture extends LLMArchitecture> = Partial<Record<`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`, number>>;
// Public re-export of the architecture-name union declared below.
export type TransformerLLMArchitecture = LLMArchitecture;
/**
 * Metadata common to all transformer LLM architectures: general info, base
 * model keys, MoE, attention and RoPE keys, each namespaced under the
 * architecture name (e.g. "llama.rope.freq_base").
 */
export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture> & LLMBase<TArchitecture> & ModelBase<TArchitecture> & MOE<TArchitecture> & Attention<TArchitecture> & Rope<TArchitecture>;
/**
 * Pooling strategy stored under the `<arch>.pooling_type` metadata key
 * (used by the BERT-family architectures below).
 * NOTE(review): numeric values presumably mirror llama.cpp's
 * llama_pooling_type enum — confirm against generate-llm.ts.
 */
export declare enum TransformerLLMPoolingType {
UNSPECIFIED = -1,
NONE = 0,
MEAN = 1,
CLS = 2
}
/** Every model architecture name this parser has per-architecture typings for. */
export declare const LLM_ARCHITECTURES: readonly ["llama", "deci", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "refact", "bert", "nomic-bert", "jina-bert-v2", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "qwen2vl", "phi2", "phi3", "phimoe", "plamo", "codeshell", "orion", "internlm2", "minicpm", "minicpm3", "gemma", "gemma2", "starcoder2", "mamba", "xverse", "command-r", "cohere2", "dbrx", "olmo", "olmo2", "olmoe", "openelm", "arctic", "deepseek", "deepseek2", "chatglm", "bitnet", "t5", "t5encoder", "jais", "nemotron", "exaone", "rwkv6", "rwkv6qwen2", "granite", "granitemoe", "chameleon", "wavtokenizer-dec"];
// Union of the architecture name literals above, e.g. "llama" | "deci" | ….
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
// ---------------------------------------------------------------------------
// Per-architecture metadata shapes. Each `ArchFoo` extends the shared
// TransformerLLMBase keys with that architecture's specific GGUF keys, all
// namespaced under the architecture name (e.g. "llama.attention...").
// ---------------------------------------------------------------------------
export type ArchLlama = TransformerLLMBase<"llama"> & {
"llama.attention.layer_norm_rms_epsilon": number;
};
export type ArchDeci = TransformerLLMBase<"deci"> & {
"deci.attention.layer_norm_rms_epsilon": number;
};
export type ArchFalcon = TransformerLLMBase<"falcon"> & {
"falcon.attention.layer_norm_epsilon": number;
};
export type ArchGrok = TransformerLLMBase<"grok"> & {
"grok.attention.layer_norm_rms_epsilon": number;
};
export type ArchGpt2 = TransformerLLMBase<"gpt2"> & {
"gpt2.attention.layer_norm_epsilon": number;
};
export type ArchGptj = TransformerLLMBase<"gptj">;
export type ArchGptneox = TransformerLLMBase<"gptneox"> & {
"gptneox.attention.layer_norm_epsilon": number;
"gptneox.use_parallel_residual": boolean;
};
export type ArchMpt = TransformerLLMBase<"mpt"> & {
"mpt.attention.layer_norm_epsilon": number;
"mpt.attention.clamp_kqv": number;
"mpt.attention.max_alibi_bias": number;
};
export type ArchBaichuan = TransformerLLMBase<"baichuan"> & {
"baichuan.attention.layer_norm_rms_epsilon": number;
};
export type ArchStarcoder = TransformerLLMBase<"starcoder"> & {
"starcoder.attention.layer_norm_epsilon": number;
};
export type ArchRefact = TransformerLLMBase<"refact"> & {
"refact.attention.layer_norm_rms_epsilon": number;
};
// BERT-family encoders additionally declare causality and a pooling strategy.
export type ArchBert = TransformerLLMBase<"bert"> & {
"bert.attention.layer_norm_epsilon": number;
"bert.attention.causal": boolean;
"bert.pooling_type": TransformerLLMPoolingType;
};
export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
"nomic-bert.attention.layer_norm_epsilon": number;
"nomic-bert.attention.causal": boolean;
"nomic-bert.pooling_type": TransformerLLMPoolingType;
};
export type ArchJinaBertV2 = TransformerLLMBase<"jina-bert-v2"> & {
"jina-bert-v2.attention.layer_norm_epsilon": number;
"jina-bert-v2.attention.causal": boolean;
"jina-bert-v2.pooling_type": TransformerLLMPoolingType;
};
export type ArchBloom = TransformerLLMBase<"bloom"> & {
"bloom.attention.layer_norm_epsilon": number;
};
export type ArchStablelm = TransformerLLMBase<"stablelm"> & {
"stablelm.attention.layer_norm_epsilon": number;
};
export type ArchQwen = TransformerLLMBase<"qwen"> & {
"qwen.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2 = TransformerLLMBase<"qwen2"> & {
"qwen2.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
"qwen2moe.expert_feed_forward_length": number;
"qwen2moe.expert_shared_feed_forward_length": number;
"qwen2moe.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2vl = TransformerLLMBase<"qwen2vl"> & {
"qwen2vl.rope.dimension_sections": number[];
};
export type ArchPhi2 = TransformerLLMBase<"phi2"> & {
"phi2.attention.layer_norm_epsilon": number;
};
export type ArchPhi3 = TransformerLLMBase<"phi3"> & {
"phi3.attention.layer_norm_rms_epsilon": number;
"phi3.attention.sliding_window": number;
};
export type ArchPhimoe = TransformerLLMBase<"phimoe"> & {
"phimoe.attention.layer_norm_rms_epsilon": number;
};
export type ArchPlamo = TransformerLLMBase<"plamo"> & {
"plamo.attention.layer_norm_rms_epsilon": number;
};
export type ArchCodeshell = TransformerLLMBase<"codeshell"> & {
"codeshell.attention.layer_norm_epsilon": number;
};
export type ArchOrion = TransformerLLMBase<"orion"> & {
"orion.attention.layer_norm_epsilon": number;
};
export type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
"internlm2.attention.layer_norm_rms_epsilon": number;
};
export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
"minicpm.attention.layer_norm_rms_epsilon": number;
"minicpm.embedding_scale": number;
"minicpm.residual_scale": number;
"minicpm.logit_scale": number;
};
export type ArchMinicpm3 = TransformerLLMBase<"minicpm3"> & {
"minicpm3.attention.layer_norm_rms_epsilon": number;
"minicpm3.attention.q_lora_rank": number;
"minicpm3.attention.kv_lora_rank": number;
};
export type ArchGemma = TransformerLLMBase<"gemma"> & {
"gemma.attention.layer_norm_rms_epsilon": number;
};
export type ArchGemma2 = TransformerLLMBase<"gemma2"> & {
"gemma2.attention.sliding_window": number;
"gemma2.attention.layer_norm_rms_epsilon": number;
"gemma2.attn_logit_softcapping": number;
"gemma2.final_logit_softcapping": number;
};
export type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & {
"starcoder2.attention.layer_norm_epsilon": number;
};
// Mamba is a state-space model; its keys describe the SSM block, not attention.
export type ArchMamba = TransformerLLMBase<"mamba"> & {
"mamba.ssm.conv_kernel": number;
"mamba.ssm.inner_size": number;
"mamba.ssm.state_size": number;
"mamba.ssm.time_step_rank": number;
"mamba.ssm.dt_b_c_rms": boolean;
"mamba.attention.layer_norm_rms_epsilon": number;
};
export type ArchXverse = TransformerLLMBase<"xverse"> & {
"xverse.attention.layer_norm_rms_epsilon": number;
};
export type ArchCommandR = TransformerLLMBase<"command-r"> & {
"command-r.logit_scale": number;
"command-r.attention.layer_norm_epsilon": number;
};
export type ArchCohere2 = TransformerLLMBase<"cohere2"> & {
"cohere2.attention.sliding_window": number;
"cohere2.logit_scale": number;
"cohere2.attention.layer_norm_epsilon": number;
};
export type ArchDbrx = TransformerLLMBase<"dbrx"> & {
"dbrx.attention.layer_norm_epsilon": number;
"dbrx.attention.clamp_kqv": number;
};
export type ArchOlmo = TransformerLLMBase<"olmo"> & {
"olmo.attention.layer_norm_epsilon": number;
"olmo.attention.clamp_kqv": number;
};
export type ArchOlmo2 = TransformerLLMBase<"olmo2"> & {
"olmo2.attention.layer_norm_rms_epsilon": number;
};
export type ArchOlmoe = TransformerLLMBase<"olmoe"> & {
"olmoe.attention.layer_norm_rms_epsilon": number;
};
export type ArchOpenelm = TransformerLLMBase<"openelm"> & {
"openelm.attention.layer_norm_rms_epsilon": number;
};
export type ArchArctic = TransformerLLMBase<"arctic"> & {
"arctic.attention.layer_norm_rms_epsilon": number;
};
export type ArchDeepseek = TransformerLLMBase<"deepseek"> & {
"deepseek.attention.layer_norm_rms_epsilon": number;
"deepseek.leading_dense_block_count": number;
"deepseek.expert_feed_forward_length": number;
"deepseek.expert_shared_count": number;
"deepseek.expert_weights_scale": number;
};
export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
"deepseek2.attention.layer_norm_rms_epsilon": number;
"deepseek2.leading_dense_block_count": number;
"deepseek2.attention.q_lora_rank": number;
"deepseek2.attention.kv_lora_rank": number;
"deepseek2.expert_feed_forward_length": number;
"deepseek2.expert_shared_count": number;
"deepseek2.expert_weights_scale": number;
"deepseek2.expert_weights_norm": boolean;
// NOTE(review): llama.cpp appears to read expert_gating_func as an integer
// enum (softmax/sigmoid), not a bool — confirm against generate-llm.ts.
"deepseek2.expert_gating_func": boolean;
"deepseek2.rope.scaling.yarn_log_multiplier": number;
};
export type ArchChatglm = TransformerLLMBase<"chatglm"> & {
"chatglm.attention.layer_norm_rms_epsilon": number;
};
export type ArchBitnet = TransformerLLMBase<"bitnet"> & {
"bitnet.attention.layer_norm_rms_epsilon": number;
};
export type ArchT5 = TransformerLLMBase<"t5"> & {
"t5.attention.layer_norm_rms_epsilon": number;
"t5.attention.relative_buckets_count": number;
"t5.decoder_start_token_id": number;
};
export type ArchT5encoder = TransformerLLMBase<"t5encoder"> & {
"t5encoder.attention.layer_norm_rms_epsilon": number;
"t5encoder.attention.relative_buckets_count": number;
};
export type ArchJais = TransformerLLMBase<"jais"> & {
"jais.attention.layer_norm_epsilon": number;
"jais.attention.max_alibi_bias": number;
};
export type ArchNemotron = TransformerLLMBase<"nemotron"> & {
"nemotron.attention.layer_norm_epsilon": number;
};
export type ArchExaone = TransformerLLMBase<"exaone"> & {
"exaone.attention.layer_norm_rms_epsilon": number;
};
export type ArchRwkv6 = TransformerLLMBase<"rwkv6">;
export type ArchRwkv6qwen2 = TransformerLLMBase<"rwkv6qwen2"> & {
"rwkv6qwen2.attention.layer_norm_epsilon": number;
"rwkv6qwen2.attention.layer_norm_rms_epsilon": number;
"rwkv6qwen2.wkv.head_size": number;
"rwkv6qwen2.time_mix_extra_dim": number;
"rwkv6qwen2.time_decay_extra_dim": number;
// NOTE(review): the next two look like integer counts in llama.cpp, not
// booleans — confirm against generate-llm.ts before relying on them.
"rwkv6qwen2.rescale_every_n_layers": boolean;
"rwkv6qwen2.token_shift_count": boolean;
};
export type ArchGranite = TransformerLLMBase<"granite">;
export type ArchGraniteMoe = TransformerLLMBase<"granitemoe"> & {
"granitemoe.attention.layer_norm_rms_epsilon": number;
"granitemoe.logit_scale": number;
"granitemoe.residual_scale": number;
"granitemoe.embedding_scale": number;
"granitemoe.attention.scale": number;
};
export type ArchChameleon = TransformerLLMBase<"chameleon"> & {
"chameleon.attention.layer_norm_rms_epsilon": number;
"chameleon.swin_norm": boolean;
};
export type ArchWavtokenizerDec = TransformerLLMBase<"wavtokenizer-dec"> & {
"wavtokenizer-dec.attention.layer_norm_epsilon": number;
"wavtokenizer-dec.attention.group_norm_epsilon": number;
"wavtokenizer-dec.attention.group_norm_groups": number;
"wavtokenizer-dec.attention.causal": boolean;
};
/** Union of all per-architecture metadata shapes declared above. */
export type TransformerLLM = ArchLlama | ArchDeci | ArchFalcon | ArchGrok | ArchGpt2 | ArchGptj | ArchGptneox | ArchMpt | ArchBaichuan | ArchStarcoder | ArchRefact | ArchBert | ArchNomicBert | ArchJinaBertV2 | ArchBloom | ArchStablelm | ArchQwen | ArchQwen2 | ArchQwen2moe | ArchQwen2vl | ArchPhi2 | ArchPhi3 | ArchPhimoe | ArchPlamo | ArchCodeshell | ArchOrion | ArchInternlm2 | ArchMinicpm | ArchMinicpm3 | ArchGemma | ArchGemma2 | ArchStarcoder2 | ArchMamba | ArchXverse | ArchCommandR | ArchCohere2 | ArchDbrx | ArchOlmo | ArchOlmo2 | ArchOlmoe | ArchOpenelm | ArchArctic | ArchDeepseek | ArchDeepseek2 | ArchChatglm | ArchBitnet | ArchT5 | ArchT5encoder | ArchJais | ArchNemotron | ArchExaone | ArchRwkv6 | ArchRwkv6qwen2 | ArchGranite | ArchGraniteMoe | ArchChameleon | ArchWavtokenizerDec;
export {};
//# sourceMappingURL=transformer-llm.d.ts.map