@huggingface/gguf

A GGUF parser that works on remotely hosted files.
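A minimal usage sketch, assuming an ES module with top-level await; the URL is a placeholder for any remotely hosted .gguf file:

import { gguf } from "@huggingface/gguf";

// Parses the GGUF header and metadata over HTTP without downloading the full file.
const { metadata, tensorInfos } = await gguf(
	"https://example.com/model.gguf" // placeholder URL
);
console.log(metadata["general.architecture"], tensorInfos.length);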

/** This file is auto-generated by generate-llm.ts */

import type { ModelBase, GGUFGeneralInfo } from "./types";

type LLMBase<TArchitecture extends string> = Partial<
	Record<
		`${TArchitecture}.vocab_size` | `${TArchitecture}.use_parallel_residual` | `${TArchitecture}.tensor_data_layout`,
		number
	>
>;

type Attention<TArchitecture extends string> = Record<`${TArchitecture}.attention.head_count`, number> &
	Partial<
		Record<
			| `${TArchitecture}.attention.head_count_kv`
			| `${TArchitecture}.attention.key_length`
			| `${TArchitecture}.attention.value_length`,
			number
		>
	>;

export type TransformerLLMRopeScalingType = "none" | "linear" | "yarn";

type Rope<TArchitecture extends LLMArchitecture> = Partial<
	Record<
		| `${TArchitecture}.rope.dimension_count`
		| `${TArchitecture}.rope.freq_base`
		| `${TArchitecture}.rope.scale_linear`
		| `${TArchitecture}.rope.scaling.factor`
		| `${TArchitecture}.rope.scaling.original_context_length`,
		number
	> &
		Record<`${TArchitecture}.rope.scaling.type`, TransformerLLMRopeScalingType> &
		Record<`${TArchitecture}.rope.finetuned`, boolean>
>;

type MOE<TArchitecture extends LLMArchitecture> = Partial<
	Record<`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`, number>
>;

export type TransformerLLMArchitecture = LLMArchitecture; // type alias
export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture> &
	LLMBase<TArchitecture> &
	ModelBase<TArchitecture> &
	MOE<TArchitecture> &
	Attention<TArchitecture> &
	Rope<TArchitecture>;

export enum TransformerLLMPoolingType {
	UNSPECIFIED = -1,
	NONE = 0,
	MEAN = 1,
	CLS = 2,
}

export const LLM_ARCHITECTURES = [
	"llama",
	"deci",
	"falcon",
	"grok",
	"gpt2",
	"gptj",
	"gptneox",
	"mpt",
	"baichuan",
	"starcoder",
	"refact",
	"bert",
	"nomic-bert",
	"jina-bert-v2",
	"bloom",
	"stablelm",
	"qwen",
	"qwen2",
	"qwen2moe",
	"qwen2vl",
	"phi2",
	"phi3",
	"phimoe",
	"plamo",
	"codeshell",
	"orion",
	"internlm2",
	"minicpm",
	"minicpm3",
	"gemma",
	"gemma2",
	"starcoder2",
	"mamba",
	"xverse",
	"command-r",
	"cohere2",
	"dbrx",
	"olmo",
	"olmo2",
	"olmoe",
	"openelm",
	"arctic",
	"deepseek",
	"deepseek2",
	"chatglm",
	"bitnet",
	"t5",
	"t5encoder",
	"jais",
	"nemotron",
	"exaone",
	"rwkv6",
	"rwkv6qwen2",
	"granite",
	"granitemoe",
	"chameleon",
	"wavtokenizer-dec",
] as const;
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];

export type ArchLlama = TransformerLLMBase<"llama"> & {
	"llama.attention.layer_norm_rms_epsilon": number;
};
export type ArchDeci = TransformerLLMBase<"deci"> & {
	"deci.attention.layer_norm_rms_epsilon": number;
};
export type ArchFalcon = TransformerLLMBase<"falcon"> & {
	"falcon.attention.layer_norm_epsilon": number;
};
export type ArchGrok = TransformerLLMBase<"grok"> & {
	"grok.attention.layer_norm_rms_epsilon": number;
};
export type ArchGpt2 = TransformerLLMBase<"gpt2"> & {
	"gpt2.attention.layer_norm_epsilon": number;
};
export type ArchGptj = TransformerLLMBase<"gptj">;
export type ArchGptneox = TransformerLLMBase<"gptneox"> & {
	"gptneox.attention.layer_norm_epsilon": number;
	"gptneox.use_parallel_residual": boolean;
};
export type ArchMpt = TransformerLLMBase<"mpt"> & {
	"mpt.attention.layer_norm_epsilon": number;
	"mpt.attention.clamp_kqv": number;
	"mpt.attention.max_alibi_bias": number;
};
export type ArchBaichuan = TransformerLLMBase<"baichuan"> & {
	"baichuan.attention.layer_norm_rms_epsilon": number;
};
export type ArchStarcoder = TransformerLLMBase<"starcoder"> & {
	"starcoder.attention.layer_norm_epsilon": number;
};
export type ArchRefact = TransformerLLMBase<"refact"> & {
	"refact.attention.layer_norm_rms_epsilon": number;
};
export type ArchBert = TransformerLLMBase<"bert"> & {
	"bert.attention.layer_norm_epsilon": number;
	"bert.attention.causal": boolean;
	"bert.pooling_type": TransformerLLMPoolingType;
};
export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
	"nomic-bert.attention.layer_norm_epsilon": number;
	"nomic-bert.attention.causal": boolean;
	"nomic-bert.pooling_type": TransformerLLMPoolingType;
};
export type ArchJinaBertV2 = TransformerLLMBase<"jina-bert-v2"> & {
	"jina-bert-v2.attention.layer_norm_epsilon": number;
	"jina-bert-v2.attention.causal": boolean;
	"jina-bert-v2.pooling_type": TransformerLLMPoolingType;
};
export type ArchBloom = TransformerLLMBase<"bloom"> & {
	"bloom.attention.layer_norm_epsilon": number;
};
export type ArchStablelm = TransformerLLMBase<"stablelm"> & {
	"stablelm.attention.layer_norm_epsilon": number;
};
export type ArchQwen = TransformerLLMBase<"qwen"> & {
	"qwen.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2 = TransformerLLMBase<"qwen2"> & {
	"qwen2.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
	"qwen2moe.expert_feed_forward_length": number;
	"qwen2moe.expert_shared_feed_forward_length": number;
	"qwen2moe.attention.layer_norm_rms_epsilon": number;
};
export type ArchQwen2vl = TransformerLLMBase<"qwen2vl"> & {
	"qwen2vl.rope.dimension_sections": number[];
};
export type ArchPhi2 = TransformerLLMBase<"phi2"> & {
	"phi2.attention.layer_norm_epsilon": number;
};
export type ArchPhi3 = TransformerLLMBase<"phi3"> & {
	"phi3.attention.layer_norm_rms_epsilon": number;
	"phi3.attention.sliding_window": number;
};
export type ArchPhimoe = TransformerLLMBase<"phimoe"> & {
	"phimoe.attention.layer_norm_rms_epsilon": number;
};
export type ArchPlamo = TransformerLLMBase<"plamo"> & {
	"plamo.attention.layer_norm_rms_epsilon": number;
};
export type ArchCodeshell = TransformerLLMBase<"codeshell"> & {
	"codeshell.attention.layer_norm_epsilon": number;
};
export type ArchOrion = TransformerLLMBase<"orion"> & {
	"orion.attention.layer_norm_epsilon": number;
};
export type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
	"internlm2.attention.layer_norm_rms_epsilon": number;
};
export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
	"minicpm.attention.layer_norm_rms_epsilon": number;
	"minicpm.embedding_scale": number;
	"minicpm.residual_scale": number;
	"minicpm.logit_scale": number;
};
export type ArchMinicpm3 = TransformerLLMBase<"minicpm3"> & {
	"minicpm3.attention.layer_norm_rms_epsilon": number;
	"minicpm3.attention.q_lora_rank": number;
	"minicpm3.attention.kv_lora_rank": number;
};
export type ArchGemma = TransformerLLMBase<"gemma"> & {
	"gemma.attention.layer_norm_rms_epsilon": number;
};
export type ArchGemma2 = TransformerLLMBase<"gemma2"> & {
	"gemma2.attention.sliding_window": number;
	"gemma2.attention.layer_norm_rms_epsilon": number;
	"gemma2.attn_logit_softcapping": number;
	"gemma2.final_logit_softcapping": number;
};
export type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & {
	"starcoder2.attention.layer_norm_epsilon": number;
};
export type ArchMamba = TransformerLLMBase<"mamba"> & {
	"mamba.ssm.conv_kernel": number;
	"mamba.ssm.inner_size": number;
	"mamba.ssm.state_size": number;
	"mamba.ssm.time_step_rank": number;
	"mamba.ssm.dt_b_c_rms": boolean;
	"mamba.attention.layer_norm_rms_epsilon": number;
};
export type ArchXverse = TransformerLLMBase<"xverse"> & {
	"xverse.attention.layer_norm_rms_epsilon": number;
};
export type ArchCommandR = TransformerLLMBase<"command-r"> & {
	"command-r.logit_scale": number;
	"command-r.attention.layer_norm_epsilon": number;
};
export type ArchCohere2 = TransformerLLMBase<"cohere2"> & {
	"cohere2.attention.sliding_window": number;
	"cohere2.logit_scale": number;
	"cohere2.attention.layer_norm_epsilon": number;
};
export type ArchDbrx = TransformerLLMBase<"dbrx"> & {
	"dbrx.attention.layer_norm_epsilon": number;
	"dbrx.attention.clamp_kqv": number;
};
export type ArchOlmo = TransformerLLMBase<"olmo"> & {
	"olmo.attention.layer_norm_epsilon": number;
	"olmo.attention.clamp_kqv": number;
};
export type ArchOlmo2 = TransformerLLMBase<"olmo2"> & {
	"olmo2.attention.layer_norm_rms_epsilon": number;
};
export type ArchOlmoe = TransformerLLMBase<"olmoe"> & {
	"olmoe.attention.layer_norm_rms_epsilon": number;
};
export type ArchOpenelm = TransformerLLMBase<"openelm"> & {
	"openelm.attention.layer_norm_rms_epsilon": number;
};
export type ArchArctic = TransformerLLMBase<"arctic"> & {
	"arctic.attention.layer_norm_rms_epsilon": number;
};
export type ArchDeepseek = TransformerLLMBase<"deepseek"> & {
	"deepseek.attention.layer_norm_rms_epsilon": number;
	"deepseek.leading_dense_block_count": number;
	"deepseek.expert_feed_forward_length": number;
	"deepseek.expert_shared_count": number;
	"deepseek.expert_weights_scale": number;
};
export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
	"deepseek2.attention.layer_norm_rms_epsilon": number;
	"deepseek2.leading_dense_block_count": number;
	"deepseek2.attention.q_lora_rank": number;
	"deepseek2.attention.kv_lora_rank": number;
	"deepseek2.expert_feed_forward_length": number;
	"deepseek2.expert_shared_count": number;
	"deepseek2.expert_weights_scale": number;
	"deepseek2.expert_weights_norm": boolean;
	"deepseek2.expert_gating_func": boolean;
	"deepseek2.rope.scaling.yarn_log_multiplier": number;
};
export type ArchChatglm = TransformerLLMBase<"chatglm"> & {
	"chatglm.attention.layer_norm_rms_epsilon": number;
};
export type ArchBitnet = TransformerLLMBase<"bitnet"> & {
	"bitnet.attention.layer_norm_rms_epsilon": number;
};
export type ArchT5 = TransformerLLMBase<"t5"> & {
	"t5.attention.layer_norm_rms_epsilon": number;
	"t5.attention.relative_buckets_count": number;
	"t5.decoder_start_token_id": number;
};
export type ArchT5encoder = TransformerLLMBase<"t5encoder"> & {
	"t5encoder.attention.layer_norm_rms_epsilon": number;
	"t5encoder.attention.relative_buckets_count": number;
};
export type ArchJais = TransformerLLMBase<"jais"> & {
	"jais.attention.layer_norm_epsilon": number;
	"jais.attention.max_alibi_bias": number;
};
export type ArchNemotron = TransformerLLMBase<"nemotron"> & {
	"nemotron.attention.layer_norm_epsilon": number;
};
export type ArchExaone = TransformerLLMBase<"exaone"> & {
	"exaone.attention.layer_norm_rms_epsilon": number;
};
export type ArchRwkv6 = TransformerLLMBase<"rwkv6">;
export type ArchRwkv6qwen2 = TransformerLLMBase<"rwkv6qwen2"> & {
	"rwkv6qwen2.attention.layer_norm_epsilon": number;
	"rwkv6qwen2.attention.layer_norm_rms_epsilon": number;
	"rwkv6qwen2.wkv.head_size": number;
	"rwkv6qwen2.time_mix_extra_dim": number;
	"rwkv6qwen2.time_decay_extra_dim": number;
	"rwkv6qwen2.rescale_every_n_layers": boolean;
	"rwkv6qwen2.token_shift_count": boolean;
};
export type ArchGranite = TransformerLLMBase<"granite">;
export type ArchGraniteMoe = TransformerLLMBase<"granitemoe"> & {
	"granitemoe.attention.layer_norm_rms_epsilon": number;
	"granitemoe.logit_scale": number;
	"granitemoe.residual_scale": number;
	"granitemoe.embedding_scale": number;
	"granitemoe.attention.scale": number;
};
export type ArchChameleon = TransformerLLMBase<"chameleon"> & {
	"chameleon.attention.layer_norm_rms_epsilon": number;
	"chameleon.swin_norm": boolean;
};
export type ArchWavtokenizerDec = TransformerLLMBase<"wavtokenizer-dec"> & {
	"wavtokenizer-dec.attention.layer_norm_epsilon": number;
	"wavtokenizer-dec.attention.group_norm_epsilon": number;
	"wavtokenizer-dec.attention.group_norm_groups": number;
	"wavtokenizer-dec.attention.causal": boolean;
};

export type TransformerLLM =
	| ArchLlama
	| ArchDeci
	| ArchFalcon
	| ArchGrok
	| ArchGpt2
	| ArchGptj
	| ArchGptneox
	| ArchMpt
	| ArchBaichuan
	| ArchStarcoder
	| ArchRefact
	| ArchBert
	| ArchNomicBert
	| ArchJinaBertV2
	| ArchBloom
	| ArchStablelm
	| ArchQwen
	| ArchQwen2
	| ArchQwen2moe
	| ArchQwen2vl
	| ArchPhi2
	| ArchPhi3
	| ArchPhimoe
	| ArchPlamo
	| ArchCodeshell
	| ArchOrion
	| ArchInternlm2
	| ArchMinicpm
	| ArchMinicpm3
	| ArchGemma
	| ArchGemma2
	| ArchStarcoder2
	| ArchMamba
	| ArchXverse
	| ArchCommandR
	| ArchCohere2
	| ArchDbrx
	| ArchOlmo
	| ArchOlmo2
	| ArchOlmoe
	| ArchOpenelm
	| ArchArctic
	| ArchDeepseek
	| ArchDeepseek2
	| ArchChatglm
	| ArchBitnet
	| ArchT5
	| ArchT5encoder
	| ArchJais
	| ArchNemotron
	| ArchExaone
	| ArchRwkv6
	| ArchRwkv6qwen2
	| ArchGranite
	| ArchGraniteMoe
	| ArchChameleon
	| ArchWavtokenizerDec;
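Every hyperparameter key above is prefixed by its architecture name, so consuming code can select the matching Arch* type by inspecting general.architecture in parsed metadata. A minimal sketch of that pattern; the module path and the manual cast are illustrative assumptions, not an API this file guarantees:

import { gguf } from "@huggingface/gguf";
import type { ArchLlama } from "./transformer-llm"; // assumed path to this generated file

const { metadata } = await gguf("https://example.com/model.gguf"); // placeholder URL
if (metadata["general.architecture"] === "llama") {
	// Assumption: narrow by hand; the keys then match ArchLlama's template-literal types.
	const llama = metadata as unknown as ArchLlama;
	console.log("RMS-norm epsilon:", llama["llama.attention.layer_norm_rms_epsilon"]);
}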