UNPKG

node-llama-cpp

Version:

Run AI models locally on your machine with node.js bindings for llama.cpp. Enforce a JSON schema on the model output on the generation level

390 lines (389 loc) 13.4 kB
export declare const enum GgufArchitectureType { llama = "llama", falcon = "falcon", grok = "grok", gpt2 = "gpt2", gptj = "gptj", gptneox = "gptneox", mpt = "mpt", baichuan = "baichuan", starcoder = "starcoder", refact = "refact", bert = "bert", nomicBert = "nomic-bert", jinaBertV2 = "jina-bert-v2", bloom = "bloom", stablelm = "stablelm", qwen = "qwen", qwen2 = "qwen2", qwen2moe = "qwen2moe", phi2 = "phi2", phi3 = "phi3", plamo = "plamo", codeshell = "codeshell", orion = "orion", internlm2 = "internlm2", minicpm = "minicpm", minicpm3 = "minicpm3", gemma = "gemma", gemma2 = "gemma2", starcoder2 = "starcoder2", mamba = "mamba", xverse = "xverse", commandR = "command-r", dbrx = "dbrx", olmo = "olmo", olmo2 = "olmo2", olmoe = "olmoe", openelm = "openelm", arctic = "arctic", deepseek2 = "deepseek2", chatglm = "chatglm", bitnet = "bitnet", t5 = "t5", t5encoder = "t5encoder", jais = "jais", nemotron = "nemotron", exaone = "exaone", rwkv6 = "rwkv6", granite = "granite", granitemoe = "granitemoe", chameleon = "chameleon", unknown = "(unknown)" } export type GgufMetadata<A extends GgufArchitectureType = GgufArchitectureType> = { readonly general: GgufMetadataGeneral<A>; readonly tokenizer: GgufMetadataTokenizer; } & (GgufArchitectureType extends A ? { readonly [key in GgufArchitectureType]?: key extends keyof GgufMetadataLlmToType ? GgufMetadataLlmToType[key] : GgufMetadataDefaultArchitectureType; } : { readonly [key in A]: key extends keyof GgufMetadataLlmToType ? GgufMetadataLlmToType[key] : GgufMetadataDefaultArchitectureType; }); export type GgufMetadataLlmToType = { [GgufArchitectureType.llama]: GgufMetadataLlmLLaMA; [GgufArchitectureType.mpt]: GgufMetadataMPT; [GgufArchitectureType.gptneox]: GgufMetadataGPTNeoX; [GgufArchitectureType.gptj]: GgufMetadataGPTJ; [GgufArchitectureType.gpt2]: GgufMetadataGPT2; [GgufArchitectureType.bloom]: GgufMetadataBloom; [GgufArchitectureType.falcon]: GgufMetadataFalcon; [GgufArchitectureType.mamba]: GgufMetadataMamba; }; export declare enum GgufFileType { ALL_F32 = 0, MOSTLY_F16 = 1, MOSTLY_Q4_0 = 2, MOSTLY_Q4_1 = 3, MOSTLY_Q4_1_SOME_F16 = 4,// deprecated MOSTLY_Q4_2 = 5,// deprecated MOSTLY_Q4_3 = 6,// deprecated MOSTLY_Q8_0 = 7, MOSTLY_Q5_0 = 8, MOSTLY_Q5_1 = 9, MOSTLY_Q2_K = 10, MOSTLY_Q3_K_S = 11, MOSTLY_Q3_K_M = 12, MOSTLY_Q3_K_L = 13, MOSTLY_Q4_K_S = 14, MOSTLY_Q4_K_M = 15, MOSTLY_Q5_K_S = 16, MOSTLY_Q5_K_M = 17, MOSTLY_Q6_K = 18, MOSTLY_IQ2_XXS = 19, MOSTLY_IQ2_XS = 20, MOSTLY_Q2_K_S = 21, MOSTLY_IQ3_XS = 22, MOSTLY_IQ3_XXS = 23, MOSTLY_IQ1_S = 24, MOSTLY_IQ4_NL = 25, MOSTLY_IQ3_S = 26, MOSTLY_IQ3_M = 27, MOSTLY_IQ2_S = 28, MOSTLY_IQ2_M = 29, MOSTLY_IQ4_XS = 30, MOSTLY_IQ1_M = 31, MOSTLY_BF16 = 32, MOSTLY_Q4_0_4_4 = 33,// deprecated MOSTLY_Q4_0_4_8 = 34,// deprecated MOSTLY_Q4_0_8_8 = 35,// deprecated MOSTLY_TQ1_0 = 36,// deprecated MOSTLY_TQ2_0 = 37 } export type GgufMetadataGeneral<A extends GgufArchitectureType = GgufArchitectureType> = { readonly architecture: A; /** * The version of the quantization format. Not required if the model is not * quantized (i.e. no tensors are quantized). If any tensors are quantized, * this must be present. This is separate to the quantization scheme of the * tensors itself; the quantization version may change without changing the * scheme's name (e.g. the quantization scheme is Q5_K, and the quantization * version is 4). */ readonly quantization_version: string; /** * the global alignment to use, as described above. This can vary to allow * for different alignment schemes, but it must be a multiple of 8. Some * writers may not write the alignment. If the alignment is not specified, * assume it is `32`. */ readonly alignment?: number; /** * The name of the model. This should be a human-readable name that can be * used to identify the model. It should be unique within the community * that the model is defined in. */ readonly name?: string; readonly basename?: string; readonly size_label?: string; readonly author?: string; /** * URL to the model's homepage. This can be a GitHub repo, a paper, etc. */ readonly url?: string; /** * free-form description of the model including anything that isn't * covered by the other fields */ readonly description?: string; /** * License of the model, expressed as a SPDX license expression * (e.g. `MIT OR Apache-2.0`). *Should not* include any other information, * such as the license text or the URL to the license. */ readonly license?: string; readonly "license.name"?: string; readonly "license.link"?: string; /** * Information about where this model came from. This is useful for tracking * the provenance of the model, and for finding the original source if the * model is modified. For a model that was converted from GGML, for * example, these keys would point to the model that was converted from. */ readonly source?: { /** * URL to the source of the model. Can be a GitHub repo, a paper, etc. */ readonly url?: string; readonly huggingface?: { readonly repository?: string; }; }; /** * An enumerated value describing the type of the majority of the tensors * in the file. Optional; can be inferred from the tensor types. */ readonly file_type?: GgufFileType | undefined; readonly base_model?: { readonly count: number; readonly [key: `${bigint}`]: { readonly name?: string; readonly author?: string; readonly version?: string; readonly organization?: string; readonly url?: string; readonly doi?: string; readonly uuid?: string; readonly repo_url?: string; }; }; }; export declare const enum GgufMetadataTokenizerTokenType { undefined = 0, normal = 1, unknown = 2, control = 3, userDefined = 4, unused = 5, byte = 6 } export type GgufMetadataTokenizer = { readonly ggml: { readonly model: "no_vocab" | "llama" | "gpt2" | "bert" | string; readonly pre?: "default" | "llama3" | "llama-v3" | "llama-bpe" | "deepseek-llm" | "deepseek-coder" | "falcon" | "falcon3" | "mpt" | "starcoder" | "gpt-2" | "phi-2" | "jina-es" | "jina-de" | "jina-v1-en" | "jina-v2-es" | "jina-v2-de" | "jina-v2-code" | "refact" | "command-r" | "qwen2" | "stablelm2" | "olmo" | "dbrx" | "smaug-bpe" | "poro-chat" | "chatglm-bpe" | "viking" | "jais" | "tekken" | "smollm" | "codeshell" | "bloom" | "gpt3-finnish" | "exaone" | "chameleon" | "minerva-7b" | "megrez" | "gpt-4o" | string; readonly tokens: readonly string[]; readonly token_type: GgufMetadataTokenizerTokenType[]; readonly token_type_count?: number; readonly scores?: readonly number[]; readonly merges?: readonly string[]; readonly bos_token_id?: number; readonly eos_token_id?: number; readonly eot_token_id?: number; readonly eom_token_id?: number; readonly unknown_token_id?: number; readonly separator_token_id?: number; readonly padding_token_id?: number; readonly cls_token_id?: number; readonly mask_token_id?: number; readonly add_bos_token?: boolean; readonly add_eos_token?: boolean; readonly add_space_prefix?: boolean; readonly added_tokens?: readonly string[]; readonly fim_pre_token_id?: number; readonly fim_suf_token_id?: number; readonly fim_mid_token_id?: number; readonly fim_pad_token_id?: number; readonly fim_rep_token_id?: number; readonly fim_sep_token_id?: number; /** @deprecated */ readonly prefix_token_id?: number; /** @deprecated */ readonly suffix_token_id?: number; /** @deprecated */ readonly middle_token_id?: number; }; readonly huggingface?: { readonly json?: string; }; readonly chat_template?: string; }; export declare const enum GgufMetadataArchitecturePoolingType { unspecified = -1, none = 0, mean = 1, cls = 2, last = 3, rank = 4 } export type GgufMetadataDefaultArchitectureType = { readonly vocab_size?: number; readonly context_length?: number; readonly embedding_length?: number; readonly block_count?: number; readonly feed_forward_length?: number; readonly use_parallel_residual?: boolean; readonly tensor_data_layout?: string; readonly expert_count?: number; readonly expert_used_count?: number; readonly pooling_type?: GgufMetadataArchitecturePoolingType; readonly logit_scale?: number; readonly attention?: { readonly head_count?: number; readonly head_count_kv?: number; readonly max_alibi_bias?: number; readonly clamp_kqv?: number; readonly layer_norm_epsilon?: number; readonly layer_norm_rms_epsilon?: number; readonly key_length?: number; readonly value_length?: number; readonly causal?: boolean; }; readonly rope?: { readonly dimension_count?: number; readonly freq_base?: number; readonly scale_linear?: number; readonly scaling?: { readonly type?: "none" | "linear" | "yarn" | string; readonly factor?: number; readonly original_context_length?: number; readonly finetuned?: boolean; }; }; readonly ssm?: { readonly conv_kernel?: number; readonly inner_size?: number; readonly state_size?: number; readonly time_step_rank?: number; }; }; export type GgufMetadataLlmLLaMA = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly feed_forward_length: number; readonly attention: { readonly head_count: number; readonly layer_norm_rms_epsilon: number; readonly head_count_kv?: number; }; readonly rope: { readonly dimension_count: number; readonly scale?: number; }; readonly expert_count?: number; readonly expert_used_count?: number; readonly tensor_data_layout?: string; }; export type GgufMetadataMPT = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly attention: { readonly head_count: number; readonly alibi_bias_max: number; readonly clip_kqv: number; readonly layer_norm_epsilon: number; }; }; export type GgufMetadataGPTNeoX = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly use_parallel_residual: boolean; readonly rope: { readonly dimension_count: number; readonly scale?: number; }; readonly attention: { readonly head_count: number; readonly layer_norm_epsilon: number; }; }; export type GgufMetadataGPTJ = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly rope: { readonly dimension_count: number; readonly scale?: number; }; readonly attention: { readonly head_count: number; readonly layer_norm_epsilon: number; }; }; export type GgufMetadataGPT2 = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly attention: { readonly head_count: number; readonly layer_norm_epsilon: number; }; }; export type GgufMetadataBloom = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly feed_forward_length: number; readonly attention: { readonly head_count: number; readonly layer_norm_epsilon: number; }; }; export type GgufMetadataFalcon = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly attention: { readonly head_count: number; readonly head_count_kv: number; readonly use_norm: boolean; readonly layer_norm_epsilon: number; }; readonly tensor_data_layout?: string; }; export type GgufMetadataMamba = { readonly context_length: number; readonly embedding_length: number; readonly block_count: number; readonly ssm: { readonly conv_kernel: number; readonly inner_size: number; readonly state_size: number; readonly time_step_rank: number; }; readonly attention: { readonly layer_norm_rms_epsilon: number; }; }; export declare function isGgufMetadataOfArchitectureType<A extends GgufArchitectureType>(metadata: GgufMetadata, type: A): metadata is GgufMetadata<A>;