node-llama-cpp

Run AI models locally on your machine with Node.js bindings for llama.cpp. Enforce a JSON schema on the model output at the generation level.

import type { LLamaChatCompletePromptOptions } from "../LlamaChatSession.js";

export type LLamaChatPromptCompletionEngineOptions = {
    /**
     * Max tokens to allow for preloading a prompt and generating a completion for it.
     *
     * Defaults to `256` or half of the context size, whichever is smaller.
     */
    maxPreloadTokens?: number;
    onGeneration?(prompt: string, completion: string): void;
    /**
     * Max number of completions to cache.
     *
     * Defaults to `100`.
     */
    maxCachedCompletions?: number;
    temperature?: LLamaChatCompletePromptOptions["temperature"];
    minP?: LLamaChatCompletePromptOptions["minP"];
    topK?: LLamaChatCompletePromptOptions["topK"];
    topP?: LLamaChatCompletePromptOptions["topP"];
    seed?: LLamaChatCompletePromptOptions["seed"];
    trimWhitespaceSuffix?: LLamaChatCompletePromptOptions["trimWhitespaceSuffix"];
    evaluationPriority?: LLamaChatCompletePromptOptions["evaluationPriority"];
    repeatPenalty?: LLamaChatCompletePromptOptions["repeatPenalty"];
    tokenBias?: LLamaChatCompletePromptOptions["tokenBias"];
    customStopTriggers?: LLamaChatCompletePromptOptions["customStopTriggers"];
    grammar?: LLamaChatCompletePromptOptions["grammar"];
    functions?: LLamaChatCompletePromptOptions["functions"];
    documentFunctionParams?: LLamaChatCompletePromptOptions["documentFunctionParams"];
};

export declare class LlamaChatSessionPromptCompletionEngine {
    private constructor();
    dispose(): void;
    /**
     * Get completion for the prompt from the cache,
     * and begin preloading this prompt into the context sequence and completing it.
     *
     * On completion progress, `onGeneration` (configured for this engine instance) will be called.
     */
    complete(prompt: string): string;
}
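The declarations above describe a prompt-completion engine that caches completions and preloads prompts into the context sequence, reporting progress through the `onGeneration` callback. Below is a minimal usage sketch. It assumes the engine is obtained from a `LlamaChatSession` via a `createPromptCompletionEngine` factory method and that a local GGUF model file is available; the factory method name and the model path are assumptions for illustration, since the engine class above has a private constructor and the declaration file does not show how instances are created.

import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();

// "model.gguf" is a placeholder; point this at any local GGUF model file
const model = await llama.loadModel({modelPath: "model.gguf"});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// createPromptCompletionEngine is assumed here; the declaration file above
// only describes the engine class itself, whose constructor is private
const completionEngine = session.createPromptCompletionEngine({
    maxPreloadTokens: 256,
    maxCachedCompletions: 100,
    onGeneration(prompt, completion) {
        // Invoked as generation progresses for the preloaded prompt
        console.log(`"${prompt}" -> "${completion}"`);
    }
});

// Returns the cached completion (possibly an empty string on the first call)
// and begins preloading and completing the prompt in the background
const completion = completionEngine.complete("The quick brown fox");
console.log("Cached completion:", completion);

Because complete() only returns what is already cached, it fits synchronous paths such as input-field suggestions; newly generated text arrives through onGeneration once generation for the preloaded prompt progresses.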