node-llama-cpp

Run AI models locally on your machine with Node.js bindings for llama.cpp. Enforce a JSON schema on the model output at the generation level.

import { Token } from "../../types.js";
import { SequenceEvaluateOptions } from "./types.js";
import { LlamaContextSequence } from "./LlamaContext.js";

/**
 * @see [Using Token Predictors](https://node-llama-cpp.withcat.ai/guide/token-prediction#custom)
 */
export declare abstract class TokenPredictor {
    /**
     * Resets the state of the predictor.
     *
     * Called before the generation starts.
     */
    abstract reset(params: {
        /** The target sequence that this token predictor is generating tokens for */
        targetSequence: LlamaContextSequence;
        /**
         * The tokens that are or will be loaded into the state.
         *
         * The initial predictions should be based on these tokens.
         *
         * When additional tokens are pushed into the state, the `pushTokens` method will be called with those tokens.
         */
        stateTokens: Token[];
        /**
         * Options used for the evaluation on the target sequence.
         *
         * The `grammarEvaluationState` is cloned before being passed to the token predictor,
         * so it can be modified without affecting the original state.
         */
        evaluateOptions: Readonly<SequenceEvaluateOptions>;
    }): Promise<void> | void;
    abstract pushTokens(tokens: Token[]): void;
    /**
     * Predicts the next tokens based on the current state.
     *
     * If the generation should wait until the minimum predictions are ready,
     * this method should return a promise that resolves when the minimum predictions are ready.
     *
     * A background prediction process can be started when this function is called,
     * so that the next predictions will be ready when this function is called again.
     */
    abstract predictTokens(): Promise<Token[]> | Token[];
    /**
     * Stops the prediction process when it runs in the background.
     * @param untilPredictionsExhausted - If true, the prediction process should not resume until the current predictions are exhausted.
     */
    stop(untilPredictionsExhausted?: boolean): Promise<void> | void;
    /**
     * Called with the input tokens before the generation starts when using `LlamaChatSession`, `LlamaChat`, and `LlamaCompletion`.
     */
    updateInputTokens(tokens: Token[]): void;
    dispose(): Promise<void> | void;
    /** @hidden */
    [Symbol.dispose](): void | Promise<void>;
}
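
A custom predictor is built by subclassing TokenPredictor and implementing the three abstract methods above. The sketch below is a deliberately naive example that always guesses the last seen token will repeat; the class name, the heuristic, and the assumption that TokenPredictor and Token are importable from the package's main entry point are illustrative and not taken from this file.

import {TokenPredictor, type Token} from "node-llama-cpp"; // assumed export location

// Illustrative predictor: guesses that the most recently seen token will repeat.
// Only the overridden method signatures come from the TokenPredictor declaration above.
export class RepeatLastTokenPredictor extends TokenPredictor {
    private stateTokens: Token[] = [];

    public reset({stateTokens}: {stateTokens: Token[]}): void {
        // Base the initial predictions on the tokens that are or will be loaded into the state
        this.stateTokens = stateTokens.slice();
    }

    public pushTokens(tokens: Token[]): void {
        // Track tokens as they are appended to the target sequence's state
        this.stateTokens.push(...tokens);
    }

    public predictTokens(): Token[] {
        const lastToken = this.stateTokens.at(-1);

        // Predict that the last token repeats twice; as with speculative decoding in general,
        // wrong guesses are discarded after validation rather than corrupting the output
        return lastToken == null ? [] : [lastToken, lastToken];
    }
}

Per the linked guide, a predictor like this is attached to the context sequence it generates for (the `targetSequence` passed to `reset`); predictions the model agrees with can be accepted without waiting for a separate generation step, while wrong guesses only cost the wasted validation work.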