node-llama-cpp
Version:
Run AI models locally on your machine with Node.js bindings for llama.cpp, and enforce a JSON schema on the model output at the generation level.
77 lines (76 loc) • 3.41 kB
TypeScript
import { EventRelay } from "lifecycle-utils";
import { Token } from "../types.js";
import { LlamaText } from "../utils/LlamaText.js";
import type { LlamaModel } from "./LlamaModel/LlamaModel.js";
export type LlamaRankingContextOptions = {
    /**
     * The number of tokens the model can see at once.
     * - **`"auto"`** - adapt to the current VRAM state and attempt to set the context size as high as possible up to the size
     * the model was trained on.
     * - **`number`** - set the context size to a specific number of tokens.
     * If there's not enough VRAM, an error will be thrown.
     * Use with caution.
     * - **`{min?: number, max?: number}`** - adapt to the current VRAM state and attempt to set the context size as high as possible
     * up to the size the model was trained on, but at least `min` and at most `max`.
     *
     * Defaults to `"auto"`.
     */
    contextSize?: "auto" | number | {
        min?: number;
        max?: number;
    };
    /** The prompt processing batch size: how many tokens are processed in a single batch */
    batchSize?: number;
    /**
     * The number of threads to use to evaluate tokens.
     * Set to `0` to use the maximum number of threads supported by the current machine hardware.
     */
    threads?: number;
    /** An abort signal to abort the context creation */
    createSignal?: AbortSignal;
    /**
     * Ignore insufficient memory errors and continue with the context creation.
     * Can cause the process to crash if there's not enough VRAM for the new context.
     *
     * Defaults to `false`.
     */
    ignoreMemorySafetyChecks?: boolean;
};
/**
* @see [Reranking Documents](https://node-llama-cpp.withcat.ai/guide/embedding#reranking) tutorial
*/
export declare class LlamaRankingContext {
    /**
     * An event relay for disposal of this context.
     * NOTE(review): presumably fired when the context is disposed — confirm against the implementation.
     */
    readonly onDispose: EventRelay<void>;
    // Instances are not constructed directly; obtain one through the model's factory method
    private constructor();
    /**
     * Get the ranking score for a document for a query.
     *
     * A ranking score is a number between 0 and 1 representing the probability that the document is relevant to the query.
     * @param query - the query to rank the document against; may be raw tokens, a plain string, or a `LlamaText`
     * @param document - the document to score; accepts the same input forms as `query`
     * @returns a ranking score between 0 and 1 representing the probability that the document is relevant to the query.
     */
    rank(query: Token[] | string | LlamaText, document: Token[] | string | LlamaText): Promise<number>;
    /**
     * Get the ranking scores for all the given documents for a query.
     *
     * A ranking score is a number between 0 and 1 representing the probability that the document is relevant to the query.
     * @param query - the query to rank the documents against
     * @param documents - the documents to score; the result array corresponds to this input
     * @returns an array of ranking scores between 0 and 1 representing the probability that the document is relevant to the query.
     */
    rankAll(query: Token[] | string | LlamaText, documents: Array<Token[] | string | LlamaText>): Promise<number[]>;
    /**
     * Get the ranking scores for all the given documents for a query and sort them by score from highest to lowest.
     *
     * A ranking score is a number between 0 and 1 representing the probability that the document is relevant to the query.
     *
     * The `const` type parameter preserves the literal types of the input strings,
     * so each returned `document` keeps its exact string literal type.
     */
    rankAndSort<const T extends string>(query: Token[] | string | LlamaText, documents: T[]): Promise<Array<{
        document: T;
        /**
         * A ranking score is a number between 0 and 1 representing the probability that the document is relevant to the query.
         */
        score: number;
    }>>;
    /** Dispose this context and release the resources it holds. Safe to await; see also `Symbol.asyncDispose`. */
    dispose(): Promise<void>;
    /** @hidden Enables `await using` declarations; delegates to `dispose()` — TODO confirm delegation in the implementation */
    [Symbol.asyncDispose](): Promise<void>;
    /** Whether this context has been disposed */
    get disposed(): boolean;
    /** The `LlamaModel` this ranking context was created from */
    get model(): LlamaModel;
}