ai-utils.js
Build AI applications, chatbots, and agents with JavaScript and TypeScript.
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { FunctionOptions } from "../../model-function/FunctionOptions.js";
import { DeltaEvent } from "../../model-function/generate-text/DeltaEvent.js";
import { TextGenerationModel, TextGenerationModelSettings } from "../../model-function/generate-text/TextGenerationModel.js";
import { PromptMapping } from "../../prompt/PromptMapping.js";
import { PromptMappingTextGenerationModel } from "../../prompt/PromptMappingTextGenerationModel.js";
import { RetryFunction } from "../../util/api/RetryFunction.js";
import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
import { ResponseHandler } from "../../util/api/postToApi.js";
import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
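/**
 * Settings for text generation against a llama.cpp server. Most fields map to
 * parameters of the server's completion API (e.g. `nPredict`, `topK`,
 * `repeatPenalty`), as reflected in the `generation_settings` object of the
 * response type below.
 *
 * @example
 * // Usage sketch with illustrative values; the baseUrl shown assumes a
 * // locally running llama.cpp server on its default port.
 * const settings: LlamaCppTextGenerationModelSettings<4096> = {
 *   baseUrl: "http://127.0.0.1:8080",
 *   contextWindowSize: 4096,
 *   temperature: 0.7,
 *   nPredict: 256,
 *   stop: ["###"],
 * };
 */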
export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
baseUrl?: string;
retry?: RetryFunction;
throttle?: ThrottleFunction;
tokenizerSettings?: {
retry?: RetryFunction;
throttle?: ThrottleFunction;
};
/**
* Specify the context window size of the model that you have loaded in your
* Llama.cpp server.
*/
contextWindowSize?: CONTEXT_WINDOW_SIZE;
temperature?: number;
topK?: number;
topP?: number;
nPredict?: number;
nKeep?: number;
stop?: string[];
tfsZ?: number;
typicalP?: number;
repeatPenalty?: number;
repeatLastN?: number;
penalizeNl?: boolean;
mirostat?: number;
mirostatTau?: number;
mirostatEta?: number;
seed?: number;
ignoreEos?: boolean;
logitBias?: Array<[number, number | false]>;
}
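/**
 * Text generation model that calls a llama.cpp server.
 *
 * @example
 * // Usage sketch: generate a completion and extract the text. Assumes a
 * // llama.cpp server is running and that contextWindowSize matches the
 * // model loaded into it.
 * const model = new LlamaCppTextGenerationModel({
 *   contextWindowSize: 4096,
 *   temperature: 0.7,
 *   nPredict: 128,
 * });
 * const tokenCount = await model.countPromptTokens("Write a haiku about llamas.");
 * const response = await model.generateTextResponse("Write a haiku about llamas.");
 * const text = model.extractText(response);
 */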
export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> implements TextGenerationModel<string, LlamaCppTextGenerationResponse, LlamaCppTextGenerationDelta, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> {
constructor(settings?: LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>);
readonly provider = "llamacpp";
get modelName(): null;
get contextWindowSize(): CONTEXT_WINDOW_SIZE;
readonly tokenizer: LlamaCppTokenizer;
callAPI<RESPONSE>(prompt: string, options: {
responseFormat: LlamaCppTextGenerationResponseFormatType<RESPONSE>;
} & FunctionOptions<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): Promise<RESPONSE>;
countPromptTokens(prompt: string): Promise<number>;
generateTextResponse(prompt: string, options?: FunctionOptions<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): Promise<{
model: string;
prompt: string;
content: string;
stop: true;
generation_settings: {
model: string;
stream: boolean;
seed: number;
stop: string[];
mirostat: number;
frequency_penalty: number;
ignore_eos: boolean;
logit_bias: number[];
mirostat_eta: number;
mirostat_tau: number;
n_ctx: number;
n_keep: number;
n_predict: number;
n_probs: number;
penalize_nl: boolean;
presence_penalty: number;
repeat_last_n: number;
repeat_penalty: number;
temp: number;
tfs_z: number;
top_k: number;
top_p: number;
typical_p: number;
};
stopped_eos: boolean;
stopped_limit: boolean;
stopped_word: boolean;
stopping_word: string;
timings: {
predicted_ms: number;
predicted_n: number;
predicted_per_second: number | null;
predicted_per_token_ms: number | null;
prompt_ms: number | null;
prompt_n: number;
prompt_per_second: number | null;
prompt_per_token_ms: number | null;
};
tokens_cached: number;
tokens_evaluated: number;
tokens_predicted: number;
truncated: boolean;
}>;
extractText(response: LlamaCppTextGenerationResponse): string;
generateDeltaStreamResponse(prompt: string, options?: FunctionOptions<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): Promise<AsyncIterable<DeltaEvent<LlamaCppTextGenerationDelta>>>;
extractTextDelta(fullDelta: LlamaCppTextGenerationDelta): string | undefined;
mapPrompt<INPUT_PROMPT>(promptMapping: PromptMapping<INPUT_PROMPT, string>): PromptMappingTextGenerationModel<INPUT_PROMPT, string, LlamaCppTextGenerationResponse, LlamaCppTextGenerationDelta, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
withSettings(additionalSettings: Partial<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): this;
get maxCompletionTokens(): number | undefined;
withMaxCompletionTokens(maxCompletionTokens: number): this;
withStopTokens(stopTokens: string[]): this;
}
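/**
 * Zod schema for the JSON response of the llama.cpp completion endpoint,
 * used to validate the response and to derive `LlamaCppTextGenerationResponse`.
 */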
declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
content: z.ZodString;
stop: z.ZodLiteral<true>;
generation_settings: z.ZodObject<{
frequency_penalty: z.ZodNumber;
ignore_eos: z.ZodBoolean;
logit_bias: z.ZodArray<z.ZodNumber, "many">;
mirostat: z.ZodNumber;
mirostat_eta: z.ZodNumber;
mirostat_tau: z.ZodNumber;
model: z.ZodString;
n_ctx: z.ZodNumber;
n_keep: z.ZodNumber;
n_predict: z.ZodNumber;
n_probs: z.ZodNumber;
penalize_nl: z.ZodBoolean;
presence_penalty: z.ZodNumber;
repeat_last_n: z.ZodNumber;
repeat_penalty: z.ZodNumber;
seed: z.ZodNumber;
stop: z.ZodArray<z.ZodString, "many">;
stream: z.ZodBoolean;
temp: z.ZodNumber;
tfs_z: z.ZodNumber;
top_k: z.ZodNumber;
top_p: z.ZodNumber;
typical_p: z.ZodNumber;
}, "strip", z.ZodTypeAny, {
model: string;
stream: boolean;
seed: number;
stop: string[];
mirostat: number;
frequency_penalty: number;
ignore_eos: boolean;
logit_bias: number[];
mirostat_eta: number;
mirostat_tau: number;
n_ctx: number;
n_keep: number;
n_predict: number;
n_probs: number;
penalize_nl: boolean;
presence_penalty: number;
repeat_last_n: number;
repeat_penalty: number;
temp: number;
tfs_z: number;
top_k: number;
top_p: number;
typical_p: number;
}, {
model: string;
stream: boolean;
seed: number;
stop: string[];
mirostat: number;
frequency_penalty: number;
ignore_eos: boolean;
logit_bias: number[];
mirostat_eta: number;
mirostat_tau: number;
n_ctx: number;
n_keep: number;
n_predict: number;
n_probs: number;
penalize_nl: boolean;
presence_penalty: number;
repeat_last_n: number;
repeat_penalty: number;
temp: number;
tfs_z: number;
top_k: number;
top_p: number;
typical_p: number;
}>;
model: z.ZodString;
prompt: z.ZodString;
stopped_eos: z.ZodBoolean;
stopped_limit: z.ZodBoolean;
stopped_word: z.ZodBoolean;
stopping_word: z.ZodString;
timings: z.ZodObject<{
predicted_ms: z.ZodNumber;
predicted_n: z.ZodNumber;
predicted_per_second: z.ZodNullable<z.ZodNumber>;
predicted_per_token_ms: z.ZodNullable<z.ZodNumber>;
prompt_ms: z.ZodNullable<z.ZodNumber>;
prompt_n: z.ZodNumber;
prompt_per_second: z.ZodNullable<z.ZodNumber>;
prompt_per_token_ms: z.ZodNullable<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
predicted_ms: number;
predicted_n: number;
predicted_per_second: number | null;
predicted_per_token_ms: number | null;
prompt_ms: number | null;
prompt_n: number;
prompt_per_second: number | null;
prompt_per_token_ms: number | null;
}, {
predicted_ms: number;
predicted_n: number;
predicted_per_second: number | null;
predicted_per_token_ms: number | null;
prompt_ms: number | null;
prompt_n: number;
prompt_per_second: number | null;
prompt_per_token_ms: number | null;
}>;
tokens_cached: z.ZodNumber;
tokens_evaluated: z.ZodNumber;
tokens_predicted: z.ZodNumber;
truncated: z.ZodBoolean;
}, "strip", z.ZodTypeAny, {
model: string;
prompt: string;
content: string;
stop: true;
generation_settings: {
model: string;
stream: boolean;
seed: number;
stop: string[];
mirostat: number;
frequency_penalty: number;
ignore_eos: boolean;
logit_bias: number[];
mirostat_eta: number;
mirostat_tau: number;
n_ctx: number;
n_keep: number;
n_predict: number;
n_probs: number;
penalize_nl: boolean;
presence_penalty: number;
repeat_last_n: number;
repeat_penalty: number;
temp: number;
tfs_z: number;
top_k: number;
top_p: number;
typical_p: number;
};
stopped_eos: boolean;
stopped_limit: boolean;
stopped_word: boolean;
stopping_word: string;
timings: {
predicted_ms: number;
predicted_n: number;
predicted_per_second: number | null;
predicted_per_token_ms: number | null;
prompt_ms: number | null;
prompt_n: number;
prompt_per_second: number | null;
prompt_per_token_ms: number | null;
};
tokens_cached: number;
tokens_evaluated: number;
tokens_predicted: number;
truncated: boolean;
}, {
model: string;
prompt: string;
content: string;
stop: true;
generation_settings: {
model: string;
stream: boolean;
seed: number;
stop: string[];
mirostat: number;
frequency_penalty: number;
ignore_eos: boolean;
logit_bias: number[];
mirostat_eta: number;
mirostat_tau: number;
n_ctx: number;
n_keep: number;
n_predict: number;
n_probs: number;
penalize_nl: boolean;
presence_penalty: number;
repeat_last_n: number;
repeat_penalty: number;
temp: number;
tfs_z: number;
top_k: number;
top_p: number;
typical_p: number;
};
stopped_eos: boolean;
stopped_limit: boolean;
stopped_word: boolean;
stopping_word: string;
timings: {
predicted_ms: number;
predicted_n: number;
predicted_per_second: number | null;
predicted_per_token_ms: number | null;
prompt_ms: number | null;
prompt_n: number;
prompt_per_second: number | null;
prompt_per_token_ms: number | null;
};
tokens_cached: number;
tokens_evaluated: number;
tokens_predicted: number;
truncated: boolean;
}>;
export type LlamaCppTextGenerationResponse = z.infer<typeof llamaCppTextGenerationResponseSchema>;
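/**
 * Full delta of a streamed llama.cpp response: `content` is the text generated
 * so far, `isComplete` marks the end of the stream, and `delta` is the newest
 * chunk.
 *
 * @example
 * // Streaming sketch. The exact event shape comes from DeltaEvent.js; the
 * // `fullDelta` property accessed below is an assumption for illustration.
 * const stream = await model.generateDeltaStreamResponse("Tell me a story.");
 * for await (const event of stream) {
 *   const textDelta = model.extractTextDelta(event.fullDelta);
 *   if (textDelta != null) process.stdout.write(textDelta);
 * }
 */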
export type LlamaCppTextGenerationDelta = {
content: string;
isComplete: boolean;
delta: string;
};
export type LlamaCppTextGenerationResponseFormatType<T> = {
stream: boolean;
handler: ResponseHandler<T>;
};
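/**
 * Response formats for `callAPI`: `json` returns the complete response object,
 * `deltaIterable` returns an async iterable of delta events.
 *
 * @example
 * // Low-level call sketch using the JSON response format (the remaining
 * // FunctionOptions fields are assumed to be optional here).
 * const fullResponse = await model.callAPI("Write a haiku about llamas.", {
 *   responseFormat: LlamaCppTextGenerationResponseFormat.json,
 * });
 */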
export declare const LlamaCppTextGenerationResponseFormat: {
/**
* Returns the response as a JSON object.
*/
json: {
stream: false;
handler: ResponseHandler<{
model: string;
prompt: string;
content: string;
stop: true;
generation_settings: {
model: string;
stream: boolean;
seed: number;
stop: string[];
mirostat: number;
frequency_penalty: number;
ignore_eos: boolean;
logit_bias: number[];
mirostat_eta: number;
mirostat_tau: number;
n_ctx: number;
n_keep: number;
n_predict: number;
n_probs: number;
penalize_nl: boolean;
presence_penalty: number;
repeat_last_n: number;
repeat_penalty: number;
temp: number;
tfs_z: number;
top_k: number;
top_p: number;
typical_p: number;
};
stopped_eos: boolean;
stopped_limit: boolean;
stopped_word: boolean;
stopping_word: string;
timings: {
predicted_ms: number;
predicted_n: number;
predicted_per_second: number | null;
predicted_per_token_ms: number | null;
prompt_ms: number | null;
prompt_n: number;
prompt_per_second: number | null;
prompt_per_token_ms: number | null;
};
tokens_cached: number;
tokens_evaluated: number;
tokens_predicted: number;
truncated: boolean;
}>;
};
/**
 * Returns an async iterable over the full deltas of the response stream,
 * including the full current state at the time of each event.
 */
deltaIterable: {
stream: true;
handler: ({ response }: {
response: Response;
}) => Promise<AsyncIterable<DeltaEvent<LlamaCppTextGenerationDelta>>>;
};
};
export {};