sandai-react
Version:
React components and utilities for the Sandai 3D AI Characters.
89 lines • 3.81 kB
TypeScript
import { SentenceSplitter } from "../misc/Misc";
import { DTYPE } from "./types/qantization-types";
import { GenerateOptions, MessageStatus, Message } from "./types/types";
/**
* High-level client wrapper for the LLM web worker.
*
* This is an ambient declaration (`declare class`): it describes the public
* type surface only; the implementation lives in the compiled JavaScript.
*
* Features:
* - Initialization with progress handled by the worker.
* - Text generation with streaming and per-request callbacks.
* - Status tracking: each request ID is recorded as `pending | streaming | completed | error | cancelled`.
* - Auto-cancel previous in-flight request before starting a new one.
* - Streams **only complete sentences** to onDelta.
*/
export declare class LlmClient {
// Private implementation members. Their types are not emitted in this
// declaration file, so nothing about their shape can be relied on from here.
// NOTE(review): the roles suggested by the names below (MediaPipe backend,
// active pipeline, worker handle, sync mode, sentence splitter, cancel
// bookkeeping) are presumptions — confirm against the implementation.
private _mediapipeClient;
private _currentPipeline;
private _worker;
private _syncWorker;
private _sentenceSplitter;
private _silencedCancels;
private _cancelledIds;
/**
* In-flight request state.
* - streamBuffer: raw text accumulated from worker deltas (or final text on 'response')
* - emittedCount: number of complete sentences already forwarded to onDelta
*/
private _pending;
/**
* Public status table. Each entry carries an `id`, a `status` of type
* `MessageStatus`, and an optional `error` string.
* NOTE(review): presumably keyed by request ID (matching each entry's `id`)
* and `error` is only set for failed requests — confirm against the implementation.
*/
messageStatus: Record<string, {
id: string;
status: MessageStatus;
error?: string;
}>;
// NOTE(review): presumably the ID of the request currently in flight — confirm.
private _activeId;
/**
* Built-in model presets. Each preset bundles the three values that
* `initialize` takes: a `model` identifier/path, a `dtype` (quantization
* level) and a `pipeline` name.
*
* Note that the preset key is not always equal to its `model` value
* (e.g. key "gemma3-270M" maps to model "gemma-3-270m-it-ONNX").
*/
static readonly builtinModels: {
readonly "gemma3-1b": {
readonly model: "gemma3-1b";
readonly dtype: "int8";
readonly pipeline: "text";
};
readonly "gemma3-270M": {
readonly model: "gemma-3-270m-it-ONNX";
readonly dtype: "fp16";
readonly pipeline: "text";
};
readonly "Qwen2.5-0.5B-Instruct": {
readonly model: "Qwen2.5-0.5B-Instruct";
readonly dtype: "q4";
readonly pipeline: "text";
};
// The only preset using the "mediapipe" pipeline; its `model` is a URL path
// to a .litertlm file rather than a model name.
readonly "mediapipe-gemma3n-E4B-it": {
readonly model: "/aic-runtime-deps/llm-deps/dist-mediapipe-genai/gemma-3n-E4B-it-int4-Web.litertlm";
readonly dtype: "int4";
readonly pipeline: "mediapipe";
};
};
/**
* Default model preset. Its values are the same as those of the
* "gemma3-270M" entry in `builtinModels`.
*/
static readonly defaultModelProvider: {
readonly model: "gemma-3-270m-it-ONNX";
readonly dtype: "fp16";
readonly pipeline: "text";
};
/**
* Create a client. All parameters are optional.
*
* @param workerPath - NOTE(review): presumably the URL/path of the worker script — confirm.
* @param sync - NOTE(review): presumably selects a synchronous worker variant
* (see the private `_syncWorker` member) — confirm.
* @param sentenceSplitter - A `SentenceSplitter` (imported from "../misc/Misc");
* presumably used to split streamed text into complete sentences — confirm.
*/
constructor(workerPath?: string, sync?: boolean, sentenceSplitter?: SentenceSplitter);
/**
* Try to emit any newly-completed sentences from entry.streamBuffer.
* Updates entry.emittedCount and calls onDelta with the new text (joined).
*/
private _emitCompletedIfAny;
// NOTE(review): presumably the handler for messages coming from the worker — confirm.
private _handleMessage;
/** Initialize a given model
*
* @param modelPath - one of the predefined models or a Hugging Face id of an LLM,
* like onnx-community/gemma-3-270m-it-ONNX. Some HF ones work, some don't.
* The type lists the `model` values of `builtinModels`, but because it is
* unioned with `string`, any string is accepted by the type checker.
* @param dtype - The quantization level like int8 or q4f16.
* @param pipeline - different models need different pipelines. For example, and in theory, the
* gemma3n models require the AutoProcessor and the ImageTextToText transformersjs onnx engine,
* which I called "auto" here. The normal Gemma 3 onnx models use the "text" pipeline. The Gemma3n
* onnx models don't work with transformersjs onnx backend yet (if ever, it's a constant struggle with that thing),
* so use the mediapipe litertlm one instead.
* @returns A promise resolving to an object whose `type` is either "response" or
* "error" and whose `data` is a string. NOTE(review): presumably failures are
* reported through `type: "error"` rather than a rejection — confirm.
*/
initialize(modelPath: (typeof LlmClient)["builtinModels"][keyof (typeof LlmClient)["builtinModels"]]["model"] | string, dtype?: DTYPE, pipeline?: "text" | "auto" | "mediapipe"): Promise<{
type: "response" | "error";
data: string;
}>;
/**
* Cancel a request.
*
* @param requestId - ID of the request to cancel.
* @returns A promise that resolves with no value.
*/
cancel(requestId: string): Promise<void>;
/**
* Generate a response for a list of messages.
*
* @param messages - The `Message` items to send to the model.
* @param maxTokens - Optional limit; NOTE(review): presumably the maximum number
* of tokens to generate — confirm, including the default when omitted.
* @param options - Optional `GenerateOptions`; NOTE(review): presumably carries the
* per-request callbacks such as onDelta mentioned in the class description — confirm.
* @returns A promise resolving to a string (presumably the full generated text — confirm).
*/
generateResponse(messages: Message[], maxTokens?: number, options?: GenerateOptions): Promise<string>;
/**
* Tear down the client.
* NOTE(review): presumably terminates the underlying worker — confirm.
*/
terminate(): void;
/**
* Get the status for an ID.
*
* @param id - The ID to look up.
* @returns The `MessageStatus`, or `undefined` (presumably when the ID is not
* present in `messageStatus` — confirm).
*/
getStatus(id: string): MessageStatus | undefined;
}
//# sourceMappingURL=llm-client.d.ts.map