UNPKG

sandai-react

Version:

React components and utilities for the Sandai 3D AI Characters.

89 lines 3.81 kB
import { SentenceSplitter } from "../misc/Misc";
import { DTYPE } from "./types/qantization-types";
import { GenerateOptions, MessageStatus, Message } from "./types/types";
/**
 * High-level client wrapper for the LLM web worker.
 *
 * Features:
 * - Initialization with progress handled by the worker.
 * - Text generation with streaming and per-request callbacks.
 * - Status tracking: each request ID is recorded as `pending | streaming | completed | error | cancelled`.
 * - Auto-cancel previous in-flight request before starting a new one.
 * - Streams **only complete sentences** to onDelta.
 *
 * This file is an ambient declaration: it describes the public and private
 * shape of the class only. Private members are emitted without types, so
 * their exact contents cannot be determined from here.
 */
export declare class LlmClient {
    // NOTE(review): the private fields below are untyped in this declaration.
    // Judging by their names they hold the MediaPipe client, the currently
    // selected pipeline, the worker handle(s), the sentence splitter passed to
    // the constructor, and bookkeeping for cancelled requests — confirm
    // against the implementation before relying on any of this.
    private _mediapipeClient;
    private _currentPipeline;
    private _worker;
    private _syncWorker;
    private _sentenceSplitter;
    private _silencedCancels;
    private _cancelledIds;
    /**
     * In-flight request state.
     * - streamBuffer: raw text accumulated from worker deltas (or final text on 'response')
     * - emittedCount: number of complete sentences already forwarded to onDelta
     */
    private _pending;
    /**
     * Status records, keyed by string. Each record carries its own `id`, the
     * current `status`, and an optional `error` message.
     *
     * NOTE(review): the key is presumably the request ID (same value as
     * `id`) — confirm against the implementation.
     */
    messageStatus: Record<string, {
        id: string;
        status: MessageStatus;
        error?: string;
    }>;
    // NOTE(review): presumably the ID of the request currently in flight,
    // used for the auto-cancel behavior described on the class — confirm.
    private _activeId;
    /**
     * Predefined model presets. Each entry fixes the `model` identifier, the
     * quantization `dtype`, and the `pipeline` to run it with. The
     * "mediapipe-gemma3n-E4B-it" preset is the only one using the
     * "mediapipe" pipeline; its `model` is a path to a `.litertlm` file
     * rather than a model name.
     */
    static readonly builtinModels: {
        readonly "gemma3-1b": {
            readonly model: "gemma3-1b";
            readonly dtype: "int8";
            readonly pipeline: "text";
        };
        readonly "gemma3-270M": {
            readonly model: "gemma-3-270m-it-ONNX";
            readonly dtype: "fp16";
            readonly pipeline: "text";
        };
        readonly "Qwen2.5-0.5B-Instruct": {
            readonly model: "Qwen2.5-0.5B-Instruct";
            readonly dtype: "q4";
            readonly pipeline: "text";
        };
        readonly "mediapipe-gemma3n-E4B-it": {
            readonly model: "/aic-runtime-deps/llm-deps/dist-mediapipe-genai/gemma-3n-E4B-it-int4-Web.litertlm";
            readonly dtype: "int4";
            readonly pipeline: "mediapipe";
        };
    };
    /**
     * Default model preset. Its values are identical to the "gemma3-270M"
     * entry of `builtinModels`.
     */
    static readonly defaultModelProvider: {
        readonly model: "gemma-3-270m-it-ONNX";
        readonly dtype: "fp16";
        readonly pipeline: "text";
    };
    /**
     * Create a client. All parameters are optional.
     *
     * NOTE(review): the parameter semantics below are inferred from names
     * only; defaults are not visible in this declaration — confirm.
     *
     * @param workerPath - presumably the location of the worker script.
     * @param sync - presumably selects the synchronous worker variant.
     * @param sentenceSplitter - splitter used to detect complete sentences.
     */
    constructor(workerPath?: string, sync?: boolean, sentenceSplitter?: SentenceSplitter);
    /**
     * Try to emit any newly-completed sentences from entry.streamBuffer.
     * Updates entry.emittedCount and calls onDelta with the new text (joined).
     */
    private _emitCompletedIfAny;
    // NOTE(review): presumably the handler for messages coming back from the
    // worker — untyped here, confirm against the implementation.
    private _handleMessage;
    /** Initialize a given model
     *
     * @param modelPath - one of the predefined models or a Hugging Face model id,
     * like onnx-community/gemma-3-270m-it-ONNX. Some Hugging Face models work, some don't.
     * Because the type is unioned with `string`, any string is accepted by the type checker.
     * @param dtype - The quantization level like int8 or q4f16.
     * @param pipeline - different models need different pipelines. For example, and in theory, the
     * gemma3n models require the AutoProcessor and the ImageTextToText transformersjs onnx engine,
     * which I called "auto" here. The normal Gemma 3 onnx models use the "text" pipeline. The Gemma3n
     * onnx models don't work with transformersjs onnx backend yet (if ever, it's a constant struggle with that thing),
     * so use the mediapipe litertlm one instead.
     * @returns A promise resolving to an object whose `type` is either "response" or "error" and
     * whose `data` is a string.
     */
    initialize(
        modelPath: (typeof LlmClient)["builtinModels"][keyof (typeof LlmClient)["builtinModels"]]["model"] | string,
        dtype?: DTYPE,
        pipeline?: "text" | "auto" | "mediapipe"
    ): Promise<{
        type: "response" | "error";
        data: string;
    }>;
    /**
     * Cancel a request.
     *
     * NOTE(review): presumably marks the request as `cancelled` in
     * `messageStatus` — confirm against the implementation.
     *
     * @param requestId - ID of the request to cancel.
     * @returns A promise that resolves with no value.
     */
    cancel(requestId: string): Promise<void>;
    /**
     * Generate a response for the given messages.
     *
     * Per the class description, a previous in-flight request is cancelled
     * before a new one starts, and only complete sentences are streamed to
     * onDelta.
     *
     * NOTE(review): how the request ID and callbacks are supplied (presumably
     * via `options`) is not visible in this declaration — confirm.
     *
     * @param messages - the messages to generate a response for.
     * @param maxTokens - optional token limit; the default is not visible here.
     * @param options - optional generation options.
     * @returns A promise resolving to a string (presumably the full generated text).
     */
    generateResponse(messages: Message[], maxTokens?: number, options?: GenerateOptions): Promise<string>;
    /**
     * Shut the client down.
     *
     * NOTE(review): presumably terminates the underlying worker — confirm.
     */
    terminate(): void;
    /**
     * Look up the status of a request.
     *
     * @param id - the ID to look up.
     * @returns The `MessageStatus` for `id`, or `undefined` (presumably when
     * no status has been recorded for that ID).
     */
    getStatus(id: string): MessageStatus | undefined;
}
//# sourceMappingURL=llm-client.d.ts.map