// inference-server
// Version:
// Libraries and server to build AI applications. Adapters to various native bindings allowing local inference. Integrate it with your application, or use as a microservice.
// 64 lines (63 loc) • 3.07 kB
// TypeScript
import { ModelEngine, ModelConfig, ModelInstanceRequest, ChatCompletionTaskResult, TextCompletionTaskResult, ChatCompletionTaskArgs, TextCompletionTaskArgs, EmbeddingTaskArgs, ImageToTextTaskArgs, ImageToImageTaskArgs, SpeechToTextTaskArgs, TextToSpeechTaskArgs, TextToImageTaskArgs, ObjectDetectionTaskArgs, TextClassificationTaskArgs, EmbeddingTaskResult, InferenceTask, ImageToTextTaskResult, ImageToImageTaskResult, SpeechToTextTaskResult, TextToSpeechTaskResult, TextToImageTaskResult, TextClassificationTaskResult, ObjectDetectionTaskResult } from './types/index.js';
import { Logger } from './lib/logger.js';
/**
 * Status values a {@link ModelInstance} reports through its `status` field.
 *
 * The meaning of each value is defined by the implementation, which is not
 * part of this declaration file.
 */
type ModelInstanceStatus =
  | 'idle'
  | 'busy'
  | 'error'
  | 'loading'
  | 'preparing';
/**
 * Options object accepted by the {@link ModelInstance} constructor: every
 * field of {@link ModelConfig} plus the instance-level fields declared here.
 */
interface ModelInstanceOptions extends ModelConfig {
  /** Optional logger for the instance. Fallback behavior when omitted is not visible here. */
  log?: Logger;
  /**
   * Required GPU flag.
   * NOTE(review): presumably surfaces as `ModelInstance.gpu` — confirm against the implementation.
   */
  gpu: boolean;
}
/**
 * A model instance constructed from a {@link ModelEngine} and a set of
 * {@link ModelInstanceOptions}.
 *
 * This is an ambient declaration: only signatures are visible. Behavioral
 * notes below that go beyond the signatures are marked as assumptions and
 * should be confirmed against the implementation.
 *
 * @typeParam TEngineRef - Type of the engine-specific reference.
 *   NOTE(review): no visible member signature uses this parameter
 *   (`getEngineRef()` returns `unknown`). Private member types are omitted
 *   from declarations, so it is presumably the type of `engineRef` — confirm.
 */
export declare class ModelInstance<TEngineRef = unknown> {
  // ---- Public state -------------------------------------------------------

  /** Identifier of this instance. */
  id: string;
  /** Current status of this instance. */
  status: ModelInstanceStatus;
  /** Identifier of the model this instance belongs to. */
  modelId: string;
  /** Model configuration associated with this instance. */
  config: ModelConfig;
  /** Fingerprint string. NOTE(review): what it is derived from is not visible here. */
  fingerprint: string;
  /** Creation time as a `Date`. */
  createdAt: Date;
  /**
   * Last-use marker, stored as a number (unlike `createdAt`).
   * NOTE(review): presumably an epoch timestamp — confirm unit.
   */
  lastUsed: number;
  /** GPU flag for this instance. */
  gpu: boolean;
  /** Time-to-live value. NOTE(review): unit is not visible in this declaration. */
  ttl: number;
  /** Logger used by this instance. */
  log: Logger;

  // ---- Private state (types are omitted from declaration output) ---------

  private engine;
  private engineRef?;
  private contextIdentity?;
  private needsContextReset;
  private currentRequest?;
  private shutdownController;

  /**
   * @param engine - Engine this instance is built on.
   * @param options - Instance options; `log` and `gpu` are destructured
   *   separately from the remaining options.
   */
  constructor(
    engine: ModelEngine,
    { log, gpu, ...options }: ModelInstanceOptions,
  );

  private generateInstanceId;
  private generateTaskId;

  // ---- Lifecycle ----------------------------------------------------------

  /** Returns the engine reference, typed as `unknown`; callers must narrow it. */
  getEngineRef(): unknown;

  /**
   * Asynchronously loads the instance.
   *
   * @param signal - Optional abort signal. NOTE(review): presumably cancels
   *   an in-progress load — confirm.
   */
  load(signal?: AbortSignal): Promise<void>;

  /** Asynchronously disposes of the instance. */
  dispose(): Promise<void>;

  // ---- Request locking and context state ---------------------------------

  /**
   * Locks the instance for `request`.
   * NOTE(review): presumably paired with {@link ModelInstance.unlock} — confirm.
   */
  lock(request: ModelInstanceRequest): void;

  /** Releases a lock taken with {@link ModelInstance.lock}. */
  unlock(): void;

  /** Resets the instance's context. Exact effect is not visible in this declaration. */
  resetContext(): void;

  /** Returns the identity string of the current context state, or `undefined` if there is none. */
  getContextStateIdentity(): string | undefined;

  /** Reports whether the instance currently has context state. */
  hasContextState(): boolean;

  /** Reports whether `request` matches the instance's context state. */
  matchesContextState(request: ModelInstanceRequest): boolean;

  /** Reports whether the instance matches the requirements of `request`. */
  matchesRequirements(request: ModelInstanceRequest): boolean;

  // ---- Task processing ----------------------------------------------------

  private createTaskController;

  /**
   * Creates a chat completion task.
   *
   * @returns A handle carrying the task `id`, the `model` name, the creation
   *   time, a `result` promise and a `cancel` callback.
   */
  processChatCompletionTask(args: ChatCompletionTaskArgs): {
    id: string;
    model: string;
    createdAt: Date;
    result: Promise<ChatCompletionTaskResult>;
    cancel: () => void;
  };

  /**
   * Creates a text completion task.
   *
   * @returns A handle carrying the task `id`, the `model` name, the creation
   *   time, a `result` promise and a `cancel` callback.
   */
  processTextCompletionTask(args: TextCompletionTaskArgs): {
    id: string;
    model: string;
    createdAt: Date;
    result: Promise<TextCompletionTaskResult>;
    cancel: () => void;
  };

  private processTask;

  /** Returns an {@link InferenceTask} handle for an embedding task. */
  processEmbeddingTask(
    args: EmbeddingTaskArgs,
  ): InferenceTask<EmbeddingTaskResult>;

  /** Returns an {@link InferenceTask} handle for an image-to-text task. */
  processImageToTextTask(
    args: ImageToTextTaskArgs,
  ): InferenceTask<ImageToTextTaskResult>;

  /** Returns an {@link InferenceTask} handle for an image-to-image task. */
  processImageToImageTask(
    args: ImageToImageTaskArgs,
  ): InferenceTask<ImageToImageTaskResult>;

  /** Returns an {@link InferenceTask} handle for a speech-to-text task. */
  processSpeechToTextTask(
    args: SpeechToTextTaskArgs,
  ): InferenceTask<SpeechToTextTaskResult>;

  /** Returns an {@link InferenceTask} handle for a text-to-speech task. */
  processTextToSpeechTask(
    args: TextToSpeechTaskArgs,
  ): InferenceTask<TextToSpeechTaskResult>;

  /** Returns an {@link InferenceTask} handle for a text-to-image task. */
  processTextToImageTask(
    args: TextToImageTaskArgs,
  ): InferenceTask<TextToImageTaskResult>;

  /** Returns an {@link InferenceTask} handle for a text classification task. */
  processTextClassificationTask(
    args: TextClassificationTaskArgs,
  ): InferenceTask<TextClassificationTaskResult>;

  /** Returns an {@link InferenceTask} handle for an object detection task. */
  processObjectDetectionTask(
    args: ObjectDetectionTaskArgs,
  ): InferenceTask<ObjectDetectionTaskResult>;
}
// Empty export list: exports no additional names.
// NOTE(review): the compiler typically emits this in declaration output so that
// non-exported declarations in the file (here ModelInstanceStatus and
// ModelInstanceOptions) stay module-private — confirm before removing.
export {};