UNPKG

inference-server

Version:

Libraries and server to build AI applications. Adapters to various native bindings allowing local inference. Integrate it with your application, or use as a microservice.

207 lines (206 loc) 8.54 kB
import type { ModelPool } from '../pool.js';
import type { ModelStore } from '../store.js';
import type { Logger } from '../lib/logger.js';
import type { StableDiffusionSamplingMethod } from '../engines/stable-diffusion-cpp/types.js';
import type {
  AssistantMessage,
  ChatMessage,
  CompletionFinishReason,
  ToolDefinition,
} from '../types/chat.js';
import type {
  Image,
  Audio,
  ModelConfig,
  TaskArgs,
  ChatCompletionTaskArgs,
  TextCompletionTaskArgs,
  EmbeddingTaskArgs,
  ImageToTextTaskArgs,
  SpeechToTextTaskArgs,
  TextToSpeechTaskArgs,
  TextToImageTaskArgs,
  ImageToImageTaskArgs,
  ObjectDetectionTaskArgs,
  TextClassificationTaskArgs,
} from '../types/index.js';

/**
 * Optional generation settings shared by {@link TextCompletionParams} and
 * {@link ChatCompletionParams}. Every field is optional; how an omitted field
 * is defaulted is not defined in this module.
 */
export interface TextCompletionParamsBase {
  temperature?: number;
  maxTokens?: number;
  seed?: number;
  stop?: string[];
  repeatPenalty?: number;
  repeatPenaltyNum?: number;
  frequencyPenalty?: number;
  presencePenalty?: number;
  grammar?: string;
  topP?: number;
  minP?: number;
  topK?: number;
  /** Numeric bias per string key. NOTE(review): key format (token text vs. id) is not visible here — confirm. */
  tokenBias?: Record<string, number>;
}

/** Parameters of a text completion request: a model name plus a raw prompt. */
export interface TextCompletionParams extends TextCompletionParamsBase {
  model: string;
  prompt: string;
}

/**
 * Parameters of a chat completion request: a model name plus the message list.
 * `grammar` is inherited from {@link TextCompletionParamsBase}.
 */
export interface ChatCompletionParams extends TextCompletionParamsBase {
  model: string;
  messages: ChatMessage[];
  /** Optional tool configuration; `definitions` maps a string key to each tool's definition. */
  tools?: {
    definitions: Record<string, ToolDefinition>;
    documentParams?: boolean;
    maxParallelCalls?: number;
  };
}

/** Embedding input carrying text, tagged with `type: 'text'`. */
export interface TextEmbeddingInput {
  type: 'text';
  content: string;
}

/** Embedding input carrying an image, tagged with `type: 'image'`. */
export interface ImageEmbeddingInput {
  type: 'image';
  content: Image;
}

/** A single embedding input: a tagged text/image object or a bare string. */
export type EmbeddingInput = TextEmbeddingInput | ImageEmbeddingInput | string;

/** Parameters of an embedding request; `input` may be one item or an array of items. */
export interface EmbeddingParams {
  model: string;
  input: EmbeddingInput | EmbeddingInput[];
  dimensions?: number;
  pooling?: 'cls' | 'mean';
}

/** Parameters of an image-to-text request. */
export interface ImageToTextParams {
  model: string;
  image: Image;
  prompt?: string;
  maxTokens?: number;
}

/**
 * Optional settings shared by {@link TextToImageParams} and
 * {@link ImageToImageParams}.
 */
export interface StableDiffusionParams {
  negativePrompt?: string;
  guidance?: number;
  styleRatio?: number;
  strength?: number;
  sampleSteps?: number;
  batchCount?: number;
  samplingMethod?: StableDiffusionSamplingMethod;
  cfgScale?: number;
  controlStrength?: number;
}

/** Parameters of a text-to-image request. */
export interface TextToImageParams extends StableDiffusionParams {
  model: string;
  prompt: string;
  width?: number;
  height?: number;
  seed?: number;
}

/** Parameters of an image-to-image request; same as text-to-image plus a source `image`. */
export interface ImageToImageParams extends StableDiffusionParams {
  model: string;
  image: Image;
  prompt: string;
  width?: number;
  height?: number;
  seed?: number;
}

/** Parameters of an object detection request. */
export interface ObjectDetectionParams {
  model: string;
  image: Image;
  threshold?: number;
  labels?: string[];
}

/** Parameters of a text classification request; `input` may be one string or several. */
export interface TextClassificationParams {
  model: string;
  input: string | string[];
  hypothesisTemplate?: string;
  threshold?: number;
  topK?: number;
  labels?: string[];
}

/** Parameters of a speech-to-text request. */
export interface SpeechToTextParams {
  model: string;
  audio: Audio;
  language?: string;
  prompt?: string;
  maxTokens?: number;
}

/** Parameters of a text-to-speech request. */
export interface TextToSpeechParams {
  model: string;
  text: string;
  voice?: string;
}

/**
 * Union of every request parameter shape declared in this module.
 *
 * Includes {@link TextClassificationParams} so that the union covers the same
 * set of tasks as {@link ModelEngine}'s processors and {@link TaskResult}.
 */
export type InferenceParams =
  | TextCompletionParams
  | ChatCompletionParams
  | EmbeddingParams
  | ImageToTextParams
  | SpeechToTextParams
  | TextToSpeechParams
  | TextToImageParams
  | ImageToImageParams
  | ObjectDetectionParams
  | TextClassificationParams;

/**
 * Context handed to engine functions: the model's config, optional meta and a logger.
 *
 * @typeParam TModelConfig - Config type; defaults to `ModelConfig`.
 * @typeParam TModelMeta - Meta type; defaults to `unknown`.
 */
export interface EngineContext<TModelConfig = ModelConfig, TModelMeta = unknown> {
  config: TModelConfig;
  meta?: TModelMeta;
  log: Logger;
}

/** Progress report for a single file, expressed as loaded and total byte counts. */
export interface FileDownloadProgress {
  file: string;
  loadedBytes: number;
  totalBytes: number;
}

/** Argument of {@link ModelEngine.start}: the model pool and the model store. */
export interface EngineStartContext {
  pool: ModelPool;
  store: ModelStore;
}

/** {@link EngineContext} extended with the model `instance` a task operates on. */
export interface EngineTaskContext<
  TModelInstance = unknown,
  TModelConfig = ModelConfig,
  TModelMeta = unknown,
> extends EngineContext<TModelConfig, TModelMeta> {
  instance: TModelInstance;
}

/**
 * Task context used by the text and chat completion processors; adds an
 * optional `resetContext` flag to {@link EngineTaskContext}.
 */
export interface EngineTextCompletionTaskContext<
  TModelInstance = unknown,
  TModelConfig = ModelConfig,
  TModelMeta = unknown,
> extends EngineTaskContext<TModelInstance, TModelConfig, TModelMeta> {
  resetContext?: boolean;
}

/**
 * Contract an engine implements.
 *
 * `prepareModel`, `createInstance` and `disposeInstance` are required; `start`,
 * `autoGpu` and every `process*Task` member are optional. Each task processor
 * receives the task arguments, a task context and an optional `AbortSignal`,
 * and resolves to the matching `*TaskResult`.
 *
 * @typeParam TModelInstance - Type produced by `createInstance` and consumed by `disposeInstance`.
 * @typeParam TModelConfig - Config type; must extend `ModelConfig`.
 * @typeParam TModelMeta - Type that `prepareModel` resolves to.
 */
export interface ModelEngine<
  TModelInstance = unknown,
  TModelConfig extends ModelConfig = ModelConfig,
  TModelMeta = unknown,
> {
  autoGpu?: boolean;
  start?: (ctx: EngineStartContext) => Promise<void>;
  /** Resolves to the model's meta; `onProgress`, when given, receives {@link FileDownloadProgress} values. */
  prepareModel: (
    ctx: EngineContext<TModelConfig, TModelMeta>,
    onProgress?: (progress: FileDownloadProgress) => void,
    signal?: AbortSignal,
  ) => Promise<TModelMeta>;
  createInstance: (
    ctx: EngineContext<TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TModelInstance>;
  disposeInstance: (instance: TModelInstance) => Promise<void>;
  processChatCompletionTask?: (
    task: ChatCompletionTaskArgs,
    ctx: EngineTextCompletionTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<ChatCompletionTaskResult>;
  processTextCompletionTask?: (
    task: TextCompletionTaskArgs,
    ctx: EngineTextCompletionTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TextCompletionTaskResult>;
  processEmbeddingTask?: (
    task: EmbeddingTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<EmbeddingTaskResult>;
  processImageToTextTask?: (
    task: ImageToTextTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<ImageToTextTaskResult>;
  processSpeechToTextTask?: (
    task: SpeechToTextTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<SpeechToTextTaskResult>;
  processTextToSpeechTask?: (
    task: TextToSpeechTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TextToSpeechTaskResult>;
  processTextToImageTask?: (
    task: TextToImageTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TextToImageTaskResult>;
  processImageToImageTask?: (
    task: ImageToImageTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<ImageToImageTaskResult>;
  processObjectDetectionTask?: (
    task: ObjectDetectionTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<ObjectDetectionTaskResult>;
  processTextClassificationTask?: (
    task: TextClassificationTaskArgs,
    ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TextClassificationTaskResult>;
}

/**
 * Names of the task processor members of {@link ModelEngine}, i.e. all of its
 * keys except `createInstance`, `disposeInstance`, `prepareModel`, `start`
 * and `autoGpu`.
 */
export type TaskProcessorName = keyof Omit<
  ModelEngine,
  'createInstance' | 'disposeInstance' | 'prepareModel' | 'start' | 'autoGpu'
>;

/** Generic signature of a task processor: any task arguments in, any task result out. */
export type TaskProcessor<TModelInstance, TModelConfig extends ModelConfig, TModelMeta> = (
  task: TaskArgs,
  ctx: EngineTaskContext<TModelInstance, TModelConfig, TModelMeta>,
  signal?: AbortSignal,
) => Promise<TaskResult>;

/** Result of an embedding task. */
export interface EmbeddingTaskResult {
  embeddings: Float32Array[];
  inputTokens: number;
}

/** Result of a chat completion task: the assistant message, finish reason and token counts. */
export interface ChatCompletionTaskResult {
  message: AssistantMessage;
  finishReason: CompletionFinishReason;
  promptTokens: number;
  completionTokens: number;
  contextTokens: number;
}

/** Result of a text completion task: the generated text, finish reason and token counts. */
export interface TextCompletionTaskResult {
  text: string;
  finishReason: CompletionFinishReason;
  promptTokens: number;
  completionTokens: number;
  contextTokens: number;
}

/** Result of an image-to-text task. */
export interface ImageToTextTaskResult {
  text: string;
}

/** Result of a text-to-image task. */
export interface TextToImageTaskResult {
  images: Image[];
  seed: number;
}

/** Result of an image-to-image task. */
export interface ImageToImageTaskResult {
  images: Image[];
  seed: number;
}

/** Result of a speech-to-text task. */
export interface SpeechToTextTaskResult {
  text: string;
}

/** Result of a text-to-speech task. */
export interface TextToSpeechTaskResult {
  audio: Audio;
}

/** One detection: a label, a score and a box given by `x`, `y`, `width` and `height`. */
export interface ObjectDetectionResult {
  label: string;
  score: number;
  // NOTE(review): units and origin of the box coordinates are not visible here — confirm.
  box: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
}

/** Result of an object detection task. */
export interface ObjectDetectionTaskResult {
  detections: ObjectDetectionResult[];
}

/** Classification of one input: a list of label names with scores. */
export interface TextClassificationResult {
  labels: Array<{
    name: string;
    score: number;
  }>;
}

/** Result of a text classification task. */
export interface TextClassificationTaskResult {
  classifications: TextClassificationResult[];
}

/** Union of every task result shape declared in this module. */
export type TaskResult =
  | ChatCompletionTaskResult
  | TextCompletionTaskResult
  | EmbeddingTaskResult
  | ImageToTextTaskResult
  | SpeechToTextTaskResult
  | TextToSpeechTaskResult
  | TextToImageTaskResult
  | ImageToImageTaskResult
  | ObjectDetectionTaskResult
  | TextClassificationTaskResult;