inference-server
Version:
Libraries and a server for building AI applications. Includes adapters to various native bindings that allow local inference. Integrate it into your application, or run it as a microservice.
62 lines (61 loc) • 2.23 kB
TypeScript
import PQueue from 'p-queue';
import EventEmitter3 from 'eventemitter3';
import { ModelInstance } from './instance.js';
import { ModelConfig, InferenceParams, ModelEngine } from './types/index.js';
import { Logger, LogLevel } from './lib/logger.js';
/**
 * Pairs a model instance with an asynchronous `release` callback.
 *
 * This is the value that `ModelPool.requestInstance` resolves to.
 */
export interface ModelInstanceHandle {
  /** The model instance this handle refers to. */
  instance: ModelInstance;

  /**
   * Zero-argument async callback that resolves with no value.
   * NOTE(review): presumably hands the instance back to the pool — confirm
   * against the implementation.
   */
  release: () => Promise<void>;
}
/**
 * Signature of the optional `prepareInstance` callback accepted by the
 * `ModelPool` constructor.
 *
 * @param instance - The model instance passed to the callback.
 * @param signal - Optional abort signal. NOTE(review): presumably used to
 *   cancel the preparation step — confirm against the implementation.
 * @returns A promise that resolves with no value.
 */
type PrepareModelInstanceCallback = (
  instance: ModelInstance,
  signal?: AbortSignal,
) => Promise<void>;
/**
 * Type of the `config` property exposed by `ModelPool`.
 *
 * Has the same `concurrency` and `models` fields as `ModelPoolOptions`, but
 * `concurrency` is required here.
 */
interface ModelPoolConfig {
  /**
   * Numeric concurrency setting.
   * NOTE(review): presumably the limit applied to the pool's queue — confirm.
   */
  concurrency: number;

  /**
   * Model configurations keyed by string.
   * NOTE(review): keys are presumably model ids — confirm against callers.
   */
  models: Record<string, ModelConfig>;
}
/**
 * Options object accepted as the first argument of the `ModelPool`
 * constructor.
 */
export interface ModelPoolOptions {
  /**
   * Optional numeric concurrency setting.
   * NOTE(review): the value used when omitted is not visible in this
   * declaration — check the implementation.
   */
  concurrency?: number;

  /**
   * Model configurations keyed by string (required).
   * NOTE(review): keys are presumably model ids — confirm against callers.
   */
  models: Record<string, ModelConfig>;

  /** Optional logging setting: either a `Logger` or a `LogLevel`. */
  log?: Logger | LogLevel;
}
/**
 * Event names used as the type argument of the `EventEmitter3` base class of
 * `ModelPool`.
 */
type ModelPoolEvent =
  | 'ready'
  | 'spawn'
  | 'release';
/**
 * Ambient declaration of the model pool class.
 *
 * Extends `EventEmitter3` typed with the `ModelPoolEvent` names
 * (`'ready'`, `'spawn'`, `'release'`). Only the type surface is declared
 * here; behaviour notes below that go beyond the signatures are marked as
 * assumptions to confirm against the implementation.
 */
export declare class ModelPool extends EventEmitter3<ModelPoolEvent> {
  // ---- public state -------------------------------------------------------

  /** `p-queue` queue instance owned by the pool. */
  queue: PQueue;

  /** Pool configuration (`concurrency` and `models`). */
  config: ModelPoolConfig;

  /**
   * Model instances keyed by string.
   * NOTE(review): keys are presumably instance ids — confirm.
   */
  instances: Record<string, ModelInstance>;

  // ---- private state (types are not part of the declaration) --------------

  private engines?;
  private cleanupInterval?;
  private log;
  private requestSequence;
  private pendingRequests;
  private shutdownController;
  private gpuLock;
  private prepareInstance?;

  // ---- construction -------------------------------------------------------

  /**
   * @param options - Pool options (`models` is required; `concurrency` and
   *   `log` are optional).
   * @param prepareInstance - Optional callback receiving a model instance and
   *   an optional abort signal.
   */
  constructor(options: ModelPoolOptions, prepareInstance?: PrepareModelInstanceCallback);

  // ---- public API ---------------------------------------------------------

  /**
   * Initializes the pool with the given engines.
   *
   * @param engines - Engines keyed by string.
   * @returns A promise that resolves with no value.
   */
  init(engines: Record<string, ModelEngine>): Promise<void>;

  /**
   * @param model - Configuration of the model to ensure instances for.
   * @returns An array of promises (not a promise of an array), each resolving
   *   to a `ModelInstance`.
   */
  ensureModelInstances(model: ModelConfig): Promise<ModelInstance<unknown>>[];

  /**
   * Disposes the pool.
   *
   * @returns A promise that resolves with no value.
   */
  dispose(): Promise<void>;

  /**
   * Reports the current pool status.
   *
   * @returns An object with numeric `processing` and `pending` fields and an
   *   `instances` map (string keys) describing each instance's model, status,
   *   engine, device, context state and last-used value.
   */
  getStatus(): {
    processing: number;
    pending: number;
    instances: Record<
      string,
      {
        model: string;
        status: "error" | "preparing" | "idle" | "busy" | "loading";
        engine: (string & {}) | import("./engines").BuiltInEngineName;
        device: string;
        contextState: string | undefined;
        lastUsed: string;
      }
    >;
  };

  /**
   * @param modelId - Identifier of the model to check.
   * @returns A boolean. NOTE(review): presumably whether another instance of
   *   that model may be spawned — confirm the exact criteria.
   */
  canSpawnInstance(modelId: string): boolean;

  /**
   * Requests a model instance for the given inference parameters.
   *
   * @param params - Partial inference parameters.
   * @param signal - Optional abort signal.
   * @returns A promise resolving to a handle holding the instance and its
   *   `release` callback.
   */
  requestInstance(params: Partial<InferenceParams>, signal?: AbortSignal): Promise<ModelInstanceHandle>;

  // ---- private methods (signatures are not part of the declaration) -------

  private disposeOutdatedInstances;
  private disposeInstance;
  private spawnInstance;
  private acquireGpuInstance;
  private acquireIdleInstance;
  private acquireInstance;
  private createRequestSequence;
}
// Empty export list: adds no bindings of its own.
// NOTE(review): presumably emitted by the declaration generator so the file is
// always treated as a module — confirm before removing.
export {};