inference-server
Libraries and a server for building AI applications, with adapters to various native bindings for local inference. Integrate it into your application, or run it as a microservice. A usage sketch follows the type declarations below.
import PQueue from 'p-queue';
import { FileDownloadProgress, ModelConfig, ModelEngine } from './types/index.js';
import { Logger, LogLevel } from './lib/logger.js';
/** A configured model together with its runtime state in the store. */
export interface StoredModel extends ModelConfig {
    meta?: unknown;
    /** Active file download trackers for this model. */
    downloads?: Map<string, DownloadTracker>;
    status: 'unloaded' | 'preparing' | 'ready' | 'error';
}
export interface ModelStoreOptions {
    /** Directory where downloaded model files are cached. */
    modelsCachePath: string;
    /** The models to manage, keyed by model id. */
    models: Record<string, ModelConfig>;
    /** How many models may be prepared concurrently. */
    prepareConcurrency?: number;
    /** A logger instance, or a log level for the default logger. */
    log?: Logger | LogLevel;
}
/** Manages model download, preparation and status tracking across engines. */
export declare class ModelStore {
    prepareQueue: PQueue;
    models: Record<string, StoredModel>;
    engines?: Record<string, ModelEngine>;
    private prepareController;
    private modelsCachePath;
    private log;
    constructor(options: ModelStoreOptions);
    /** Registers the available engines and initializes the store. */
    init(engines: Record<string, ModelEngine>): Promise<void>;
    /** Disposes the store, cancelling any in-flight preparation work. */
    dispose(): void;
    private onDownloadProgress;
    /** Downloads and prepares a model; abortable via `signal`. */
    prepareModel(modelId: string, signal?: AbortSignal): Promise<void>;
    /** Returns a status snapshot for every model in the store. */
    getStatus(): {
        [k: string]: {
            engine: (string & {}) | import("./engines").BuiltInEngineName;
            device: {
                gpu?: boolean | "auto" | (string & {});
            } | undefined;
            minInstances: number;
            maxInstances: number;
            status: "error" | "unloaded" | "preparing" | "ready";
            downloads: any;
        };
    };
}
/** A timestamped download progress sample. */
type ProgressState = {
    loadedBytes: number;
    totalBytes: number;
    timestamp: number;
};
/** Download metrics derived from recent progress samples. */
type DownloadStatus = {
    percent: number;
    speed: number;
    etaSeconds: number;
    loadedBytes: number;
    totalBytes: number;
};
/**
 * Buffers download progress samples over a sliding time window and
 * derives percent complete, transfer speed and ETA from them.
 */
declare class DownloadTracker {
    progressBuffer: ProgressState[];
    private timeWindow;
    constructor(timeWindow?: number);
    /** Records a new progress sample. */
    pushProgress({ loadedBytes, totalBytes }: FileDownloadProgress): void;
    private cleanup;
    /** Returns the current status, or null when it cannot be computed yet. */
    getStatus(): DownloadStatus | null;
}
export {};
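
To show how these pieces fit together, here is a minimal usage sketch of the ModelStore declared above. It assumes the package re-exports ModelStore and ModelEngine from its root, and the ModelConfig fields used here (engine, device, minInstances, maxInstances) are guesses inferred from the getStatus() return type, not confirmed API.

import { ModelStore, type ModelEngine } from 'inference-server';

// Placeholder: real ModelEngine adapters come from the package's native
// bindings and are not shown in this declaration file.
declare const engines: Record<string, ModelEngine>;

async function main() {
    const store = new ModelStore({
        modelsCachePath: './models-cache',
        models: {
            // ModelConfig's exact shape is not visible here; these fields
            // are inferred from the getStatus() return type above.
            'my-model': {
                engine: 'my-engine',
                device: { gpu: 'auto' },
                minInstances: 0,
                maxInstances: 1,
            } as any,
        },
        prepareConcurrency: 2,
    });

    await store.init(engines);

    // Preparation (download + setup) can be cancelled with an AbortSignal.
    const controller = new AbortController();
    await store.prepareModel('my-model', controller.signal);

    console.log(store.getStatus());
    store.dispose();
}

main().catch(console.error);

Because prepareModel() takes an AbortSignal, model downloads can be wired into request cancellation or shutdown logic.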
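
The DownloadTracker declaration suggests a sliding-window design: progress samples accumulate in progressBuffer, cleanup() evicts samples older than timeWindow, and getStatus() derives speed and ETA from what remains. Below is a plausible sketch of that logic, not the package's actual implementation; the default timeWindow of 5000 ms is an assumption.

// A sliding-window download tracker: keeps recent progress samples and
// computes average speed and ETA over the window.
class DownloadTracker {
    progressBuffer: ProgressState[] = [];

    // Default window of 5 seconds is an assumption; the real default is
    // not visible in the declaration file.
    constructor(private timeWindow: number = 5000) {}

    pushProgress({ loadedBytes, totalBytes }: { loadedBytes: number; totalBytes: number }): void {
        this.progressBuffer.push({ loadedBytes, totalBytes, timestamp: Date.now() });
        this.cleanup();
    }

    // Drop samples that have fallen out of the sliding time window.
    private cleanup(): void {
        const cutoff = Date.now() - this.timeWindow;
        this.progressBuffer = this.progressBuffer.filter((p) => p.timestamp >= cutoff);
    }

    getStatus(): DownloadStatus | null {
        if (this.progressBuffer.length < 2) return null; // not enough samples yet
        const first = this.progressBuffer[0];
        const last = this.progressBuffer[this.progressBuffer.length - 1];
        const elapsedMs = last.timestamp - first.timestamp;
        if (elapsedMs <= 0) return null;
        // Average speed across the window, in bytes per second.
        const speed = ((last.loadedBytes - first.loadedBytes) / elapsedMs) * 1000;
        const remaining = last.totalBytes - last.loadedBytes;
        return {
            percent: last.totalBytes > 0 ? (last.loadedBytes / last.totalBytes) * 100 : 0,
            speed,
            etaSeconds: speed > 0 ? remaining / speed : Infinity,
            loadedBytes: last.loadedBytes,
            totalBytes: last.totalBytes,
        };
    }
}

Averaging over a short window smooths bursty progress events while still reacting quickly when transfer speed changes.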