@axflow/models

Zero-dependency, modular SDK for building robust natural language applications

type OllamaModelOptions = {
  top_k?: number;
  top_p?: number;
  stop?: string;
  temperature?: number;
  repeat_penalty?: number;
  repeat_last_n?: number;
  num_threads?: number;
  num_gpu?: number;
  num_gqa?: number;
  num_ctx?: number;
  mirostat?: number;
  mirostat_eta?: number;
  mirostat_tau?: number;
  tfs_z?: number;
};

declare namespace OllamaGenerationTypes {
  type Request = {
    model: string;
    prompt: string;
    system?: string;
    template?: string;
    context?: string;
    options?: OllamaModelOptions;
  };

  type RequestOptions = {
    apiUrl?: string;
    fetch?: typeof fetch;
    headers?: Record<string, string>;
    signal?: AbortSignal;
  };

  type Chunk = {
    model: string;
    created_at: string;
    response: string;
    done: boolean;
    context?: Array<number>;
    total_duration?: number;
    load_duration?: number;
    prompt_eval_count?: number;
    prompt_eval_duration?: number;
    eval_count?: number;
    eval_duration?: number;
  };
}

/**
 * Stream a generation against an Ollama serving endpoint. Returns a stream of bytes.
 * Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
 *
 * @param request The request body containing the model, prompt, and options.
 * @param options
 * @param options.apiUrl The Ollama serving URL. Defaults to http://127.0.0.1:11434.
 * @param options.fetch The fetch implementation to use. Defaults to globalThis.fetch.
 * @param options.headers Optionally add additional HTTP headers to the request.
 * @param options.signal An AbortSignal that can be used to abort the fetch request.
 * @returns A stream of bytes directly from the API.
 */
declare function streamBytes(request: OllamaGenerationTypes.Request, options: OllamaGenerationTypes.RequestOptions): Promise<ReadableStream<Uint8Array>>;

/**
 * Stream a generation against an Ollama serving endpoint, returning JavaScript objects.
 *
 * Example chunk:
 *   {
 *     model: 'llama2',
 *     created_at: '2023-08-04T19:22:45.499127Z',
 *     response: ' The',
 *     done: false
 *   }
 *
 * @param request The request body containing the model, prompt, and options.
 * @param options
 * @param options.apiUrl The Ollama serving URL. Defaults to http://127.0.0.1:11434.
 * @param options.fetch The fetch implementation to use. Defaults to globalThis.fetch.
 * @param options.headers Optionally add additional HTTP headers to the request.
 * @param options.signal An AbortSignal that can be used to abort the fetch request.
 * @returns A stream of objects representing each chunk from the API.
 */
declare function stream(request: OllamaGenerationTypes.Request, options: OllamaGenerationTypes.RequestOptions): Promise<ReadableStream<OllamaGenerationTypes.Chunk>>;

/**
 * Stream a generation against an Ollama serving endpoint, returning only the text tokens.
 *
 * @param request The request body containing the model, prompt, and options.
 * @param options
 * @param options.apiUrl The Ollama serving URL. Defaults to http://127.0.0.1:11434.
 * @param options.fetch The fetch implementation to use. Defaults to globalThis.fetch.
 * @param options.headers Optionally add additional HTTP headers to the request.
 * @param options.signal An AbortSignal that can be used to abort the fetch request.
 * @returns A stream of tokens from the API.
 */
declare function streamTokens(request: OllamaGenerationTypes.Request, options: OllamaGenerationTypes.RequestOptions): Promise<ReadableStream<string>>;

/**
 * An object that encapsulates methods for calling the Ollama generation API.
 */
declare class OllamaGeneration {
  static streamBytes: typeof streamBytes;
  static stream: typeof stream;
  static streamTokens: typeof streamTokens;
}

export { OllamaGeneration, OllamaGenerationTypes };
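For reference, a minimal consumption sketch of the declarations above, runnable on Node 18+ (which ships fetch and web streams). It is not part of the published file: the subpath import and the llama2 model name are assumptions, and it expects an Ollama server running locally on its default port.

// Minimal sketch (assumed subpath import; adjust to your setup).
import { OllamaGeneration } from '@axflow/models/ollama/generation';

async function main() {
  // streamTokens resolves to a ReadableStream<string> of response tokens.
  // Passing an empty options object relies on the documented defaults
  // (local server, globalThis.fetch); set apiUrl here to override them.
  const tokenStream = await OllamaGeneration.streamTokens(
    { model: 'llama2', prompt: 'Why is the sky blue?' },
    {},
  );

  // Read the stream to completion, writing each token as it arrives.
  const reader = tokenStream.getReader();
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    process.stdout.write(value);
  }
}

main().catch(console.error);

Swapping streamTokens for stream in the sketch yields OllamaGenerationTypes.Chunk objects instead of strings, which is useful when you need metadata such as done, context, or the timing fields on the final chunk.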