@axflow/models
Zero-dependency, modular SDK for building robust natural language applications
// Runtime model options forwarded to Ollama; see the Ollama API docs for parameter semantics.
type OllamaModelOptions = {
top_k?: number;
top_p?: number;
stop?: string;
temperature?: number;
repeat_penalty?: number;
repeat_last_n?: number;
num_threads?: number;
num_gpu?: number;
num_gqa?: number;
num_ctx?: number;
mirostat?: number;
mirostat_eta?: number;
mirostat_tau?: number;
tfs_z?: number;
};
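// A minimal sketch of an options object, assuming nothing beyond the type
// above; the values shown are illustrative, not recommended defaults.
const exampleOptions: OllamaModelOptions = {
  temperature: 0.7,
  top_k: 40,
  top_p: 0.9,
};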
declare namespace OllamaGenerationTypes {
type Request = {
model: string;
prompt: string;
system?: string;
template?: string;
  context?: Array<number>;
options?: OllamaModelOptions;
};
type RequestOptions = {
apiUrl?: string;
fetch?: typeof fetch;
headers?: Record<string, string>;
signal?: AbortSignal;
};
type Chunk = {
model: string;
created_at: string;
response: string;
done: boolean;
context?: Array<number>;
total_duration?: number;
load_duration?: number;
prompt_eval_count?: number;
prompt_eval_duration?: number;
eval_count?: number;
eval_duration?: number;
};
}
/**
 * Stream a generation against an Ollama serving endpoint. Returns a stream of bytes.
* Docs: https://github.com/jmorganca/ollama/blob/main/docs/api.md
*
* @param request The request body containing the model, prompt, and options.
* @param options
 * @param options.apiUrl The Ollama serving URL. Defaults to http://127.0.0.1:11434.
 * @param options.fetch The fetch implementation to use. Defaults to globalThis.fetch.
 * @param options.headers Optionally add additional HTTP headers to the request.
 * @param options.signal An AbortSignal that can be used to abort the fetch request.
* @returns A stream of bytes directly from the API.
*/
declare function streamBytes(request: OllamaGenerationTypes.Request, options: OllamaGenerationTypes.RequestOptions): Promise<ReadableStream<Uint8Array>>;
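// Usage sketch for streamBytes: read the raw byte stream with a reader and
// decode it manually. The model name 'llama2' and the prompt are assumptions
// for illustration; any model available on the local Ollama server works.
async function exampleStreamBytes(): Promise<void> {
  const byteStream = await streamBytes(
    { model: 'llama2', prompt: 'Why is the sky blue?' },
    {},
  );
  const reader = byteStream.getReader();
  const decoder = new TextDecoder();
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Each value is a Uint8Array of raw bytes straight from the API.
    console.log(decoder.decode(value, { stream: true }));
  }
}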
/**
 * Stream a generation against an Ollama serving endpoint. Returns a stream of JavaScript objects.
 *
 * Example chunk:
 * {
 *   model: 'llama2',
 *   created_at: '2023-08-04T19:22:45.499127Z',
 *   response: 'The',
 *   done: false
 * }
*
* @param request The request body containing the model, prompt, and options.
* @param options
 * @param options.apiUrl The Ollama serving URL. Defaults to http://127.0.0.1:11434.
 * @param options.fetch The fetch implementation to use. Defaults to globalThis.fetch.
 * @param options.headers Optionally add additional HTTP headers to the request.
 * @param options.signal An AbortSignal that can be used to abort the fetch request.
 * @returns A stream of objects representing each chunk from the API.
*/
declare function stream(request: OllamaGenerationTypes.Request, options: OllamaGenerationTypes.RequestOptions): Promise<ReadableStream<OllamaGenerationTypes.Chunk>>;
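// Usage sketch for stream: iterate over parsed Chunk objects, accumulate the
// response text, and keep the final context array, which can be passed back
// in a follow-up Request for short conversational memory. The model name is
// an assumption for illustration.
async function exampleStream(): Promise<{ text: string; context?: Array<number> }> {
  const chunkStream = await stream(
    { model: 'llama2', prompt: 'Why is the sky blue?' },
    {},
  );
  const reader = chunkStream.getReader();
  let text = '';
  let context: Array<number> | undefined;
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    text += value.response;
    // The final chunk has done === true and carries the context array.
    if (value.done) context = value.context;
  }
  return { text, context };
}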
/**
 * Stream a generation against an Ollama serving endpoint. Returns only the text tokens.
*
* @param request The request body containing the model, prompt, and options.
* @param options
 * @param options.apiUrl The Ollama serving URL. Defaults to http://127.0.0.1:11434.
 * @param options.fetch The fetch implementation to use. Defaults to globalThis.fetch.
 * @param options.headers Optionally add additional HTTP headers to the request.
 * @param options.signal An AbortSignal that can be used to abort the fetch request.
* @returns A stream of tokens from the API.
*/
declare function streamTokens(request: OllamaGenerationTypes.Request, options: OllamaGenerationTypes.RequestOptions): Promise<ReadableStream<string>>;
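// Usage sketch for streamTokens: the simplest consumption path, since each
// stream value is already a decoded text token. Model name is illustrative.
async function exampleStreamTokens(): Promise<string> {
  const tokenStream = await streamTokens(
    { model: 'llama2', prompt: 'Why is the sky blue?' },
    {},
  );
  const reader = tokenStream.getReader();
  let text = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    text += value;
  }
  return text;
}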
/**
 * An object that encapsulates methods for calling the Ollama generation API.
*/
declare class OllamaGeneration {
static streamBytes: typeof streamBytes;
static stream: typeof stream;
static streamTokens: typeof streamTokens;
}
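// Usage sketch via the class wrapper, also showing RequestOptions in use:
// the static methods are the same functions declared above, and the header
// and option values here are assumptions for illustration.
async function exampleClassUsage(signal: AbortSignal) {
  return OllamaGeneration.stream(
    { model: 'llama2', prompt: 'Hello', options: { temperature: 0.2 } },
    { headers: { 'x-request-id': 'example' }, signal },
  );
}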
export { OllamaGeneration, OllamaGenerationTypes };