llama.rn

React Native binding of llama.cpp

import type {
  NativeContextParams,
  NativeLlamaContext,
  NativeCompletionParams,
  NativeCompletionTokenProb,
  NativeCompletionResult,
  NativeTokenizeResult,
  NativeEmbeddingResult,
  NativeSessionLoadResult,
  NativeEmbeddingParams,
  NativeCompletionTokenProbItem,
  NativeCompletionResultTimings,
} from './NativeRNLlama';
import type {
  SchemaGrammarConverterPropOrder,
  SchemaGrammarConverterBuiltinRule,
} from './grammar';
import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat';
export type {
  NativeContextParams,
  NativeLlamaContext,
  NativeCompletionParams,
  NativeCompletionTokenProb,
  NativeCompletionResult,
  NativeTokenizeResult,
  NativeEmbeddingResult,
  NativeSessionLoadResult,
  NativeEmbeddingParams,
  NativeCompletionTokenProbItem,
  NativeCompletionResultTimings,
  RNLlamaMessagePart,
  RNLlamaOAICompatibleMessage,
  SchemaGrammarConverterPropOrder,
  SchemaGrammarConverterBuiltinRule,
};
export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
export type TokenData = {
  token: string;
  completion_probabilities?: Array<NativeCompletionTokenProb>;
};
export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
  cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
  cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
  pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
};
export type EmbeddingParams = NativeEmbeddingParams;
export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & {
  prompt?: string;
  messages?: RNLlamaOAICompatibleMessage[];
  chatTemplate?: string;
};
export type BenchResult = {
  modelDesc: string;
  modelSize: number;
  modelNParams: number;
  ppAvg: number;
  ppStd: number;
  tgAvg: number;
  tgStd: number;
};
export declare class LlamaContext {
  id: number;
  gpu: boolean;
  reasonNoGPU: string;
  model: {
    isChatTemplateSupported?: boolean;
  };
  constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext);
  /**
   * Load cached prompt & completion state from a file.
   */
  loadSession(filepath: string): Promise<NativeSessionLoadResult>;
  /**
   * Save current cached prompt & completion state to a file.
   */
  saveSession(filepath: string, options?: {
    tokenSize: number;
  }): Promise<number>;
  getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string): Promise<string>;
  completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
  stopCompletion(): Promise<void>;
  tokenize(text: string): Promise<NativeTokenizeResult>;
  detokenize(tokens: number[]): Promise<string>;
  embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
  bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
  applyLoraAdapters(loraList: Array<{
    path: string;
    scaled?: number;
  }>): Promise<void>;
  removeLoraAdapters(): Promise<void>;
  getLoadedLoraAdapters(): Promise<Array<{
    path: string;
    scaled?: number;
  }>>;
  release(): Promise<void>;
}
export declare function setContextLimit(limit: number): Promise<void>;
export declare function loadLlamaModelInfo(model: string): Promise<Object>;
export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
export declare function releaseAllLlama(): Promise<void>;
//# sourceMappingURL=index.d.ts.map
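
For orientation, a minimal usage sketch of this API follows. Only the names declared above (initLlama, LlamaContext, completion, release, releaseAllLlama, TokenData) are taken from this file; the model path, the role/content message shape, and the n_predict field are illustrative assumptions about the NativeContextParams / NativeCompletionParams native types, which are not shown here and may require or name fields differently.

import { initLlama, releaseAllLlama, LlamaContext } from 'llama.rn'

async function runExample(): Promise<void> {
  // initLlama resolves to a LlamaContext once the model is loaded.
  // The file path is a placeholder; point it at a GGUF model on the device.
  const context: LlamaContext = await initLlama(
    { model: '/path/to/model.gguf' }, // hypothetical local path
    (progress) => console.log(`load progress: ${progress}`),
  )

  // completion() accepts either a raw `prompt` or OAI-compatible `messages`;
  // the optional callback receives TokenData as tokens stream in.
  const result = await context.completion(
    {
      messages: [{ role: 'user', content: 'Hello!' }], // assumed message shape
      n_predict: 64, // assumed NativeCompletionParams field
    },
    (data) => console.log(data.token),
  )
  console.log(result)

  // Free the context, or tear down every context at once.
  await context.release()
  await releaseAllLlama()
}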