cui-llama.rn

import type {
    NativeContextParams,
    NativeLlamaContext,
    NativeCompletionParams,
    NativeCompletionTokenProb,
    NativeCompletionResult,
    NativeTokenizeResult,
    NativeEmbeddingResult,
    NativeSessionLoadResult,
    NativeCPUFeatures,
    NativeEmbeddingParams,
    NativeRerankParams,
    NativeRerankResult,
    NativeCompletionTokenProbItem,
    NativeCompletionResultTimings,
    JinjaFormattedChatResult,
    FormattedChatResult,
    NativeImageProcessingResult,
} from './NativeRNLlama';
import type { SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule } from './grammar';
import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
export type RNLlamaMessagePart = {
    type: string;
    text?: string;
    image_url?: {
        url?: string;
    };
    input_audio?: {
        format: string;
        data?: string;
        url?: string;
    };
};
export type RNLlamaOAICompatibleMessage = {
    role: string;
    content?: string | RNLlamaMessagePart[];
};
export type {
    NativeContextParams,
    NativeLlamaContext,
    NativeCompletionParams,
    NativeCompletionTokenProb,
    NativeCompletionResult,
    NativeTokenizeResult,
    NativeEmbeddingResult,
    NativeSessionLoadResult,
    NativeEmbeddingParams,
    NativeRerankParams,
    NativeRerankResult,
    NativeCompletionTokenProbItem,
    NativeCompletionResultTimings,
    FormattedChatResult,
    JinjaFormattedChatResult,
    NativeImageProcessingResult,
    SchemaGrammarConverterPropOrder,
    SchemaGrammarConverterBuiltinRule,
};
export declare const RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = "<__media__>";
export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
export type TokenData = {
    token: string;
    completion_probabilities?: Array<NativeCompletionTokenProb>;
};
export declare enum CACHE_TYPE {
    F16 = "f16",
    F32 = "f32",
    Q8_0 = "q8_0",
    Q4_0 = "q4_0",
    Q4_1 = "q4_1",
    IQ4_NL = "iq4_nl",
    Q5_0 = "q5_0",
    Q5_1 = "q5_1"
}
export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
    cache_type_k?: CACHE_TYPE;
    cache_type_v?: CACHE_TYPE;
    pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
};
export type EmbeddingParams = NativeEmbeddingParams;
export type RerankParams = {
    normalize?: number;
};
export type RerankResult = {
    score: number;
    index: number;
    document?: string;
};
export type CompletionResponseFormat = {
    type: 'text' | 'json_object' | 'json_schema';
    json_schema?: {
        strict?: boolean;
        schema: object;
    };
    schema?: object;
};
export type CompletionBaseParams = {
    prompt?: string;
    messages?: RNLlamaOAICompatibleMessage[];
    chatTemplate?: string;
    chat_template?: string;
    jinja?: boolean;
    tools?: object;
    parallel_tool_calls?: object;
    tool_choice?: string;
    response_format?: CompletionResponseFormat;
    media_paths?: string | string[];
};
export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & CompletionBaseParams;
export type BenchResult = {
    modelDesc: string;
    modelSize: number;
    modelNParams: number;
    ppAvg: number;
    ppStd: number;
    tgAvg: number;
    tgStd: number;
};
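/*
 * Usage sketch (not part of the generated typings): constructing the parameter and
 * message shapes declared above. The model and image paths are placeholders, and the
 * part `type` strings ('text', 'image_url') follow the OpenAI-compatible convention;
 * they are not constrained by the declared `type: string` field. The `model` field
 * comes from NativeContextParams (see the initLlama signature below).
 *
 *   const params: ContextParams = {
 *     model: '/path/to/model.gguf',
 *     cache_type_k: CACHE_TYPE.Q8_0,
 *     cache_type_v: CACHE_TYPE.Q8_0,
 *     pooling_type: 'none',
 *   };
 *
 *   const messages: RNLlamaOAICompatibleMessage[] = [
 *     { role: 'system', content: 'You are a helpful assistant.' },
 *     {
 *       role: 'user',
 *       content: [
 *         { type: 'text', text: 'Describe this image.' },
 *         { type: 'image_url', image_url: { url: 'file:///path/to/image.jpg' } },
 *       ],
 *     },
 *   ];
 */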
export declare class LlamaContext {
    id: number;
    gpu: boolean;
    reasonNoGPU: string;
    model: NativeLlamaContext['model'];
    constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext);
    /**
     * Load cached prompt & completion state from a file.
     */
    loadSession(filepath: string): Promise<NativeSessionLoadResult>;
    /**
     * Save current cached prompt & completion state to a file.
     */
    saveSession(filepath: string, options?: {
        tokenSize: number;
    }): Promise<number>;
    isLlamaChatSupported(): boolean;
    isJinjaSupported(): boolean;
    getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string | null, params?: {
        jinja?: boolean;
        response_format?: CompletionResponseFormat;
        tools?: object;
        parallel_tool_calls?: object;
        tool_choice?: string;
        enable_thinking?: boolean;
    }): Promise<FormattedChatResult | JinjaFormattedChatResult>;
    /**
     * Generate a completion based on the provided parameters
     * @param params Completion parameters including prompt or messages
     * @param callback Optional callback for token-by-token streaming
     * @returns Promise resolving to the completion result
     *
     * Note: For multimodal support, you can include a media_paths parameter.
     * This will process the images and add them to the context before generating text.
     * Multimodal support must be enabled via initMultimodal() first.
     */
    completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
    stopCompletion(): Promise<void>;
    /**
     * Tokenize text or text with images
     * @param text Text to tokenize
     * @param params.media_paths Array of image paths to tokenize (if multimodal is enabled)
     * @returns Promise resolving to the tokenize result
     */
    tokenizeAsync(text: string, { media_paths: mediaPaths, }?: {
        media_paths?: string[];
    }): Promise<NativeTokenizeResult>;
    tokenizeSync(text: string, { media_paths: mediaPaths, }?: {
        media_paths?: string[];
    }): NativeTokenizeResult;
    detokenize(tokens: number[]): Promise<string>;
    embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
    /**
     * Rerank documents based on relevance to a query
     * @param query The query text to rank documents against
     * @param documents Array of document texts to rank
     * @param params Optional reranking parameters
     * @returns Promise resolving to an array of ranking results with scores and indices
     */
    rerank(query: string, documents: string[], params?: RerankParams): Promise<RerankResult[]>;
    bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
    applyLoraAdapters(loraList: Array<{
        path: string;
        scaled?: number;
    }>): Promise<void>;
    removeLoraAdapters(): Promise<void>;
    getLoadedLoraAdapters(): Promise<Array<{
        path: string;
        scaled?: number;
    }>>;
    /**
     * Initialize multimodal support with a mmproj file
     * @param params Parameters for multimodal support
     * @param params.path Path to the multimodal projector file
     * @param params.use_gpu Whether to use GPU
     * @returns Promise resolving to true if initialization was successful
     */
    initMultimodal({ path, use_gpu: useGpu, }: {
        path: string;
        use_gpu?: boolean;
    }): Promise<boolean>;
    /**
     * Check if multimodal support is enabled
     * @returns Promise resolving to true if multimodal is enabled
     */
    isMultimodalEnabled(): Promise<boolean>;
    /**
     * Check multimodal support
     * @returns Promise resolving to an object with vision and audio support
     */
    getMultimodalSupport(): Promise<{
        vision: boolean;
        audio: boolean;
    }>;
    /**
     * Release multimodal support
     * @returns Promise resolving to void
     */
    releaseMultimodal(): Promise<void>;
    /**
     * Initialize TTS support with a vocoder model
     * @param params Parameters for TTS support
     * @param params.path Path to the vocoder model
     * @returns Promise resolving to true if initialization was successful
     */
    initVocoder({ path }: {
        path: string;
    }): Promise<boolean>;
    /**
     * Check if TTS support is enabled
     * @returns Promise resolving to true if TTS is enabled
     */
    isVocoderEnabled(): Promise<boolean>;
    /**
     * Get a formatted audio completion prompt
     * @param speaker Speaker configuration object (or null for the default speaker)
     * @param textToSpeak Text to speak
     * @returns Promise resolving to the formatted audio completion prompt
     */
    getFormattedAudioCompletion(speaker: object | null, textToSpeak: string): Promise<string>;
    /**
     * Get guide tokens for audio completion
     * @param textToSpeak Text to speak
     * @returns Promise resolving to the guide tokens
     */
    getAudioCompletionGuideTokens(textToSpeak: string): Promise<Array<number>>;
    /**
     * Decode audio tokens
     * @param tokens Array of audio tokens
     * @returns Promise resolving to the decoded audio data
     */
    decodeAudioTokens(tokens: number[]): Promise<Array<number>>;
    /**
     * Release TTS support
     * @returns Promise resolving to void
     */
    releaseVocoder(): Promise<void>;
    release(): Promise<void>;
}
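/*
 * Usage sketch (illustrative, not part of the generated typings): initializing a
 * context with initLlama (declared below) and streaming a completion. Only fields
 * declared in this file are used; the model path is a placeholder, and the remaining
 * NativeCompletionParams fields are assumed to be optional here.
 *
 *   import { initLlama } from 'cui-llama.rn';
 *
 *   const context = await initLlama(
 *     { model: '/path/to/model.gguf' },
 *     (progress) => console.log('load progress', progress),
 *   );
 *
 *   // Streams tokens through the callback, then resolves with the full result.
 *   const result = await context.completion(
 *     {
 *       messages: [{ role: 'user', content: 'Hello!' }],
 *       jinja: true,
 *     },
 *     (data) => {
 *       // Called once per generated token (a TokenData value).
 *       console.log(data.token);
 *     },
 *   );
 *
 *   await context.release();
 */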
export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
export declare function toggleNativeLog(enabled: boolean): Promise<void>;
export declare function addNativeLogListener(listener: (level: string, text: string) => void): {
    remove: () => void;
};
export declare function setContextLimit(limit: number): Promise<void>;
export declare function loadLlamaModelInfo(model: string): Promise<Object>;
export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
export declare function releaseAllLlama(): Promise<void>;
//# sourceMappingURL=index.d.ts.map
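/*
 * Usage sketch (illustrative): enabling native logging and multimodal input with the
 * helpers declared above. Assumes `context` was created with initLlama as in the
 * earlier sketch; the mmproj and image paths are placeholders.
 *
 *   toggleNativeLog(true);
 *   const logger = addNativeLogListener((level, text) => console.log(level, text));
 *
 *   const ok = await context.initMultimodal({ path: '/path/to/mmproj.gguf', use_gpu: true });
 *   if (ok) {
 *     const support = await context.getMultimodalSupport(); // { vision, audio }
 *     const result = await context.completion({
 *       messages: [{ role: 'user', content: 'What is in this picture?' }],
 *       media_paths: ['/path/to/image.jpg'],
 *     });
 *   }
 *
 *   await context.releaseMultimodal();
 *   logger.remove();
 */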