cui-llama.rn
Fork of llama.rn for ChatterUI
import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures, NativeEmbeddingParams, NativeRerankParams, NativeRerankResult, NativeCompletionTokenProbItem, NativeCompletionResultTimings, JinjaFormattedChatResult, FormattedChatResult, NativeImageProcessingResult } from './NativeRNLlama';
import type { SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule } from './grammar';
import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
export type RNLlamaMessagePart = {
type: string;
text?: string;
image_url?: {
url?: string;
};
input_audio?: {
format: string;
data?: string;
url?: string;
};
};
export type RNLlamaOAICompatibleMessage = {
role: string;
content?: string | RNLlamaMessagePart[];
};
export type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeEmbeddingParams, NativeRerankParams, NativeRerankResult, NativeCompletionTokenProbItem, NativeCompletionResultTimings, FormattedChatResult, JinjaFormattedChatResult, NativeImageProcessingResult, SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule, };
export declare const RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = "<__media__>";
export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
export type TokenData = {
token: string;
completion_probabilities?: Array<NativeCompletionTokenProb>;
};
export declare enum CACHE_TYPE {
F16 = "f16",
F32 = "f32",
Q8_0 = "q8_0",
Q4_0 = "q4_0",
Q4_1 = "q4_1",
IQ4_NL = "iq4_nl",
Q5_0 = "q5_0",
Q5_1 = "q5_1"
}
export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
cache_type_k?: CACHE_TYPE;
cache_type_v?: CACHE_TYPE;
pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
};
export type EmbeddingParams = NativeEmbeddingParams;
export type RerankParams = {
normalize?: number;
};
export type RerankResult = {
score: number;
index: number;
document?: string;
};
export type CompletionResponseFormat = {
type: 'text' | 'json_object' | 'json_schema';
json_schema?: {
strict?: boolean;
schema: object;
};
schema?: object;
};
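/**
* Example (illustrative sketch, not part of the declarations): a response_format value
* that constrains completion output to a JSON schema. The schema itself is hypothetical.
*
*   const responseFormat: CompletionResponseFormat = {
*     type: 'json_schema',
*     json_schema: {
*       strict: true,
*       schema: {
*         type: 'object',
*         properties: { answer: { type: 'string' } },
*         required: ['answer'],
*       },
*     },
*   }
*/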
export type CompletionBaseParams = {
prompt?: string;
messages?: RNLlamaOAICompatibleMessage[];
chatTemplate?: string;
chat_template?: string;
jinja?: boolean;
tools?: object;
parallel_tool_calls?: object;
tool_choice?: string;
response_format?: CompletionResponseFormat;
media_paths?: string | string[];
};
export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & CompletionBaseParams;
export type BenchResult = {
modelDesc: string;
modelSize: number;
modelNParams: number;
ppAvg: number;
ppStd: number;
tgAvg: number;
tgStd: number;
};
export declare class LlamaContext {
id: number;
gpu: boolean;
reasonNoGPU: string;
model: NativeLlamaContext['model'];
constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext);
/**
* Load cached prompt & completion state from a file.
*/
loadSession(filepath: string): Promise<NativeSessionLoadResult>;
/**
* Save current cached prompt & completion state to a file.
*/
saveSession(filepath: string, options?: {
tokenSize: number;
}): Promise<number>;
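/**
* Example (illustrative sketch): persist the prompt cache and restore it later.
* The file path is hypothetical; `context` is an initialized LlamaContext.
*
*   const tokensSaved = await context.saveSession('/data/chat-session.bin', { tokenSize: 2048 })
*   // ...later, on a fresh context for the same model:
*   const loaded = await context.loadSession('/data/chat-session.bin')
*   // inspect `loaded` (NativeSessionLoadResult) for the restored state
*/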
isLlamaChatSupported(): boolean;
isJinjaSupported(): boolean;
getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string | null, params?: {
jinja?: boolean;
response_format?: CompletionResponseFormat;
tools?: object;
parallel_tool_calls?: object;
tool_choice?: string;
enable_thinking?: boolean;
}): Promise<FormattedChatResult | JinjaFormattedChatResult>;
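/**
* Example (illustrative sketch): render chat messages through the model's chat template.
* Passing null for the template uses the one bundled with the model; the message content
* is illustrative.
*
*   const formatted = await context.getFormattedChat(
*     [{ role: 'user', content: 'Summarize this repo.' }],
*     null,
*     { jinja: true },
*   )
*   // inspect the result for the rendered prompt (FormattedChatResult / JinjaFormattedChatResult)
*/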
/**
* Generate a completion based on the provided parameters
* @param params Completion parameters including prompt or messages
* @param callback Optional callback for token-by-token streaming
* @returns Promise resolving to the completion result
*
* Note: For multimodal support, you can include a media_paths parameter.
* This will process the media files and add them to the context before generating text.
* Multimodal support must be enabled via initMultimodal() first.
*/
completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
stopCompletion(): Promise<void>;
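/**
* Example (illustrative sketch): stream a completion from chat messages. Only fields
* declared in this file (messages, jinja) are shown; other NativeCompletionParams
* fields come from ./NativeRNLlama. The message content is illustrative.
*
*   const result = await context.completion(
*     { messages: [{ role: 'user', content: 'Write a haiku about rain.' }], jinja: true },
*     (data: TokenData) => {
*       console.log(data.token) // token-by-token streaming
*     },
*   )
*   // To abort a long generation from elsewhere:
*   // await context.stopCompletion()
*/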
/**
* Tokenize text or text with images
* @param text Text to tokenize
* @param params.media_paths Array of image paths to tokenize (if multimodal is enabled)
* @returns Promise resolving to the tokenize result
*/
tokenizeAsync(text: string, { media_paths: mediaPaths, }?: {
media_paths?: string[];
}): Promise<NativeTokenizeResult>;
tokenizeSync(text: string, { media_paths: mediaPaths, }?: {
media_paths?: string[];
}): NativeTokenizeResult;
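/**
* Example (illustrative sketch): tokenize plain text, or text containing the media
* marker together with image paths once multimodal support is initialized. The file
* path is hypothetical.
*
*   const plain = await context.tokenizeAsync('Hello world')
*   const withImage = await context.tokenizeAsync(
*     `Describe this image: ${RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER}`,
*     { media_paths: ['/data/photo.jpg'] },
*   )
*   // tokenizeSync() is the blocking variant with the same signature.
*/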
detokenize(tokens: number[]): Promise<string>;
embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
/**
* Rerank documents based on relevance to a query
* @param query The query text to rank documents against
* @param documents Array of document texts to rank
* @param params Optional reranking parameters
* @returns Promise resolving to an array of ranking results with scores and indices
*/
rerank(query: string, documents: string[], params?: RerankParams): Promise<RerankResult[]>;
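/**
* Example (illustrative sketch): score documents against a query. `normalize` is the
* only RerankParams field declared here; its exact semantics follow the native layer.
*
*   const ranked = await context.rerank(
*     'What is the capital of France?',
*     ['Paris is the capital of France.', 'Berlin is in Germany.'],
*   )
*   // Each entry carries { score, index, document? }; sort by score descending if needed.
*/
/**
* Run the built-in benchmark. Parameter names appear to follow the llama.cpp bench
* convention (pp: prompt-processing tokens, tg: text-generation tokens, pl: parallel
* sequences, nr: repetitions); treat this mapping as an assumption, not declared API.
*/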
bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
applyLoraAdapters(loraList: Array<{
path: string;
scaled?: number;
}>): Promise<void>;
removeLoraAdapters(): Promise<void>;
getLoadedLoraAdapters(): Promise<Array<{
path: string;
scaled?: number;
}>>;
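/**
* Example (illustrative sketch): apply a LoRA adapter at partial strength, inspect the
* loaded adapters, then remove them. The adapter path is hypothetical.
*
*   await context.applyLoraAdapters([{ path: '/data/adapters/style.gguf', scaled: 0.8 }])
*   const active = await context.getLoadedLoraAdapters()
*   await context.removeLoraAdapters()
*/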
/**
* Initialize multimodal support with a mmproj file
* @param params Parameters for multimodal support
* @param params.path Path to the multimodal projector file
* @param params.use_gpu Whether to use GPU
* @returns Promise resolving to true if initialization was successful
*/
initMultimodal({ path, use_gpu: useGpu, }: {
path: string;
use_gpu?: boolean;
}): Promise<boolean>;
/**
* Check if multimodal support is enabled
* @returns Promise resolving to true if multimodal is enabled
*/
isMultimodalEnabled(): Promise<boolean>;
/**
* Check multimodal support
* @returns Promise resolving to an object with vision and audio support
*/
getMultimodalSupport(): Promise<{
vision: boolean;
audio: boolean;
}>;
/**
* Release multimodal support
* @returns Promise resolving to void
*/
releaseMultimodal(): Promise<void>;
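/**
* Example (illustrative sketch): enable multimodal input with a projector file, check
* which modalities it provides, then release it. The path is hypothetical.
*
*   const ok = await context.initMultimodal({ path: '/data/mmproj-model.gguf', use_gpu: true })
*   if (ok) {
*     const support = await context.getMultimodalSupport() // { vision, audio }
*     // pass media_paths to completion()/tokenizeAsync() while multimodal is enabled
*   }
*   await context.releaseMultimodal()
*/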
/**
* Initialize TTS support with a vocoder model
* @param params Parameters for TTS support
* @param params.path Path to the vocoder model
* @returns Promise resolving to true if initialization was successful
*/
initVocoder({ path }: {
path: string;
}): Promise<boolean>;
/**
* Check if TTS support is enabled
* @returns Promise resolving to true if TTS is enabled
*/
isVocoderEnabled(): Promise<boolean>;
/**
* Get a formatted audio completion prompt
* @param speaker Speaker configuration object, or null
* @param textToSpeak Text to speak
* @returns Promise resolving to the formatted audio completion prompt
*/
getFormattedAudioCompletion(speaker: object | null, textToSpeak: string): Promise<string>;
/**
* Get guide tokens for audio completion
* @param textToSpeak Text to speak
* @returns Promise resolving to the guide tokens
*/
getAudioCompletionGuideTokens(textToSpeak: string): Promise<Array<number>>;
/**
* Decode audio tokens into audio data
* @param tokens Array of audio tokens
* @returns Promise resolving to the decoded audio data
*/
decodeAudioTokens(tokens: number[]): Promise<Array<number>>;
/**
* Release TTS support
* @returns Promise resolving to void
*/
releaseVocoder(): Promise<void>;
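/**
* Example (illustrative sketch): the TTS flow suggested by the declarations above —
* load a vocoder, build the audio prompt, optionally fetch guide tokens, then decode
* generated audio tokens. The paths, the null speaker, and the source of the generated
* tokens are hypothetical.
*
*   await context.initVocoder({ path: '/data/vocoder.gguf' })
*   const audioPrompt = await context.getFormattedAudioCompletion(null, 'Hello there!')
*   const guide = await context.getAudioCompletionGuideTokens('Hello there!')
*   // ...run completion() with audioPrompt and collect the audio tokens it emits...
*   // const samples = await context.decodeAudioTokens(audioTokens)
*   await context.releaseVocoder()
*/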
release(): Promise<void>;
}
export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
export declare function toggleNativeLog(enabled: boolean): Promise<void>;
export declare function addNativeLogListener(listener: (level: string, text: string) => void): {
remove: () => void;
};
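/**
* Example (illustrative sketch): surface native llama.cpp logs in JavaScript.
*
*   await toggleNativeLog(true)
*   const subscription = addNativeLogListener((level, text) => console.log(`[${level}] ${text}`))
*   // ...later:
*   subscription.remove()
*/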
export declare function setContextLimit(limit: number): Promise<void>;
export declare function loadLlamaModelInfo(model: string): Promise<Object>;
export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
export declare function releaseAllLlama(): Promise<void>;
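/**
* Example (illustrative sketch): load a model and create a context. Only fields visible
* in this file (model, cache types, pooling_type) are shown; other NativeContextParams
* fields come from ./NativeRNLlama, and the model path is hypothetical.
*
*   const context = await initLlama(
*     {
*       model: '/data/models/llama-3-8b-q4.gguf',
*       cache_type_k: CACHE_TYPE.Q8_0,
*       cache_type_v: CACHE_TYPE.Q8_0,
*       pooling_type: 'none',
*     },
*     (progress) => console.log('loading', progress),
*   )
*   // ...use the context, then clean up:
*   await context.release()
*   await releaseAllLlama()
*/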
//# sourceMappingURL=index.d.ts.map