llama.rn
React Native binding of llama.cpp
import type {
  NativeContextParams,
  NativeLlamaContext,
  NativeCompletionParams,
  NativeCompletionTokenProb,
  NativeCompletionResult,
  NativeTokenizeResult,
  NativeEmbeddingResult,
  NativeSessionLoadResult,
  NativeEmbeddingParams,
  NativeCompletionTokenProbItem,
  NativeCompletionResultTimings,
} from './NativeRNLlama';
import type { SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule } from './grammar';
import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat';
export type {
  NativeContextParams,
  NativeLlamaContext,
  NativeCompletionParams,
  NativeCompletionTokenProb,
  NativeCompletionResult,
  NativeTokenizeResult,
  NativeEmbeddingResult,
  NativeSessionLoadResult,
  NativeEmbeddingParams,
  NativeCompletionTokenProbItem,
  NativeCompletionResultTimings,
  RNLlamaMessagePart,
  RNLlamaOAICompatibleMessage,
  SchemaGrammarConverterPropOrder,
  SchemaGrammarConverterBuiltinRule,
};
export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
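/**
 * Token emitted through the completion() callback while a completion is streaming.
 */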
export type TokenData = {
token: string;
completion_probabilities?: Array<NativeCompletionTokenProb>;
};
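/**
 * Context creation parameters. Narrows the native cache_type_k / cache_type_v
 * and pooling_type fields to their supported string values.
 */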
export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
};
export type EmbeddingParams = NativeEmbeddingParams;
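/**
 * Completion parameters. Either a raw prompt or an array of OpenAI-compatible
 * messages (optionally with a custom chat template) can be supplied.
 */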
export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & {
prompt?: string;
messages?: RNLlamaOAICompatibleMessage[];
chatTemplate?: string;
};
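/**
 * Result of bench(): model description, size and parameter count, plus
 * averages and standard deviations for prompt processing (pp) and token
 * generation (tg).
 */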
export type BenchResult = {
modelDesc: string;
modelSize: number;
modelNParams: number;
ppAvg: number;
ppStd: number;
tgAvg: number;
tgStd: number;
};
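/**
 * A loaded model context. Instances are normally obtained from initLlama()
 * rather than constructed directly.
 */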
export declare class LlamaContext {
id: number;
gpu: boolean;
reasonNoGPU: string;
model: {
isChatTemplateSupported?: boolean;
};
constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext);
/**
* Load cached prompt & completion state from a file.
*/
loadSession(filepath: string): Promise<NativeSessionLoadResult>;
/**
* Save current cached prompt & completion state to a file.
*/
saveSession(filepath: string, options?: {
tokenSize: number;
}): Promise<number>;
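/**
 * Format OpenAI-compatible messages into a prompt string using the model's
 * chat template, or the provided template override.
 */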
getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string): Promise<string>;
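/**
 * Run a completion. Partial tokens are streamed through the optional
 * callback; the returned promise resolves with the final result.
 */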
completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
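/**
 * Stop the in-progress completion, if any.
 */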
stopCompletion(): Promise<void>;
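/**
 * Tokenize a text string with the model's tokenizer.
 */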
tokenize(text: string): Promise<NativeTokenizeResult>;
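/**
 * Convert an array of token ids back into text.
 */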
detokenize(tokens: number[]): Promise<string>;
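/**
 * Compute an embedding vector for the given text.
 */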
embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
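/**
 * Benchmark the model. The arguments mirror llama.cpp's bench parameters:
 * prompt tokens (pp), generated tokens (tg), parallel sequences (pl) and
 * number of repetitions (nr).
 */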
bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
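/**
 * Apply LoRA adapters to the loaded model. `scaled` sets the scaling factor
 * for each adapter.
 */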
applyLoraAdapters(loraList: Array<{
path: string;
scaled?: number;
}>): Promise<void>;
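/**
 * Remove all currently applied LoRA adapters.
 */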
removeLoraAdapters(): Promise<void>;
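/**
 * List the LoRA adapters currently applied to this context.
 */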
getLoadedLoraAdapters(): Promise<Array<{
path: string;
scaled?: number;
}>>;
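/**
 * Release the native context and free its resources.
 */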
release(): Promise<void>;
}
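/**
 * Set the maximum number of contexts that may be created at once.
 */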
export declare function setContextLimit(limit: number): Promise<void>;
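/**
 * Read metadata from a GGUF model file without creating a full context.
 */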
export declare function loadLlamaModelInfo(model: string): Promise<Object>;
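/**
 * Create and load a new LlamaContext. `onProgress` reports model-loading
 * progress. A minimal usage sketch follows; the model path and the n_ctx /
 * n_predict values are illustrative, not library defaults.
 *
 * @example
 * const context = await initLlama({
 *   model: 'file:///path/to/model.gguf', // hypothetical local model path
 *   n_ctx: 2048,
 * })
 * const result = await context.completion(
 *   {
 *     messages: [{ role: 'user', content: 'Hello!' }],
 *     n_predict: 128,
 *   },
 *   (data) => console.log(data.token), // streamed partial tokens
 * )
 * console.log(result.text)
 * await context.release()
 */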
export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
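/**
 * Release every context that has been created.
 */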
export declare function releaseAllLlama(): Promise<void>;
//# sourceMappingURL=index.d.ts.map