cui-llama.rn
Fork of llama.rn for ChatterUI
TypeScript declarations (index.d.ts)
import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures, NativeEmbeddingParams, NativeCompletionTokenProbItem, NativeCompletionResultTimings } from './NativeRNLlama';
import type { SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule } from './grammar';
import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat';
export type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeEmbeddingParams, NativeCompletionTokenProbItem, NativeCompletionResultTimings, RNLlamaMessagePart, RNLlamaOAICompatibleMessage, SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule, };
export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
export type TokenData = {
token: string;
completion_probabilities?: Array<NativeCompletionTokenProb>;
};
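/**
 * Usage sketch: TokenData is what the streaming callback of `completion`
 * receives per generated token. The handler below is illustrative only:
 *
 *   const onToken = (data: TokenData) => {
 *     // `token` is the newly generated text piece; `completion_probabilities`
 *     // is only populated when token probabilities are requested.
 *     console.log(data.token)
 *   }
 */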
export declare enum GGML_TYPE {
LM_GGML_TYPE_F32 = 0,
LM_GGML_TYPE_F16 = 1,
LM_GGML_TYPE_Q4_0 = 2,
LM_GGML_TYPE_Q4_1 = 3,
LM_GGML_TYPE_Q5_0 = 6,
LM_GGML_TYPE_Q5_1 = 7,
LM_GGML_TYPE_Q8_0 = 8,
LM_GGML_TYPE_Q8_1 = 9,
LM_GGML_TYPE_Q2_K = 10,
LM_GGML_TYPE_Q3_K = 11,
LM_GGML_TYPE_Q4_K = 12,
LM_GGML_TYPE_Q5_K = 13,
LM_GGML_TYPE_Q6_K = 14,
LM_GGML_TYPE_Q8_K = 15,
LM_GGML_TYPE_IQ2_XXS = 16,
LM_GGML_TYPE_IQ2_XS = 17,
LM_GGML_TYPE_IQ3_XXS = 18,
LM_GGML_TYPE_IQ1_S = 19,
LM_GGML_TYPE_IQ4_NL = 20,
LM_GGML_TYPE_IQ3_S = 21,
LM_GGML_TYPE_IQ2_S = 22,
LM_GGML_TYPE_IQ4_XS = 23,
LM_GGML_TYPE_I8 = 24,
LM_GGML_TYPE_I16 = 25,
LM_GGML_TYPE_I32 = 26,
LM_GGML_TYPE_I64 = 27,
LM_GGML_TYPE_F64 = 28,
LM_GGML_TYPE_IQ1_M = 29,
LM_GGML_TYPE_BF16 = 30,
LM_GGML_TYPE_TQ1_0 = 34,
LM_GGML_TYPE_TQ2_0 = 35,
LM_GGML_TYPE_COUNT = 39
}
export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
cache_type_k?: GGML_TYPE;
cache_type_v?: GGML_TYPE;
pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
};
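/**
 * Illustrative sketch of building ContextParams with a quantized KV cache.
 * The model path, `n_ctx` value, and cache types are assumptions for the
 * example; other fields come from NativeContextParams as in upstream llama.rn:
 *
 *   const contextParams: ContextParams = {
 *     model: '/path/to/model.gguf',
 *     n_ctx: 4096,
 *     cache_type_k: GGML_TYPE.LM_GGML_TYPE_Q8_0,
 *     cache_type_v: GGML_TYPE.LM_GGML_TYPE_Q8_0,
 *     pooling_type: 'none',
 *   }
 */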
export type EmbeddingParams = NativeEmbeddingParams;
export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & {
prompt?: string;
messages?: RNLlamaOAICompatibleMessage[];
chatTemplate?: string;
};
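/**
 * Sketch of CompletionParams using chat-style `messages` instead of a raw
 * `prompt`. Sampling fields such as `n_predict` and `temperature` come from
 * NativeCompletionParams; the values here are illustrative:
 *
 *   const completionParams: CompletionParams = {
 *     messages: [
 *       { role: 'system', content: 'You are a helpful assistant.' },
 *       { role: 'user', content: 'Summarize llama.cpp in one sentence.' },
 *     ],
 *     n_predict: 128,
 *     temperature: 0.7,
 *   }
 */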
export type BenchResult = {
modelDesc: string;
modelSize: number;
modelNParams: number;
ppAvg: number;
ppStd: number;
tgAvg: number;
tgStd: number;
};
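/**
 * BenchResult fields: ppAvg/ppStd are prompt-processing throughput (tokens/s,
 * average and standard deviation), tgAvg/tgStd are token-generation throughput.
 * Example call on an existing LlamaContext `context`; the argument values
 * (prompt tokens, generated tokens, parallel sequences, repetitions) are
 * illustrative:
 *
 *   const bench = await context.bench(512, 128, 1, 3)
 *   console.log(`pp ${bench.ppAvg} t/s, tg ${bench.tgAvg} t/s`)
 */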
export declare class LlamaContext {
id: number;
gpu: boolean;
reasonNoGPU: string;
model: {
isChatTemplateSupported?: boolean;
};
constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext);
/**
* Load cached prompt & completion state from a file.
*/
loadSession(filepath: string): Promise<NativeSessionLoadResult>;
/**
* Save current cached prompt & completion state to a file.
*/
saveSession(filepath: string, options?: {
tokenSize: number;
}): Promise<number>;
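/**
 * Render `messages` into a single prompt string using the model's built-in
 * chat template, or the explicitly provided `template` string.
 */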
getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string): Promise<string>;
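/**
 * Run text generation. When `callback` is provided it is invoked with each
 * streamed token (TokenData) before the final result resolves.
 */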
completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
stopCompletion(): Promise<void>;
tokenizeAsync(text: string): Promise<NativeTokenizeResult>;
tokenizeSync(text: string): NativeTokenizeResult;
detokenize(tokens: number[]): Promise<string>;
embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
release(): Promise<void>;
}
export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
export declare function setContextLimit(limit: number): Promise<void>;
export declare function loadLlamaModelInfo(model: string): Promise<Object>;
export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
export declare function releaseAllLlama(): Promise<void>;
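/**
 * End-to-end usage sketch, assuming the model path and sampling values below
 * (they are placeholders, not defaults of this fork):
 *
 *   const context = await initLlama(
 *     { model: '/path/to/model.gguf', n_ctx: 2048 },
 *     (progress) => console.log('load progress:', progress),
 *   )
 *   const result = await context.completion(
 *     { messages: [{ role: 'user', content: 'Hello!' }], n_predict: 64 },
 *     (data: TokenData) => console.log(data.token),
 *   )
 *   console.log(result.text)
 *   await context.release()
 */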
//# sourceMappingURL=index.d.ts.map