cui-llama.rn
Fork of llama.rn for ChatterUI
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = exports.LlamaContext = exports.CACHE_TYPE = void 0;
Object.defineProperty(exports, "SchemaGrammarConverter", {
enumerable: true,
get: function () {
return _grammar.SchemaGrammarConverter;
}
});
exports.addNativeLogListener = addNativeLogListener;
Object.defineProperty(exports, "convertJsonSchemaToGrammar", {
enumerable: true,
get: function () {
return _grammar.convertJsonSchemaToGrammar;
}
});
exports.getCpuFeatures = getCpuFeatures;
exports.initLlama = initLlama;
exports.loadLlamaModelInfo = loadLlamaModelInfo;
exports.releaseAllLlama = releaseAllLlama;
exports.setContextLimit = setContextLimit;
exports.toggleNativeLog = toggleNativeLog;
var _reactNative = require("react-native");
var _NativeRNLlama = _interopRequireDefault(require("./NativeRNLlama"));
var _grammar = require("./grammar");
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
const RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = exports.RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = '<__media__>';
const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
const EVENT_ON_TOKEN = '@RNLlama_onToken';
const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog';
let EventEmitter;
if (_reactNative.Platform.OS === 'ios') {
// @ts-ignore
EventEmitter = new _reactNative.NativeEventEmitter(_NativeRNLlama.default);
}
if (_reactNative.Platform.OS === 'android') {
EventEmitter = _reactNative.DeviceEventEmitter;
}
const logListeners = [];
// @ts-ignore
if (EventEmitter) {
EventEmitter.addListener(EVENT_ON_NATIVE_LOG, evt => {
logListeners.forEach(listener => listener(evt.level, evt.text));
});
// Disable JS log forwarding initially so the default native log callback is used
_NativeRNLlama.default?.toggleNativeLog?.(false)?.catch?.(() => {});
}
let CACHE_TYPE = exports.CACHE_TYPE = /*#__PURE__*/function (CACHE_TYPE) {
CACHE_TYPE["F16"] = "f16";
CACHE_TYPE["F32"] = "f32";
CACHE_TYPE["Q8_0"] = "q8_0";
CACHE_TYPE["Q4_0"] = "q4_0";
CACHE_TYPE["Q4_1"] = "q4_1";
CACHE_TYPE["IQ4_NL"] = "iq4_nl";
CACHE_TYPE["Q5_0"] = "q5_0";
CACHE_TYPE["Q5_1"] = "q5_1";
return CACHE_TYPE;
}({});
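// Usage sketch for CACHE_TYPE: the values are intended as KV-cache quantization settings
// when creating a context. The cache_type_k / cache_type_v parameter names below are an
// assumption carried over from upstream llama.rn and may differ in this fork:
//   const context = await initLlama({
//     model: '/path/to/model.gguf',          // placeholder path
//     cache_type_k: CACHE_TYPE.Q8_0,         // quantize the K cache to 8-bit
//     cache_type_v: CACHE_TYPE.F16,          // keep the V cache in f16
//   })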
const getJsonSchema = responseFormat => {
if (responseFormat?.type === 'json_schema') {
return responseFormat.json_schema?.schema;
}
if (responseFormat?.type === 'json_object') {
return responseFormat.schema || {};
}
return null;
};
class LlamaContext {
gpu = false;
reasonNoGPU = '';
constructor({
contextId,
gpu,
reasonNoGPU,
model
}) {
this.id = contextId;
this.gpu = gpu;
this.reasonNoGPU = reasonNoGPU;
this.model = model;
}
/**
* Load cached prompt & completion state from a file.
*/
async loadSession(filepath) {
let path = filepath;
if (path.startsWith('file://')) path = path.slice(7);
return _NativeRNLlama.default.loadSession(this.id, path);
}
/**
* Save current cached prompt & completion state to a file.
*/
async saveSession(filepath, options) {
return _NativeRNLlama.default.saveSession(this.id, filepath, options?.tokenSize || -1);
}
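// Session caching sketch (file paths are placeholders):
//   await context.completion({ prompt: 'Once upon a time', n_predict: 32 })
//   await context.saveSession('/data/cache/session.bin', { tokenSize: 1024 })
//   // ...later, on a freshly initialized context for the same model:
//   await context.loadSession('file:///data/cache/session.bin') // the file:// prefix is stripped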
isLlamaChatSupported() {
return !!this.model.chatTemplates.llamaChat;
}
isJinjaSupported() {
const {
minja
} = this.model.chatTemplates;
return !!minja?.toolUse || !!minja?.default;
}
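/**
 * Format an array of chat messages into a prompt using the model's chat template.
 * Multimodal parts (image_url / input_audio) are replaced with the media marker and
 * their paths are collected into media_paths.
 * @param messages Array of chat messages (role/content objects)
 * @param template Optional chat template that overrides the model default
 * @param params Optional formatting parameters (jinja, tools, response_format, etc.)
 * @returns Promise resolving to a formatted chat result of type 'jinja' or 'llama-chat'
 */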
async getFormattedChat(messages, template, params) {
const mediaPaths = [];
const chat = messages.map(msg => {
if (Array.isArray(msg.content)) {
const content = msg.content.map(part => {
// Handle multimodal content
if (part.type === 'image_url') {
let path = part.image_url?.url || '';
if (path?.startsWith('file://')) path = path.slice(7);
mediaPaths.push(path);
return {
type: 'text',
text: RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER
};
} else if (part.type === 'input_audio') {
const {
input_audio: audio
} = part;
if (!audio) throw new Error('input_audio is required');
const {
format
} = audio;
if (format !== 'wav' && format !== 'mp3') {
throw new Error(`Unsupported audio format: ${format}`);
}
if (audio.url) {
const path = audio.url.replace(/file:\/\//, '');
mediaPaths.push(path);
} else if (audio.data) {
mediaPaths.push(audio.data);
}
return {
type: 'text',
text: RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER
};
}
return part;
});
return {
...msg,
content
};
}
return msg;
});
const useJinja = this.isJinjaSupported() && params?.jinja;
let tmpl;
if (template) tmpl = template; // An explicitly provided template overrides the model default
const jsonSchema = getJsonSchema(params?.response_format);
const result = await _NativeRNLlama.default.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
jinja: useJinja,
json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
tools: params?.tools ? JSON.stringify(params.tools) : undefined,
parallel_tool_calls: params?.parallel_tool_calls ? JSON.stringify(params.parallel_tool_calls) : undefined,
tool_choice: params?.tool_choice,
enable_thinking: params?.enable_thinking ?? true
});
if (!useJinja) {
return {
type: 'llama-chat',
prompt: result,
has_media: mediaPaths.length > 0,
media_paths: mediaPaths
};
}
const jinjaResult = result;
jinjaResult.type = 'jinja';
jinjaResult.has_media = mediaPaths.length > 0;
jinjaResult.media_paths = mediaPaths;
return jinjaResult;
}
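// Sketch of a multimodal message accepted by getFormattedChat()/completion()
// (the image path is a placeholder):
//   const messages = [
//     { role: 'system', content: 'You are a helpful assistant.' },
//     {
//       role: 'user',
//       content: [
//         { type: 'text', text: 'What is in this picture?' },
//         { type: 'image_url', image_url: { url: 'file:///path/to/photo.jpg' } },
//       ],
//     },
//   ]
//   const formatted = await context.getFormattedChat(messages, undefined, { jinja: true })
//   // formatted.media_paths -> ['/path/to/photo.jpg']; the prompt contains '<__media__>' markers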
/**
* Generate a completion based on the provided parameters
* @param params Completion parameters including prompt or messages
* @param callback Optional callback for token-by-token streaming
* @returns Promise resolving to the completion result
*
* Note: For multimodal support, you can include a media_paths parameter.
* The referenced media files are processed and added to the context before text is generated.
* Multimodal support must be enabled via initMultimodal() first.
*/
async completion(params, callback) {
const nativeParams = {
...params,
prompt: params.prompt || '',
emit_partial_completion: !!callback
};
if (params.messages) {
const formattedResult = await this.getFormattedChat(params.messages, params.chat_template || params.chatTemplate, {
jinja: params.jinja,
tools: params.tools,
parallel_tool_calls: params.parallel_tool_calls,
tool_choice: params.tool_choice,
enable_thinking: params.enable_thinking
});
if (formattedResult.type === 'jinja') {
const jinjaResult = formattedResult;
nativeParams.prompt = jinjaResult.prompt || '';
if (typeof jinjaResult.chat_format === 'number') nativeParams.chat_format = jinjaResult.chat_format;
if (jinjaResult.grammar) nativeParams.grammar = jinjaResult.grammar;
if (typeof jinjaResult.grammar_lazy === 'boolean') nativeParams.grammar_lazy = jinjaResult.grammar_lazy;
if (jinjaResult.grammar_triggers) nativeParams.grammar_triggers = jinjaResult.grammar_triggers;
if (jinjaResult.preserved_tokens) nativeParams.preserved_tokens = jinjaResult.preserved_tokens;
if (jinjaResult.additional_stops) {
if (!nativeParams.stop) nativeParams.stop = [];
nativeParams.stop.push(...jinjaResult.additional_stops);
}
if (jinjaResult.has_media) {
nativeParams.media_paths = jinjaResult.media_paths;
}
} else if (formattedResult.type === 'llama-chat') {
const llamaChatResult = formattedResult;
nativeParams.prompt = llamaChatResult.prompt || '';
if (llamaChatResult.has_media) {
nativeParams.media_paths = llamaChatResult.media_paths;
}
}
} else {
nativeParams.prompt = params.prompt || '';
}
// Fall back to explicitly provided media_paths when none were extracted from the messages
if (!nativeParams.media_paths && params.media_paths) {
nativeParams.media_paths = params.media_paths;
}
if (nativeParams.response_format && !nativeParams.grammar) {
const jsonSchema = getJsonSchema(params.response_format);
if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema);
}
let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
const {
contextId,
tokenResult
} = evt;
if (contextId !== this.id) return;
callback(tokenResult);
});
if (!nativeParams.prompt) throw new Error('Prompt is required');
const promise = _NativeRNLlama.default.completion(this.id, nativeParams);
return promise.then(completionResult => {
tokenListener?.remove();
tokenListener = null;
return completionResult;
}).catch(err => {
tokenListener?.remove();
tokenListener = null;
throw err;
});
}
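// Completion usage sketch (sampling values are placeholders):
//   const result = await context.completion(
//     {
//       messages: [
//         { role: 'system', content: 'You are a concise assistant.' },
//         { role: 'user', content: 'Explain KV-cache quantization in one sentence.' },
//       ],
//       n_predict: 128,
//       temperature: 0.7,
//       stop: ['</s>'],
//       // Optional structured output via a JSON schema:
//       // response_format: { type: 'json_schema', json_schema: { schema: { type: 'object' } } },
//     },
//     (tokenResult) => console.log(tokenResult.token), // streaming callback, called per token
//   )
//   console.log(result.text)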
stopCompletion() {
return _NativeRNLlama.default.stopCompletion(this.id);
}
/**
* Tokenize text, optionally together with media
* @param text Text to tokenize
* @param params.media_paths Array of media paths to tokenize alongside the text (requires multimodal support)
* @returns Promise resolving to the tokenize result
*/
tokenizeAsync(text, {
media_paths: mediaPaths
} = {}) {
return _NativeRNLlama.default.tokenizeAsync(this.id, text, mediaPaths);
}
tokenizeSync(text, {
media_paths: mediaPaths
} = {}) {
return _NativeRNLlama.default.tokenizeSync(this.id, text, mediaPaths);
}
detokenize(tokens) {
return _NativeRNLlama.default.detokenize(this.id, tokens);
}
embedding(text, params) {
return _NativeRNLlama.default.embedding(this.id, text, params || {});
}
/**
* Rerank documents based on relevance to a query
* @param query The query text to rank documents against
* @param documents Array of document texts to rank
* @param params Optional reranking parameters
* @returns Promise resolving to an array of ranking results with scores and indices
*/
async rerank(query, documents, params) {
const results = await _NativeRNLlama.default.rerank(this.id, query, documents, params || {});
// Attach the original document text to each result, then sort by score descending
return results.map(result => ({
...result,
document: documents[result.index]
})).sort((a, b) => b.score - a.score);
}
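// Rerank usage sketch (requires a reranking-capable model/context):
//   const ranked = await context.rerank(
//     'What is the capital of France?',
//     ['Berlin is in Germany.', 'Paris is the capital of France.', 'Cats are mammals.'],
//   )
//   // ranked[0].document is the most relevant text, ranked[0].score its relevance score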
async bench(pp, tg, pl, nr) {
const result = await _NativeRNLlama.default.bench(this.id, pp, tg, pl, nr);
const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] = JSON.parse(result);
return {
modelDesc,
modelSize,
modelNParams,
ppAvg,
ppStd,
tgAvg,
tgStd
};
}
async applyLoraAdapters(loraList) {
let loraAdapters = [];
if (loraList) loraAdapters = loraList.map(l => ({
path: l.path.replace(/file:\/\//, ''),
scaled: l.scaled
}));
return _NativeRNLlama.default.applyLoraAdapters(this.id, loraAdapters);
}
async removeLoraAdapters() {
return _NativeRNLlama.default.removeLoraAdapters(this.id);
}
async getLoadedLoraAdapters() {
return _NativeRNLlama.default.getLoadedLoraAdapters(this.id);
}
/**
* Initialize multimodal support with a mmproj file
* @param params Parameters for multimodal support
* @param params.path Path to the multimodal projector file
* @param params.use_gpu Whether to use GPU
* @returns Promise resolving to true if initialization was successful
*/
async initMultimodal({
path,
use_gpu: useGpu
}) {
if (path.startsWith('file://')) path = path.slice(7);
return _NativeRNLlama.default.initMultimodal(this.id, {
path,
use_gpu: useGpu ?? true
});
}
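// Multimodal usage sketch (the mmproj and image paths are placeholders):
//   const ok = await context.initMultimodal({ path: 'file:///models/mmproj.gguf', use_gpu: true })
//   if (ok) {
//     const { vision, audio } = await context.getMultimodalSupport()
//     const result = await context.completion({
//       messages: [{ role: 'user', content: [
//         { type: 'text', text: 'Describe this image.' },
//         { type: 'image_url', image_url: { url: 'file:///path/to/image.jpg' } },
//       ] }],
//       n_predict: 128,
//     })
//   }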
/**
* Check if multimodal support is enabled
* @returns Promise resolving to true if multimodal is enabled
*/
async isMultimodalEnabled() {
return await _NativeRNLlama.default.isMultimodalEnabled(this.id);
}
/**
* Check multimodal support
* @returns Promise resolving to an object with vision and audio support
*/
async getMultimodalSupport() {
return await _NativeRNLlama.default.getMultimodalSupport(this.id);
}
/**
* Release multimodal support
* @returns Promise resolving to void
*/
async releaseMultimodal() {
return await _NativeRNLlama.default.releaseMultimodal(this.id);
}
/**
* Initialize TTS support with a vocoder model
* @param params Parameters for TTS support
* @param params.path Path to the vocoder model
* @returns Promise resolving to true if initialization was successful
*/
async initVocoder({
path
}) {
if (path.startsWith('file://')) path = path.slice(7);
return await _NativeRNLlama.default.initVocoder(this.id, path);
}
/**
* Check if TTS support is enabled
* @returns Promise resolving to true if TTS is enabled
*/
async isVocoderEnabled() {
return await _NativeRNLlama.default.isVocoderEnabled(this.id);
}
/**
* Get a formatted audio completion prompt
* @param speaker Speaker configuration object (serialized to JSON before being passed to the native module)
* @param textToSpeak Text to speak
* @returns Promise resolving to the formatted audio completion prompt
*/
async getFormattedAudioCompletion(speaker, textToSpeak) {
return await _NativeRNLlama.default.getFormattedAudioCompletion(this.id, speaker ? JSON.stringify(speaker) : '', textToSpeak);
}
/**
* Get guide tokens for audio completion
* @param textToSpeak Text to speak
* @returns Promise resolving to the guide tokens
*/
async getAudioCompletionGuideTokens(textToSpeak) {
return await _NativeRNLlama.default.getAudioCompletionGuideTokens(this.id, textToSpeak);
}
/**
* Decode audio tokens
* @param tokens Array of audio tokens
* @returns Promise resolving to the decoded audio data
*/
async decodeAudioTokens(tokens) {
return await _NativeRNLlama.default.decodeAudioTokens(this.id, tokens);
}
/**
* Release TTS support
* @returns Promise resolving to void
*/
async releaseVocoder() {
return await _NativeRNLlama.default.releaseVocoder(this.id);
}
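// TTS usage sketch. speakerConfig is a placeholder object, and the completion parameter
// for guide tokens plus the audio-token result field are assumptions based on upstream
// llama.rn; they may differ in this fork:
//   await context.initVocoder({ path: 'file:///models/vocoder.gguf' })
//   const formatted = await context.getFormattedAudioCompletion(speakerConfig, 'Hello there!')
//   const guideTokens = await context.getAudioCompletionGuideTokens('Hello there!')
//   const result = await context.completion({ prompt: formatted.prompt, guide_tokens: guideTokens })
//   const audioSamples = await context.decodeAudioTokens(result.audio_tokens)
//   await context.releaseVocoder()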
async release() {
return _NativeRNLlama.default.releaseContext(this.id);
}
}
exports.LlamaContext = LlamaContext;
async function getCpuFeatures() {
if (_reactNative.Platform.OS === 'android') {
return _NativeRNLlama.default.getCpuFeatures();
}
console.warn("getCpuFeatures() is an android only feature");
return {
i8mm: false,
armv8: false,
dotprod: false
};
}
async function toggleNativeLog(enabled) {
return _NativeRNLlama.default.toggleNativeLog(enabled);
}
function addNativeLogListener(listener) {
logListeners.push(listener);
return {
remove: () => {
logListeners.splice(logListeners.indexOf(listener), 1);
}
};
}
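// Native log usage sketch:
//   await toggleNativeLog(true) // forward native llama.cpp logs to JS
//   const logSub = addNativeLogListener((level, text) => console.log(`[llama ${level}] ${text}`))
//   // ...later:
//   logSub.remove()
//   await toggleNativeLog(false)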
async function setContextLimit(limit) {
return _NativeRNLlama.default.setContextLimit(limit);
}
let contextIdCounter = 0;
const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
const modelInfoSkip = [
// Large fields
'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges', 'tokenizer.ggml.scores'];
async function loadLlamaModelInfo(model) {
let path = model;
if (path.startsWith('file://')) path = path.slice(7);
return _NativeRNLlama.default.modelInfo(path, modelInfoSkip);
}
const poolTypeMap = {
// An unspecified pooling type (-1 on the native side) is represented by leaving the value undefined
none: 0,
mean: 1,
cls: 2,
last: 3,
rank: 4
};
async function initLlama({
model,
is_model_asset: isModelAsset,
pooling_type: poolingType,
lora,
lora_list: loraList,
...rest
}, onProgress) {
let path = model;
if (path.startsWith('file://')) path = path.slice(7);
let loraPath = lora;
if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7);
let loraAdapters = [];
if (loraList) loraAdapters = loraList.map(l => ({
path: l.path.replace(/file:\/\//, ''),
scaled: l.scaled
}));
const contextId = contextIdCounter + contextIdRandom();
contextIdCounter += 1;
let removeProgressListener = null;
if (onProgress) {
removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
if (evt.contextId !== contextId) return;
onProgress(evt.progress);
});
}
const poolType = poolTypeMap[poolingType];
const {
gpu,
reasonNoGPU,
model: modelDetails,
androidLib
} = await _NativeRNLlama.default.initContext(contextId, {
model: path,
is_model_asset: !!isModelAsset,
use_progress_callback: !!onProgress,
pooling_type: poolType,
lora: loraPath,
lora_list: loraAdapters,
...rest
}).catch(err => {
removeProgressListener?.remove();
throw err;
});
removeProgressListener?.remove();
return new LlamaContext({
contextId,
gpu,
reasonNoGPU,
model: modelDetails,
androidLib
});
}
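// Context initialization sketch (model path and parameter values are placeholders):
//   const context = await initLlama(
//     {
//       model: 'file:///models/llama-3-8b-q4_k_m.gguf',
//       n_ctx: 4096,
//       n_gpu_layers: 99, // ignored when no GPU backend is available
//     },
//     (progress) => console.log(`Loading: ${progress}%`),
//   )
//   console.log(context.gpu ? 'GPU enabled' : `CPU only: ${context.reasonNoGPU}`)
//   // ...when done:
//   await context.release()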
async function releaseAllLlama() {
return _NativeRNLlama.default.releaseAllContexts();
}
//# sourceMappingURL=index.js.map