cui-llama.rn
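/*
 * Illustrative end-to-end usage (not part of the original source): creating a
 * context, generating, and releasing it. The model path is made up, and
 * n_ctx / n_gpu_layers are llama.cpp-style options that are simply forwarded
 * to the native initContext() via the `...rest` spread in initLlama() below.
 *
 *   import { initLlama, releaseAllLlama } from 'cui-llama.rn'
 *
 *   const context = await initLlama(
 *     {
 *       model: 'file:///data/local/tmp/model-q4_k_m.gguf',
 *       n_ctx: 2048,
 *       n_gpu_layers: 99,
 *     },
 *     (progress) => console.log('load progress:', progress),
 *   )
 *   console.log('GPU offload:', context.gpu, context.reasonNoGPU)
 *   const out = await context.completion({ prompt: 'Hello', n_predict: 32 })
 *   await context.release()
 *   // or releaseAllLlama() to drop every live context
 */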

"use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = exports.LlamaContext = exports.CACHE_TYPE = void 0;
Object.defineProperty(exports, "SchemaGrammarConverter", {
  enumerable: true,
  get: function () {
    return _grammar.SchemaGrammarConverter;
  }
});
exports.addNativeLogListener = addNativeLogListener;
Object.defineProperty(exports, "convertJsonSchemaToGrammar", {
  enumerable: true,
  get: function () {
    return _grammar.convertJsonSchemaToGrammar;
  }
});
exports.getCpuFeatures = getCpuFeatures;
exports.initLlama = initLlama;
exports.loadLlamaModelInfo = loadLlamaModelInfo;
exports.releaseAllLlama = releaseAllLlama;
exports.setContextLimit = setContextLimit;
exports.toggleNativeLog = toggleNativeLog;
var _reactNative = require("react-native");
var _NativeRNLlama = _interopRequireDefault(require("./NativeRNLlama"));
var _grammar = require("./grammar");
function _interopRequireDefault(e) {
  return e && e.__esModule ? e : { default: e };
}
const RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = exports.RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = '<__media__>';
const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
const EVENT_ON_TOKEN = '@RNLlama_onToken';
const EVENT_ON_NATIVE_LOG = '@RNLlama_onNativeLog';
let EventEmitter;
if (_reactNative.Platform.OS === 'ios') {
  // @ts-ignore
  EventEmitter = new _reactNative.NativeEventEmitter(_NativeRNLlama.default);
}
if (_reactNative.Platform.OS === 'android') {
  EventEmitter = _reactNative.DeviceEventEmitter;
}
const logListeners = [];
// @ts-ignore
if (EventEmitter) {
  EventEmitter.addListener(EVENT_ON_NATIVE_LOG, evt => {
    logListeners.forEach(listener => listener(evt.level, evt.text));
  });
  // Trigger unset to use default log callback
  _NativeRNLlama.default?.toggleNativeLog?.(false)?.catch?.(() => {});
}
let CACHE_TYPE = exports.CACHE_TYPE = /*#__PURE__*/function (CACHE_TYPE) {
  CACHE_TYPE["F16"] = "f16";
  CACHE_TYPE["F32"] = "f32";
  CACHE_TYPE["Q8_0"] = "q8_0";
  CACHE_TYPE["Q4_0"] = "q4_0";
  CACHE_TYPE["Q4_1"] = "q4_1";
  CACHE_TYPE["IQ4_NL"] = "iq4_nl";
  CACHE_TYPE["Q5_0"] = "q5_0";
  CACHE_TYPE["Q5_1"] = "q5_1";
  return CACHE_TYPE;
}({});
const getJsonSchema = responseFormat => {
  if (responseFormat?.type === 'json_schema') {
    return responseFormat.json_schema?.schema;
  }
  if (responseFormat?.type === 'json_object') {
    return responseFormat.schema || {};
  }
  return null;
};
class LlamaContext {
  gpu = false;
  reasonNoGPU = '';
  constructor({ contextId, gpu, reasonNoGPU, model }) {
    this.id = contextId;
    this.gpu = gpu;
    this.reasonNoGPU = reasonNoGPU;
    this.model = model;
  }

  /**
   * Load cached prompt & completion state from a file.
   */
  async loadSession(filepath) {
    let path = filepath;
    if (path.startsWith('file://')) path = path.slice(7);
    return _NativeRNLlama.default.loadSession(this.id, path);
  }

  /**
   * Save current cached prompt & completion state to a file.
   */
  async saveSession(filepath, options) {
    return _NativeRNLlama.default.saveSession(this.id, filepath, options?.tokenSize || -1);
  }
  isLlamaChatSupported() {
    return !!this.model.chatTemplates.llamaChat;
  }
  isJinjaSupported() {
    const { minja } = this.model.chatTemplates;
    return !!minja?.toolUse || !!minja?.default;
  }
  async getFormattedChat(messages, template, params) {
    const mediaPaths = [];
    const chat = messages.map(msg => {
      if (Array.isArray(msg.content)) {
        const content = msg.content.map(part => {
          // Handle multimodal content
          if (part.type === 'image_url') {
            let path = part.image_url?.url || '';
            if (path?.startsWith('file://')) path = path.slice(7);
            mediaPaths.push(path);
            return { type: 'text', text: RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER };
          } else if (part.type === 'input_audio') {
            const { input_audio: audio } = part;
            if (!audio) throw new Error('input_audio is required');
            const { format } = audio;
            if (format != 'wav' && format != 'mp3') {
              throw new Error(`Unsupported audio format: ${format}`);
            }
            if (audio.url) {
              const path = audio.url.replace(/file:\/\//, '');
              mediaPaths.push(path);
            } else if (audio.data) {
              mediaPaths.push(audio.data);
            }
            return { type: 'text', text: RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER };
          }
          return part;
        });
        return { ...msg, content };
      }
      return msg;
    });
    const useJinja = this.isJinjaSupported() && params?.jinja;
    let tmpl;
    if (template) tmpl = template; // Force replace if provided
    const jsonSchema = getJsonSchema(params?.response_format);
    const result = await _NativeRNLlama.default.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
      jinja: useJinja,
      json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
      tools: params?.tools ? JSON.stringify(params.tools) : undefined,
      parallel_tool_calls: params?.parallel_tool_calls ? JSON.stringify(params.parallel_tool_calls) : undefined,
      tool_choice: params?.tool_choice,
      enable_thinking: params?.enable_thinking ?? true
    });
    if (!useJinja) {
      return {
        type: 'llama-chat',
        prompt: result,
        has_media: mediaPaths.length > 0,
        media_paths: mediaPaths
      };
    }
    const jinjaResult = result;
    jinjaResult.type = 'jinja';
    jinjaResult.has_media = mediaPaths.length > 0;
    jinjaResult.media_paths = mediaPaths;
    return jinjaResult;
  }
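  /*
   * Illustrative message shape (not part of the original source) for the
   * multimodal handling above: image_url / input_audio parts are replaced with
   * RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER in the prompt text and their paths are
   * collected into media_paths. The file path and the `context` variable
   * (a LlamaContext returned by initLlama()) are assumptions for the sketch.
   *
   *   const messages = [
   *     {
   *       role: 'user',
   *       content: [
   *         { type: 'text', text: 'Describe this image.' },
   *         { type: 'image_url', image_url: { url: 'file:///data/user/0/com.example/cache/photo.jpg' } },
   *       ],
   *     },
   *   ]
   *   const formatted = await context.getFormattedChat(messages, undefined, { jinja: true })
   *   // formatted.has_media === true
   *   // formatted.media_paths === ['/data/user/0/com.example/cache/photo.jpg']
   *   // formatted.type is 'jinja' when the model ships a minja template, otherwise 'llama-chat'
   */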
  /**
   * Generate a completion based on the provided parameters
   * @param params Completion parameters including prompt or messages
   * @param callback Optional callback for token-by-token streaming
   * @returns Promise resolving to the completion result
   *
   * Note: For multimodal support, you can include a media_paths parameter.
   * This will process the images and add them to the context before generating text.
   * Multimodal support must be enabled via initMultimodal() first.
   */
  async completion(params, callback) {
    const nativeParams = {
      ...params,
      prompt: params.prompt || '',
      emit_partial_completion: !!callback
    };
    if (params.messages) {
      const formattedResult = await this.getFormattedChat(params.messages, params.chat_template || params.chatTemplate, {
        jinja: params.jinja,
        tools: params.tools,
        parallel_tool_calls: params.parallel_tool_calls,
        tool_choice: params.tool_choice,
        enable_thinking: params.enable_thinking
      });
      if (formattedResult.type === 'jinja') {
        const jinjaResult = formattedResult;
        nativeParams.prompt = jinjaResult.prompt || '';
        if (typeof jinjaResult.chat_format === 'number') nativeParams.chat_format = jinjaResult.chat_format;
        if (jinjaResult.grammar) nativeParams.grammar = jinjaResult.grammar;
        if (typeof jinjaResult.grammar_lazy === 'boolean') nativeParams.grammar_lazy = jinjaResult.grammar_lazy;
        if (jinjaResult.grammar_triggers) nativeParams.grammar_triggers = jinjaResult.grammar_triggers;
        if (jinjaResult.preserved_tokens) nativeParams.preserved_tokens = jinjaResult.preserved_tokens;
        if (jinjaResult.additional_stops) {
          if (!nativeParams.stop) nativeParams.stop = [];
          nativeParams.stop.push(...jinjaResult.additional_stops);
        }
        if (jinjaResult.has_media) {
          nativeParams.media_paths = jinjaResult.media_paths;
        }
      } else if (formattedResult.type === 'llama-chat') {
        const llamaChatResult = formattedResult;
        nativeParams.prompt = llamaChatResult.prompt || '';
        if (llamaChatResult.has_media) {
          nativeParams.media_paths = llamaChatResult.media_paths;
        }
      }
    } else {
      nativeParams.prompt = params.prompt || '';
    }
    // Fall back to explicitly provided media_paths if none were extracted from messages
    if (!nativeParams.media_paths && params.media_paths) {
      nativeParams.media_paths = params.media_paths;
    }
    if (nativeParams.response_format && !nativeParams.grammar) {
      const jsonSchema = getJsonSchema(params.response_format);
      if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema);
    }
    let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
      const { contextId, tokenResult } = evt;
      if (contextId !== this.id) return;
      callback(tokenResult);
    });
    if (!nativeParams.prompt) throw new Error('Prompt is required');
    const promise = _NativeRNLlama.default.completion(this.id, nativeParams);
    return promise.then(completionResult => {
      tokenListener?.remove();
      tokenListener = null;
      return completionResult;
    }).catch(err => {
      tokenListener?.remove();
      tokenListener = null;
      throw err;
    });
  }
  stopCompletion() {
    return _NativeRNLlama.default.stopCompletion(this.id);
  }
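  /*
   * Illustrative usage (not part of the original source): streaming a chat
   * completion. n_predict and temperature are llama.cpp-style sampling fields
   * forwarded through the native params; the exact shape of the streamed
   * tokenResult and of the final result object comes from the native module
   * and is assumed here (`token` / `text` fields).
   *
   *   let streamed = ''
   *   const result = await context.completion(
   *     {
   *       messages: [
   *         { role: 'system', content: 'You are a helpful assistant.' },
   *         { role: 'user', content: 'Write a haiku about autumn.' },
   *       ],
   *       n_predict: 128,
   *       temperature: 0.7,
   *     },
   *     (tokenResult) => {
   *       // Invoked per token; passing a callback sets emit_partial_completion
   *       streamed += tokenResult.token ?? ''
   *     },
   *   )
   *   console.log(result.text)
   *   // context.stopCompletion() can be called from elsewhere to abort generation early
   */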
  /**
   * Tokenize text or text with images
   * @param text Text to tokenize
   * @param params.media_paths Array of image paths to tokenize (if multimodal is enabled)
   * @returns Promise resolving to the tokenize result
   */
  tokenizeAsync(text, { media_paths: mediaPaths } = {}) {
    return _NativeRNLlama.default.tokenizeAsync(this.id, text, mediaPaths);
  }
  tokenizeSync(text, { media_paths: mediaPaths } = {}) {
    return _NativeRNLlama.default.tokenizeSync(this.id, text, mediaPaths);
  }
  detokenize(tokens) {
    return _NativeRNLlama.default.detokenize(this.id, tokens);
  }
  embedding(text, params) {
    return _NativeRNLlama.default.embedding(this.id, text, params || {});
  }

  /**
   * Rerank documents based on relevance to a query
   * @param query The query text to rank documents against
   * @param documents Array of document texts to rank
   * @param params Optional reranking parameters
   * @returns Promise resolving to an array of ranking results with scores and indices
   */
  async rerank(query, documents, params) {
    const results = await _NativeRNLlama.default.rerank(this.id, query, documents, params || {});
    // Attach the source document text to each result and sort by score descending
    return results.map(result => ({
      ...result,
      document: documents[result.index]
    })).sort((a, b) => b.score - a.score);
  }
  async bench(pp, tg, pl, nr) {
    const result = await _NativeRNLlama.default.bench(this.id, pp, tg, pl, nr);
    const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] = JSON.parse(result);
    return {
      modelDesc,
      modelSize,
      modelNParams,
      ppAvg,
      ppStd,
      tgAvg,
      tgStd
    };
  }
  async applyLoraAdapters(loraList) {
    let loraAdapters = [];
    if (loraList) loraAdapters = loraList.map(l => ({
      path: l.path.replace(/file:\/\//, ''),
      scaled: l.scaled
    }));
    return _NativeRNLlama.default.applyLoraAdapters(this.id, loraAdapters);
  }
  async removeLoraAdapters() {
    return _NativeRNLlama.default.removeLoraAdapters(this.id);
  }
  async getLoadedLoraAdapters() {
    return _NativeRNLlama.default.getLoadedLoraAdapters(this.id);
  }

  /**
   * Initialize multimodal support with a mmproj file
   * @param params Parameters for multimodal support
   * @param params.path Path to the multimodal projector file
   * @param params.use_gpu Whether to use GPU
   * @returns Promise resolving to true if initialization was successful
   */
  async initMultimodal({ path, use_gpu: useGpu }) {
    if (path.startsWith('file://')) path = path.slice(7);
    return _NativeRNLlama.default.initMultimodal(this.id, {
      path,
      use_gpu: useGpu ?? true
    });
  }

  /**
   * Check if multimodal support is enabled
   * @returns Promise resolving to true if multimodal is enabled
   */
  async isMultimodalEnabled() {
    return await _NativeRNLlama.default.isMultimodalEnabled(this.id);
  }

  /**
   * Check multimodal support
   * @returns Promise resolving to an object with vision and audio support
   */
  async getMultimodalSupport() {
    return await _NativeRNLlama.default.getMultimodalSupport(this.id);
  }

  /**
   * Release multimodal support
   * @returns Promise resolving to void
   */
  async releaseMultimodal() {
    return await _NativeRNLlama.default.releaseMultimodal(this.id);
  }
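  /*
   * Illustrative usage (not part of the original source): enabling multimodal
   * input with a mmproj file and passing an image to completion(). The mmproj
   * and image paths are made up, the vision/audio field names follow the JSDoc
   * above, and placing RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER in a raw prompt is an
   * assumption based on how getFormattedChat() marks media positions;
   * message-based completion inserts the marker automatically.
   *
   *   const ok = await context.initMultimodal({ path: 'file:///models/mmproj-model-f16.gguf', use_gpu: true })
   *   if (ok) {
   *     const { vision, audio } = await context.getMultimodalSupport()
   *     console.log({ vision, audio })
   *     const res = await context.completion({
   *       prompt: `Describe the image. ${RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER}`,
   *       media_paths: ['/absolute/path/to/photo.jpg'],
   *     })
   *   }
   *   // await context.releaseMultimodal() when multimodal input is no longer needed
   */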
  /**
   * Initialize TTS support with a vocoder model
   * @param params Parameters for TTS support
   * @param params.path Path to the vocoder model
   * @returns Promise resolving to true if initialization was successful
   */
  async initVocoder({ path }) {
    if (path.startsWith('file://')) path = path.slice(7);
    return await _NativeRNLlama.default.initVocoder(this.id, path);
  }

  /**
   * Check if TTS support is enabled
   * @returns Promise resolving to true if TTS is enabled
   */
  async isVocoderEnabled() {
    return await _NativeRNLlama.default.isVocoderEnabled(this.id);
  }

  /**
   * Get a formatted audio completion prompt
   * @param speaker Speaker configuration (serialized to JSON before being passed to the native module)
   * @param textToSpeak Text to speak
   * @returns Promise resolving to the formatted audio completion prompt
   */
  async getFormattedAudioCompletion(speaker, textToSpeak) {
    return await _NativeRNLlama.default.getFormattedAudioCompletion(this.id, speaker ? JSON.stringify(speaker) : '', textToSpeak);
  }

  /**
   * Get guide tokens for audio completion
   * @param textToSpeak Text to speak
   * @returns Promise resolving to the guide tokens
   */
  async getAudioCompletionGuideTokens(textToSpeak) {
    return await _NativeRNLlama.default.getAudioCompletionGuideTokens(this.id, textToSpeak);
  }

  /**
   * Decode audio tokens
   * @param tokens Array of audio tokens
   * @returns Promise resolving to the decoded audio tokens
   */
  async decodeAudioTokens(tokens) {
    return await _NativeRNLlama.default.decodeAudioTokens(this.id, tokens);
  }

  /**
   * Release TTS support
   * @returns Promise resolving to void
   */
  async releaseVocoder() {
    return await _NativeRNLlama.default.releaseVocoder(this.id);
  }
  async release() {
    return _NativeRNLlama.default.releaseContext(this.id);
  }
}
exports.LlamaContext = LlamaContext;
async function getCpuFeatures() {
  if (_reactNative.Platform.OS === 'android') {
    return _NativeRNLlama.default.getCpuFeatures();
  }
  console.warn("getCpuFeatures() is an Android-only feature");
  return {
    i8mm: false,
    armv8: false,
    dotprod: false
  };
}
async function toggleNativeLog(enabled) {
  return _NativeRNLlama.default.toggleNativeLog(enabled);
}
function addNativeLogListener(listener) {
  logListeners.push(listener);
  return {
    remove: () => {
      logListeners.splice(logListeners.indexOf(listener), 1);
    }
  };
}
async function setContextLimit(limit) {
  return _NativeRNLlama.default.setContextLimit(limit);
}
let contextIdCounter = 0;
const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
const modelInfoSkip = [
  // Large fields
  'tokenizer.ggml.tokens',
  'tokenizer.ggml.token_type',
  'tokenizer.ggml.merges',
  'tokenizer.ggml.scores'
];
async function loadLlamaModelInfo(model) {
  let path = model;
  if (path.startsWith('file://')) path = path.slice(7);
  return _NativeRNLlama.default.modelInfo(path, modelInfoSkip);
}
const poolTypeMap = {
  // -1 (unspecified) is represented as undefined
  none: 0,
  mean: 1,
  cls: 2,
  last: 3,
  rank: 4
};
async function initLlama({
  model,
  is_model_asset: isModelAsset,
  pooling_type: poolingType,
  lora,
  lora_list: loraList,
  ...rest
}, onProgress) {
  let path = model;
  if (path.startsWith('file://')) path = path.slice(7);
  let loraPath = lora;
  if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7);
  let loraAdapters = [];
  if (loraList) loraAdapters = loraList.map(l => ({
    path: l.path.replace(/file:\/\//, ''),
    scaled: l.scaled
  }));
  const contextId = contextIdCounter + contextIdRandom();
  contextIdCounter += 1;
  let removeProgressListener = null;
  if (onProgress) {
    removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
      if (evt.contextId !== contextId) return;
      onProgress(evt.progress);
    });
  }
  const poolType = poolTypeMap[poolingType];
  const {
    gpu,
    reasonNoGPU,
    model: modelDetails,
    androidLib
  } = await _NativeRNLlama.default.initContext(contextId, {
    model: path,
    is_model_asset: !!isModelAsset,
    use_progress_callback: !!onProgress,
    pooling_type: poolType,
    lora: loraPath,
    lora_list: loraAdapters,
    ...rest
  }).catch(err => {
    removeProgressListener?.remove();
    throw err;
  });
  removeProgressListener?.remove();
  return new LlamaContext({
    contextId,
    gpu,
    reasonNoGPU,
    model: modelDetails,
    androidLib
  });
}
async function releaseAllLlama() {
  return _NativeRNLlama.default.releaseAllContexts();
}
//# sourceMappingURL=index.js.map
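/*
 * Illustrative usage (not part of the original source): routing native
 * llama.cpp logs into JS with the toggleNativeLog() and addNativeLogListener()
 * exports defined above. The console sink and the package-root import path
 * are assumptions for the sketch.
 *
 *   import { toggleNativeLog, addNativeLogListener } from 'cui-llama.rn'
 *
 *   await toggleNativeLog(true) // ask the native side to emit log events
 *   const logSub = addNativeLogListener((level, text) => {
 *     console.log(`[llama][${level}] ${text}`)
 *   })
 *   // ...later, stop receiving logs:
 *   logSub.remove()
 */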