llama.rn

React Native binding of llama.cpp
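
Typical usage of the public API this module exports, as a minimal sketch: initLlama() creates a native context, and completion() streams tokens when a callback is passed. The model path is illustrative, and option names not defined in this file (n_ctx, n_gpu_layers, n_predict) are llama.cpp-style settings forwarded through the rest parameters; treat them as assumptions.

import { initLlama } from 'llama.rn'

async function runExample() {
  // Create a context from a local GGUF model. The 'file://' prefix is
  // stripped by initLlama() before the path reaches native code.
  const context = await initLlama(
    {
      model: 'file:///path/to/model.gguf', // illustrative path
      n_ctx: 2048,      // assumed llama.cpp option, forwarded via ...rest
      n_gpu_layers: 99, // assumed llama.cpp option, forwarded via ...rest
    },
    (progress) => console.log('load progress:', progress),
  )

  // messages take priority over a raw prompt and are formatted with the
  // model's chat template, falling back to 'chatml' when unsupported.
  const result = await context.completion(
    {
      messages: [{ role: 'user', content: 'Hello!' }],
      n_predict: 128, // assumed llama.cpp option
    },
    // Per-token streaming callback; passing it enables emit_partial_completion.
    (tokenResult) => console.log(tokenResult.token),
  )
  console.log(result.text) // result shape assumed from the native module
}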

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.LlamaContext = void 0; Object.defineProperty(exports, "SchemaGrammarConverter", { enumerable: true, get: function () { return _grammar.SchemaGrammarConverter; } }); Object.defineProperty(exports, "convertJsonSchemaToGrammar", { enumerable: true, get: function () { return _grammar.convertJsonSchemaToGrammar; } }); exports.initLlama = initLlama; exports.loadLlamaModelInfo = loadLlamaModelInfo; exports.releaseAllLlama = releaseAllLlama; exports.setContextLimit = setContextLimit; var _reactNative = require("react-native"); var _NativeRNLlama = _interopRequireDefault(require("./NativeRNLlama")); var _grammar = require("./grammar"); var _chat = require("./chat"); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'; const EVENT_ON_TOKEN = '@RNLlama_onToken'; let EventEmitter; if (_reactNative.Platform.OS === 'ios') { // @ts-ignore EventEmitter = new _reactNative.NativeEventEmitter(_NativeRNLlama.default); } if (_reactNative.Platform.OS === 'android') { EventEmitter = _reactNative.DeviceEventEmitter; } class LlamaContext { gpu = false; reasonNoGPU = ''; model = {}; constructor(_ref) { let { contextId, gpu, reasonNoGPU, model } = _ref; this.id = contextId; this.gpu = gpu; this.reasonNoGPU = reasonNoGPU; this.model = model; } /** * Load cached prompt & completion state from a file. */ async loadSession(filepath) { let path = filepath; if (path.startsWith('file://')) path = path.slice(7); return _NativeRNLlama.default.loadSession(this.id, path); } /** * Save current cached prompt & completion state to a file. */ async saveSession(filepath, options) { return _NativeRNLlama.default.saveSession(this.id, filepath, (options === null || options === void 0 ? void 0 : options.tokenSize) || -1); } async getFormattedChat(messages, template) { var _this$model; const chat = (0, _chat.formatChat)(messages); let tmpl = (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml'; if (template) tmpl = template; // Force replace if provided return _NativeRNLlama.default.getFormattedChat(this.id, chat, tmpl); } async completion(params, callback) { let finalPrompt = params.prompt; if (params.messages) { // messages always win finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate); } let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => { const { contextId, tokenResult } = evt; if (contextId !== this.id) return; callback(tokenResult); }); if (!finalPrompt) throw new Error('Prompt is required'); const promise = _NativeRNLlama.default.completion(this.id, { ...params, prompt: finalPrompt, emit_partial_completion: !!callback }); return promise.then(completionResult => { var _tokenListener; (_tokenListener = tokenListener) === null || _tokenListener === void 0 ? void 0 : _tokenListener.remove(); tokenListener = null; return completionResult; }).catch(err => { var _tokenListener2; (_tokenListener2 = tokenListener) === null || _tokenListener2 === void 0 ? 
void 0 : _tokenListener2.remove(); tokenListener = null; throw err; }); } stopCompletion() { return _NativeRNLlama.default.stopCompletion(this.id); } tokenize(text) { return _NativeRNLlama.default.tokenize(this.id, text); } detokenize(tokens) { return _NativeRNLlama.default.detokenize(this.id, tokens); } embedding(text, params) { return _NativeRNLlama.default.embedding(this.id, text, params || {}); } async bench(pp, tg, pl, nr) { const result = await _NativeRNLlama.default.bench(this.id, pp, tg, pl, nr); const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] = JSON.parse(result); return { modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd }; } async applyLoraAdapters(loraList) { let loraAdapters = []; if (loraList) loraAdapters = loraList.map(l => ({ path: l.path.replace(/file:\/\//, ''), scaled: l.scaled })); return _NativeRNLlama.default.applyLoraAdapters(this.id, loraAdapters); } async removeLoraAdapters() { return _NativeRNLlama.default.removeLoraAdapters(this.id); } async getLoadedLoraAdapters() { return _NativeRNLlama.default.getLoadedLoraAdapters(this.id); } async release() { return _NativeRNLlama.default.releaseContext(this.id); } } exports.LlamaContext = LlamaContext; async function setContextLimit(limit) { return _NativeRNLlama.default.setContextLimit(limit); } let contextIdCounter = 0; const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000); const modelInfoSkip = [ // Large fields 'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges']; async function loadLlamaModelInfo(model) { let path = model; if (path.startsWith('file://')) path = path.slice(7); return _NativeRNLlama.default.modelInfo(path, modelInfoSkip); } const poolTypeMap = { // -1 is unspecified as undefined none: 0, mean: 1, cls: 2, last: 3, rank: 4 }; async function initLlama(_ref2, onProgress) { var _loraPath, _removeProgressListen2; let { model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest } = _ref2; let path = model; if (path.startsWith('file://')) path = path.slice(7); let loraPath = lora; if ((_loraPath = loraPath) !== null && _loraPath !== void 0 && _loraPath.startsWith('file://')) loraPath = loraPath.slice(7); let loraAdapters = []; if (loraList) loraAdapters = loraList.map(l => ({ path: l.path.replace(/file:\/\//, ''), scaled: l.scaled })); const contextId = contextIdCounter + contextIdRandom(); contextIdCounter += 1; let removeProgressListener = null; if (onProgress) { removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => { if (evt.contextId !== contextId) return; onProgress(evt.progress); }); } const poolType = poolTypeMap[poolingType]; const { gpu, reasonNoGPU, model: modelDetails, androidLib } = await _NativeRNLlama.default.initContext(contextId, { model: path, is_model_asset: !!isModelAsset, use_progress_callback: !!onProgress, pooling_type: poolType, lora: loraPath, lora_list: loraAdapters, ...rest }).catch(err => { var _removeProgressListen; (_removeProgressListen = removeProgressListener) === null || _removeProgressListen === void 0 ? void 0 : _removeProgressListen.remove(); throw err; }); (_removeProgressListen2 = removeProgressListener) === null || _removeProgressListen2 === void 0 ? 
void 0 : _removeProgressListen2.remove(); return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails, androidLib }); } async function releaseAllLlama() { return _NativeRNLlama.default.releaseAllContexts(); } //# sourceMappingURL=index.js.map
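
The session helpers persist the evaluated prompt state so a long prompt does not have to be re-processed on the next run. A sketch, continuing inside the async function from the example above; the file path is illustrative, and per saveSession() omitting options (or passing tokenSize: -1) saves the full state.

// After a completion, save up to 1024 tokens of cached state.
await context.saveSession('/path/to/session.bin', { tokenSize: 1024 })

// Later, e.g. after initLlama() with the same model; loadSession()
// accepts and strips 'file://' URIs.
await context.loadSession('file:///path/to/session.bin')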
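
LoRA adapters can be attached and removed at runtime without reloading the base model; each entry carries a path and a scale (field name scaled, per applyLoraAdapters() above). The adapter path is illustrative.

await context.applyLoraAdapters([
  { path: 'file:///path/to/adapter.gguf', scaled: 1.0 },
])
console.log(await context.getLoadedLoraAdapters())
await context.removeLoraAdapters()

// Free the native context when finished; releaseAllLlama() drops every context.
await context.release()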