cui-llama.rn
Fork of llama.rn for ChatterUI
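// Compiled CommonJS entry point (Babel output) for cui-llama.rn. It exposes
// initLlama/LlamaContext and related helpers that bridge to the native
// RNLlama module defined in ./NativeRNLlama.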
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.LlamaContext = exports.GGML_TYPE = void 0;
Object.defineProperty(exports, "SchemaGrammarConverter", {
enumerable: true,
get: function () {
return _grammar.SchemaGrammarConverter;
}
});
Object.defineProperty(exports, "convertJsonSchemaToGrammar", {
enumerable: true,
get: function () {
return _grammar.convertJsonSchemaToGrammar;
}
});
exports.getCpuFeatures = getCpuFeatures;
exports.initLlama = initLlama;
exports.loadLlamaModelInfo = loadLlamaModelInfo;
exports.releaseAllLlama = releaseAllLlama;
exports.setContextLimit = setContextLimit;
var _reactNative = require("react-native");
var _NativeRNLlama = _interopRequireDefault(require("./NativeRNLlama"));
var _grammar = require("./grammar");
var _chat = require("./chat");
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
const EVENT_ON_TOKEN = '@RNLlama_onToken';
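// Streaming events are delivered through NativeEventEmitter on iOS and
// DeviceEventEmitter on Android; each event carries a contextId so listeners
// can filter events for their own context.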
let EventEmitter;
if (_reactNative.Platform.OS === 'ios') {
// @ts-ignore
EventEmitter = new _reactNative.NativeEventEmitter(_NativeRNLlama.default);
}
if (_reactNative.Platform.OS === 'android') {
EventEmitter = _reactNative.DeviceEventEmitter;
}
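// Mirror of ggml's lm_ggml_type tensor/quantization ids as reported in GGUF
// model metadata (gaps in the numbering are intentional and match upstream ggml).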
let GGML_TYPE = /*#__PURE__*/function (GGML_TYPE) {
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_F32"] = 0] = "LM_GGML_TYPE_F32";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_F16"] = 1] = "LM_GGML_TYPE_F16";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q4_0"] = 2] = "LM_GGML_TYPE_Q4_0";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q4_1"] = 3] = "LM_GGML_TYPE_Q4_1";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q5_0"] = 6] = "LM_GGML_TYPE_Q5_0";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q5_1"] = 7] = "LM_GGML_TYPE_Q5_1";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q8_0"] = 8] = "LM_GGML_TYPE_Q8_0";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q8_1"] = 9] = "LM_GGML_TYPE_Q8_1";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q2_K"] = 10] = "LM_GGML_TYPE_Q2_K";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q3_K"] = 11] = "LM_GGML_TYPE_Q3_K";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q4_K"] = 12] = "LM_GGML_TYPE_Q4_K";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q5_K"] = 13] = "LM_GGML_TYPE_Q5_K";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q6_K"] = 14] = "LM_GGML_TYPE_Q6_K";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_Q8_K"] = 15] = "LM_GGML_TYPE_Q8_K";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ2_XXS"] = 16] = "LM_GGML_TYPE_IQ2_XXS";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ2_XS"] = 17] = "LM_GGML_TYPE_IQ2_XS";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ3_XXS"] = 18] = "LM_GGML_TYPE_IQ3_XXS";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ1_S"] = 19] = "LM_GGML_TYPE_IQ1_S";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ4_NL"] = 20] = "LM_GGML_TYPE_IQ4_NL";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ3_S"] = 21] = "LM_GGML_TYPE_IQ3_S";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ2_S"] = 22] = "LM_GGML_TYPE_IQ2_S";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ4_XS"] = 23] = "LM_GGML_TYPE_IQ4_XS";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_I8"] = 24] = "LM_GGML_TYPE_I8";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_I16"] = 25] = "LM_GGML_TYPE_I16";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_I32"] = 26] = "LM_GGML_TYPE_I32";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_I64"] = 27] = "LM_GGML_TYPE_I64";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_F64"] = 28] = "LM_GGML_TYPE_F64";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_IQ1_M"] = 29] = "LM_GGML_TYPE_IQ1_M";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_BF16"] = 30] = "LM_GGML_TYPE_BF16";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_TQ1_0"] = 34] = "LM_GGML_TYPE_TQ1_0";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_TQ2_0"] = 35] = "LM_GGML_TYPE_TQ2_0";
GGML_TYPE[GGML_TYPE["LM_GGML_TYPE_COUNT"] = 39] = "LM_GGML_TYPE_COUNT";
return GGML_TYPE;
}({});
exports.GGML_TYPE = GGML_TYPE;
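/**
* Handle to a single native llama.cpp context. Instances are created by
* `initLlama` rather than constructed directly.
*/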
class LlamaContext {
gpu = false;
reasonNoGPU = '';
model = {};
constructor(_ref) {
let {
contextId,
gpu,
reasonNoGPU,
model
} = _ref;
this.id = contextId;
this.gpu = gpu;
this.reasonNoGPU = reasonNoGPU;
this.model = model;
}
/**
* Load cached prompt & completion state from a file.
*/
async loadSession(filepath) {
let path = filepath;
if (path.startsWith('file://')) path = path.slice(7);
return _NativeRNLlama.default.loadSession(this.id, path);
}
/**
* Save current cached prompt & completion state to a file.
*/
async saveSession(filepath, options) {
return _NativeRNLlama.default.saveSession(this.id, filepath, options?.tokenSize || -1);
}
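/**
* Format an array of chat messages into a single prompt string using the
* model's built-in chat template when available, falling back to 'chatml'.
* Pass `template` to force a specific template.
*/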
async getFormattedChat(messages, template) {
const chat = (0, _chat.formatChat)(messages);
let tmpl = this.model?.isChatTemplateSupported ? undefined : 'chatml';
if (template) tmpl = template; // Force replace if provided
return _NativeRNLlama.default.getFormattedChat(this.id, chat, tmpl);
}
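/**
* Run a completion. `params.messages`, when present, is formatted via
* `getFormattedChat` and takes precedence over `params.prompt`; a `callback`
* enables streaming of partial tokens.
*
* Minimal sketch (the sampling values and stop strings below are illustrative,
* not library defaults):
*
*   let streamed = ''
*   const result = await context.completion(
*     {
*       messages: [{ role: 'user', content: 'Hello!' }],
*       n_predict: 128,
*       temperature: 0.7,
*       stop: ['</s>'],
*     },
*     data => { streamed += data.token },
*   )
*   console.log(result.text)
*/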
async completion(params, callback) {
let finalPrompt = params.prompt;
if (params.messages) {
// messages always win
finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate);
}
if (!finalPrompt) throw new Error('Prompt is required');
// Only forward token events that belong to this context.
let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
const {
contextId,
tokenResult
} = evt;
if (contextId !== this.id) return;
callback(tokenResult);
});
const promise = _NativeRNLlama.default.completion(this.id, {
...params,
prompt: finalPrompt,
emit_partial_completion: !!callback
});
return promise.then(completionResult => {
tokenListener?.remove();
tokenListener = null;
return completionResult;
}).catch(err => {
tokenListener?.remove();
tokenListener = null;
throw err;
});
}
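/** Ask the native side to stop the in-flight completion for this context. */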
stopCompletion() {
return _NativeRNLlama.default.stopCompletion(this.id);
}
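/**
* Tokenization helpers: `tokenizeAsync` resolves asynchronously, while
* `tokenizeSync` is the synchronous native variant; `detokenize` maps token
* ids back to text.
*
* Sketch of a round trip (assumes the native result exposes a `tokens` array):
*
*   const { tokens } = await context.tokenizeAsync('Hello world')
*   const text = await context.detokenize(tokens)
*/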
tokenizeAsync(text) {
return _NativeRNLlama.default.tokenizeAsync(this.id, text);
}
tokenizeSync(text) {
return _NativeRNLlama.default.tokenizeSync(this.id, text);
}
detokenize(tokens) {
return _NativeRNLlama.default.detokenize(this.id, tokens);
}
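/**
* Compute an embedding for `text`. This typically requires the context to
* have been initialized with embedding support enabled.
*
* Sketch (assumes the native result exposes an `embedding` array):
*
*   const { embedding } = await context.embedding('some text', {})
*/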
embedding(text, params) {
return _NativeRNLlama.default.embedding(this.id, text, params || {});
}
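/**
* Benchmark the loaded model. The pp/tg/pl/nr arguments follow llama.cpp's
* llama-bench convention (prompt-processing tokens, generated tokens,
* parallel sequences, repetitions); the native side returns a JSON array that
* is unpacked into named fields below.
*/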
async bench(pp, tg, pl, nr) {
const result = await _NativeRNLlama.default.bench(this.id, pp, tg, pl, nr);
const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] = JSON.parse(result);
return {
modelDesc,
modelSize,
modelNParams,
ppAvg,
ppStd,
tgAvg,
tgStd
};
}
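/** Free the native context and its resources. */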
async release() {
return _NativeRNLlama.default.releaseContext(this.id);
}
}
exports.LlamaContext = LlamaContext;
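/** Query CPU feature flags reported by the native module. */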
async function getCpuFeatures() {
return _NativeRNLlama.default.getCpuFeatures();
}
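/** Set a context limit on the native side (forwards `limit` to the native module). */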
async function setContextLimit(limit) {
return _NativeRNLlama.default.setContextLimit(limit);
}
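// Context ids combine an incrementing counter with a random offset
// (fixed to 0 under NODE_ENV === 'test').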
let contextIdCounter = 0;
const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
const modelInfoSkip = [
// Large fields
'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges'];
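/**
* Read GGUF metadata for a model file without creating a context. Large
* tokenizer fields are skipped (see `modelInfoSkip`).
*
* Sketch (the metadata key shown is an illustrative GGUF field):
*
*   const info = await loadLlamaModelInfo('file:///path/to/model.gguf')
*   console.log(info['general.architecture'])
*/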
async function loadLlamaModelInfo(model) {
let path = model;
if (path.startsWith('file://')) path = path.slice(7);
return _NativeRNLlama.default.modelInfo(path, modelInfoSkip);
}
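// Maps the string `pooling_type` option to llama.cpp's numeric pooling types.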
const poolTypeMap = {
// -1 (unspecified) is represented by leaving the value undefined (no map entry)
none: 0,
mean: 1,
cls: 2,
last: 3,
rank: 4
};
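/**
* Load a model and create a native context. `file://` prefixes on model and
* lora paths are stripped; `onProgress`, when provided, receives load-progress
* events for this context.
*
* Minimal sketch (the context options shown are illustrative; pick values for
* your device and model):
*
*   const context = await initLlama(
*     {
*       model: 'file:///path/to/model.gguf',
*       n_ctx: 2048,
*       n_gpu_layers: 0,
*     },
*     progress => console.log(`load: ${progress}%`),
*   )
*   console.log(context.gpu, context.reasonNoGPU)
*/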
async function initLlama(_ref2, onProgress) {
let {
model,
is_model_asset: isModelAsset,
pooling_type: poolingType,
lora,
...rest
} = _ref2;
let path = model;
if (path.startsWith('file://')) path = path.slice(7);
let loraPath = lora;
if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7);
const contextId = contextIdCounter + contextIdRandom();
contextIdCounter += 1;
let removeProgressListener = null;
if (onProgress) {
removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
if (evt.contextId !== contextId) return;
onProgress(evt.progress);
});
}
const poolType = poolTypeMap[poolingType];
const {
gpu,
reasonNoGPU,
model: modelDetails
} = await _NativeRNLlama.default.initContext(contextId, {
model: path,
is_model_asset: !!isModelAsset,
use_progress_callback: !!onProgress,
pooling_type: poolType,
lora: loraPath,
...rest
}).catch(err => {
removeProgressListener?.remove();
throw err;
});
removeProgressListener?.remove();
return new LlamaContext({
contextId,
gpu,
reasonNoGPU,
model: modelDetails
});
}
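/** Release every native context created by this module. */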
async function releaseAllLlama() {
return _NativeRNLlama.default.releaseAllContexts();
}
//# sourceMappingURL=index.js.map