@langchain/community
Version:
Third-party integrations for LangChain.js
109 lines (108 loc) • 3.9 kB
JavaScript
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
const require_runtime = require("../_virtual/_rolldown/runtime.cjs");
const require_llama_cpp = require("../utils/llama_cpp.cjs");
let _langchain_core_outputs = require("@langchain/core/outputs");
let node_llama_cpp = require("node-llama-cpp");
let _langchain_core_language_models_llms = require("@langchain/core/language_models/llms");
//#region src/llms/llama_cpp.ts
var llama_cpp_exports = /* @__PURE__ */ require_runtime.__exportAll({ LlamaCpp: () => LlamaCpp });
/**
* To use this model you need to have the `node-llama-cpp` module installed.
* This can be installed using `npm install -S node-llama-cpp` and the minimum
* version supported in version 2.0.0.
* This also requires that have a locally built version of Llama3 installed.
*/
var LlamaCpp = class LlamaCpp extends _langchain_core_language_models_llms.LLM {
lc_serializable = true;
static inputs;
maxTokens;
temperature;
topK;
topP;
trimWhitespaceSuffix;
_model;
_context;
_session;
_jsonSchema;
_gbnf;
static lc_name() {
return "LlamaCpp";
}
constructor(inputs) {
super(inputs);
this.maxTokens = inputs?.maxTokens;
this.temperature = inputs?.temperature;
this.topK = inputs?.topK;
this.topP = inputs?.topP;
this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
}
/**
* Initializes the llama_cpp model for usage.
* @param inputs - the inputs passed onto the model.
* @returns A Promise that resolves to the LlamaCpp type class.
*/
static async initialize(inputs) {
const instance = new LlamaCpp(inputs);
const llama = await (0, node_llama_cpp.getLlama)();
instance._model = await require_llama_cpp.createLlamaModel(inputs, llama);
instance._context = await require_llama_cpp.createLlamaContext(instance._model, inputs);
instance._jsonSchema = await require_llama_cpp.createLlamaJsonSchemaGrammar(inputs?.jsonSchema, llama);
instance._gbnf = await require_llama_cpp.createCustomGrammar(inputs?.gbnf, llama);
instance._session = require_llama_cpp.createLlamaSession(instance._context);
return instance;
}
_llmType() {
return "llama_cpp";
}
/** @ignore */
async _call(prompt, options) {
try {
let promptGrammer;
if (this._jsonSchema !== void 0) promptGrammer = this._jsonSchema;
else if (this._gbnf !== void 0) promptGrammer = this._gbnf;
else promptGrammer = void 0;
const promptOptions = {
grammar: promptGrammer,
onToken: options?.onToken,
maxTokens: this?.maxTokens,
temperature: this?.temperature,
topK: this?.topK,
topP: this?.topP,
trimWhitespaceSuffix: this?.trimWhitespaceSuffix
};
const completion = await this._session.prompt(prompt, promptOptions);
if (this._jsonSchema !== void 0 && completion !== void 0) return this._jsonSchema.parse(completion);
return completion;
} catch {
throw new Error("Error getting prompt completion.");
}
}
async *_streamResponseChunks(prompt, _options, runManager) {
const promptOptions = {
temperature: this?.temperature,
maxTokens: this?.maxTokens,
topK: this?.topK,
topP: this?.topP
};
if (this._context.sequencesLeft === 0) this._context = await require_llama_cpp.createLlamaContext(this._model, LlamaCpp.inputs);
const sequence = this._context.getSequence();
const tokens = this._model.tokenize(prompt);
const stream = await this.caller.call(async () => sequence.evaluate(tokens, promptOptions));
for await (const chunk of stream) {
yield new _langchain_core_outputs.GenerationChunk({
text: this._model.detokenize([chunk]),
generationInfo: {}
});
await runManager?.handleLLMNewToken(this._model.detokenize([chunk]) ?? "");
}
}
};
//#endregion
exports.LlamaCpp = LlamaCpp;
Object.defineProperty(exports, "llama_cpp_exports", {
enumerable: true,
get: function() {
return llama_cpp_exports;
}
});
//# sourceMappingURL=llama_cpp.cjs.map