UNPKG

@langchain/community

Version:
1 lines 15.4 kB
{"version":3,"file":"llama_cpp.cjs","names":["SimpleChatModel","createLlamaModel","createLlamaContext","ChatGenerationChunk","AIMessageChunk","LlamaChatSession","ChatMessage"],"sources":["../../src/chat_models/llama_cpp.ts"],"sourcesContent":["import {\n LlamaModel,\n LlamaContext,\n LlamaChatSession,\n type Token,\n ChatUserMessage,\n ChatModelResponse,\n ChatHistoryItem,\n getLlama,\n} from \"node-llama-cpp\";\n\nimport {\n SimpleChatModel,\n type BaseChatModelParams,\n} from \"@langchain/core/language_models/chat_models\";\nimport type { BaseLanguageModelCallOptions } from \"@langchain/core/language_models/base\";\nimport { CallbackManagerForLLMRun } from \"@langchain/core/callbacks/manager\";\nimport {\n BaseMessage,\n AIMessageChunk,\n ChatMessage,\n} from \"@langchain/core/messages\";\nimport { ChatGenerationChunk } from \"@langchain/core/outputs\";\nimport {\n LlamaBaseCppInputs,\n createLlamaModel,\n createLlamaContext,\n} from \"../utils/llama_cpp.js\";\n\n/**\n * Note that the modelPath is the only required parameter. For testing you\n * can set this in the environment variable `LLAMA_PATH`.\n */\nexport interface LlamaCppInputs\n extends LlamaBaseCppInputs, BaseChatModelParams {}\n\nexport interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {\n /** The maximum number of tokens the response should contain. */\n maxTokens?: number;\n /** A function called when matching the provided token array */\n onToken?: (tokens: number[]) => void;\n}\n\n/**\n * To use this model you need to have the `node-llama-cpp` module installed.\n * This can be installed using `npm install -S node-llama-cpp` and the minimum\n * version supported in version 2.0.0.\n * This also requires that have a locally built version of Llama3 installed.\n * @example\n * ```typescript\n * // Initialize the ChatLlamaCpp model with the path to the model binary file.\n * const model = await ChatLlamaCpp.initialize({\n * modelPath: \"/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin\",\n * temperature: 0.5,\n * });\n *\n * // Call the model with a message and await the response.\n * const response = await model.invoke([\n * new HumanMessage({ content: \"My name is John.\" }),\n * ]);\n *\n * // Log the response to the console.\n * console.log({ response });\n *\n * ```\n */\nexport class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {\n static inputs: LlamaCppInputs;\n\n maxTokens?: number;\n\n temperature?: number;\n\n topK?: number;\n\n topP?: number;\n\n trimWhitespaceSuffix?: boolean;\n\n _model: LlamaModel;\n\n _context: LlamaContext;\n\n _session: LlamaChatSession | null;\n\n lc_serializable = true;\n\n static lc_name() {\n return \"ChatLlamaCpp\";\n }\n\n public constructor(inputs: LlamaCppInputs) {\n super(inputs);\n this.maxTokens = inputs?.maxTokens;\n this.temperature = inputs?.temperature;\n this.topK = inputs?.topK;\n this.topP = inputs?.topP;\n this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;\n this._session = null;\n }\n\n /**\n * Initializes the llama_cpp model for usage in the chat models wrapper.\n * @param inputs - the inputs passed onto the model.\n * @returns A Promise that resolves to the ChatLlamaCpp type class.\n */\n public static async initialize(\n inputs: LlamaBaseCppInputs\n ): Promise<ChatLlamaCpp> {\n const instance = new ChatLlamaCpp(inputs);\n const llama = await getLlama();\n\n instance._model = await createLlamaModel(inputs, llama);\n instance._context = await createLlamaContext(instance._model, inputs);\n\n return instance;\n }\n\n _llmType() {\n return \"llama_cpp\";\n }\n\n /** @ignore */\n _combineLLMOutput() {\n return {};\n }\n\n invocationParams() {\n return {\n maxTokens: this.maxTokens,\n temperature: this.temperature,\n topK: this.topK,\n topP: this.topP,\n trimWhitespaceSuffix: this.trimWhitespaceSuffix,\n };\n }\n\n /** @ignore */\n async _call(\n messages: BaseMessage[],\n options: this[\"ParsedCallOptions\"],\n runManager?: CallbackManagerForLLMRun\n ): Promise<string> {\n let prompt = \"\";\n\n if (messages.length > 1) {\n // We need to build a new _session\n prompt = this._buildSession(messages);\n } else if (!this._session) {\n prompt = this._buildSession(messages);\n } else {\n if (typeof messages[0].content !== \"string\") {\n throw new Error(\n \"ChatLlamaCpp does not support non-string message content in sessions.\"\n );\n }\n // If we already have a session then we should just have a single prompt\n prompt = messages[0].content;\n }\n\n try {\n const promptOptions = {\n signal: options.signal,\n onToken: async (tokens: number[]) => {\n options.onToken?.(tokens);\n await runManager?.handleLLMNewToken(\n this._model.detokenize(tokens.map((num) => num as Token))\n );\n },\n maxTokens: this?.maxTokens,\n temperature: this?.temperature,\n topK: this?.topK,\n topP: this?.topP,\n trimWhitespaceSuffix: this?.trimWhitespaceSuffix,\n };\n // @ts-expect-error - TS2531: Object is possibly 'null'.\n const completion = await this._session.prompt(prompt, promptOptions);\n return completion;\n } catch (e) {\n if (typeof e === \"object\") {\n const error = e as Error;\n if (error.message === \"AbortError\") {\n throw error;\n }\n }\n throw new Error(\"Error getting prompt completion.\");\n }\n }\n\n async *_streamResponseChunks(\n input: BaseMessage[],\n _options: this[\"ParsedCallOptions\"],\n runManager?: CallbackManagerForLLMRun\n ): AsyncGenerator<ChatGenerationChunk> {\n const promptOptions = {\n temperature: this?.temperature,\n topK: this?.topK,\n topP: this?.topP,\n };\n\n const prompt = this._buildPrompt(input);\n const sequence = this._context.getSequence();\n\n const stream = await this.caller.call(async () =>\n sequence.evaluate(this._model.tokenize(prompt), promptOptions)\n );\n\n for await (const chunk of stream) {\n yield new ChatGenerationChunk({\n text: this._model.detokenize([chunk]),\n message: new AIMessageChunk({\n content: this._model.detokenize([chunk]),\n }),\n generationInfo: {},\n });\n await runManager?.handleLLMNewToken(\n this._model.detokenize([chunk]) ?? \"\"\n );\n }\n }\n\n // This constructs a new session if we need to adding in any sys messages or previous chats\n protected _buildSession(messages: BaseMessage[]): string {\n let prompt = \"\";\n let sysMessage = \"\";\n let noSystemMessages: BaseMessage[] = [];\n let interactions: ChatHistoryItem[] = [];\n\n // Let's see if we have a system message\n if (messages.findIndex((msg) => msg.getType() === \"system\") !== -1) {\n const sysMessages = messages.filter(\n (message) => message.getType() === \"system\"\n );\n\n const systemMessageContent = sysMessages[sysMessages.length - 1].content;\n\n if (typeof systemMessageContent !== \"string\") {\n throw new Error(\n \"ChatLlamaCpp does not support non-string message content in sessions.\"\n );\n }\n // Only use the last provided system message\n sysMessage = systemMessageContent;\n\n // Now filter out the system messages\n noSystemMessages = messages.filter(\n (message) => message.getType() !== \"system\"\n );\n } else {\n noSystemMessages = messages;\n }\n\n // Lets see if we just have a prompt left or are their previous interactions?\n if (noSystemMessages.length > 1) {\n // Is the last message a prompt?\n if (noSystemMessages[noSystemMessages.length - 1].getType() === \"human\") {\n const finalMessageContent =\n noSystemMessages[noSystemMessages.length - 1].content;\n if (typeof finalMessageContent !== \"string\") {\n throw new Error(\n \"ChatLlamaCpp does not support non-string message content in sessions.\"\n );\n }\n prompt = finalMessageContent;\n interactions = this._convertMessagesToInteractions(\n noSystemMessages.slice(0, noSystemMessages.length - 1)\n );\n } else {\n interactions = this._convertMessagesToInteractions(noSystemMessages);\n }\n } else {\n if (typeof noSystemMessages[0].content !== \"string\") {\n throw new Error(\n \"ChatLlamaCpp does not support non-string message content in sessions.\"\n );\n }\n // If there was only a single message we assume it's a prompt\n prompt = noSystemMessages[0].content;\n }\n\n // Now lets construct a session according to what we got\n if (sysMessage !== \"\" && interactions.length > 0) {\n this._session = new LlamaChatSession({\n contextSequence: this._context.getSequence(),\n systemPrompt: sysMessage,\n });\n this._session.setChatHistory(interactions);\n } else if (sysMessage !== \"\" && interactions.length === 0) {\n this._session = new LlamaChatSession({\n contextSequence: this._context.getSequence(),\n systemPrompt: sysMessage,\n });\n } else if (sysMessage === \"\" && interactions.length > 0) {\n this._session = new LlamaChatSession({\n contextSequence: this._context.getSequence(),\n });\n this._session.setChatHistory(interactions);\n } else {\n this._session = new LlamaChatSession({\n contextSequence: this._context.getSequence(),\n });\n }\n\n return prompt;\n }\n\n // This builds a an array of interactions\n protected _convertMessagesToInteractions(\n messages: BaseMessage[]\n ): ChatHistoryItem[] {\n const result: ChatHistoryItem[] = [];\n\n for (let i = 0; i < messages.length; i += 2) {\n if (i + 1 < messages.length) {\n const prompt = messages[i].content;\n const response = messages[i + 1].content;\n if (typeof prompt !== \"string\" || typeof response !== \"string\") {\n throw new Error(\n \"ChatLlamaCpp does not support non-string message content.\"\n );\n }\n const llamaPrompt: ChatUserMessage = { type: \"user\", text: prompt };\n const llamaResponse: ChatModelResponse = {\n type: \"model\",\n response: [response],\n };\n result.push(llamaPrompt);\n result.push(llamaResponse);\n }\n }\n\n return result;\n }\n\n protected _buildPrompt(input: BaseMessage[]): string {\n const prompt = input\n .map((message) => {\n let messageText;\n if (message.getType() === \"human\") {\n messageText = `[INST] ${message.content} [/INST]`;\n } else if (message.getType() === \"ai\") {\n messageText = message.content;\n } else if (message.getType() === \"system\") {\n messageText = `<<SYS>> ${message.content} <</SYS>>`;\n } else if (ChatMessage.isInstance(message)) {\n messageText = `\\n\\n${message.role[0].toUpperCase()}${message.role.slice(\n 1\n )}: ${message.content}`;\n } else {\n console.warn(\n `Unsupported message type passed to llama_cpp: \"${message.getType()}\"`\n );\n messageText = \"\";\n }\n return messageText;\n })\n .join(\"\\n\");\n\n return prompt;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkEA,IAAa,eAAb,MAAa,qBAAqBA,4CAAAA,gBAAqC;CACrE,OAAO;CAEP;CAEA;CAEA;CAEA;CAEA;CAEA;CAEA;CAEA;CAEA,kBAAkB;CAElB,OAAO,UAAU;AACf,SAAO;;CAGT,YAAmB,QAAwB;AACzC,QAAM,OAAO;AACb,OAAK,YAAY,QAAQ;AACzB,OAAK,cAAc,QAAQ;AAC3B,OAAK,OAAO,QAAQ;AACpB,OAAK,OAAO,QAAQ;AACpB,OAAK,uBAAuB,QAAQ;AACpC,OAAK,WAAW;;;;;;;CAQlB,aAAoB,WAClB,QACuB;EACvB,MAAM,WAAW,IAAI,aAAa,OAAO;AAGzC,WAAS,SAAS,MAAMC,kBAAAA,iBAAiB,QAF3B,OAAA,GAAA,eAAA,WAAgB,CAEyB;AACvD,WAAS,WAAW,MAAMC,kBAAAA,mBAAmB,SAAS,QAAQ,OAAO;AAErE,SAAO;;CAGT,WAAW;AACT,SAAO;;;CAIT,oBAAoB;AAClB,SAAO,EAAE;;CAGX,mBAAmB;AACjB,SAAO;GACL,WAAW,KAAK;GAChB,aAAa,KAAK;GAClB,MAAM,KAAK;GACX,MAAM,KAAK;GACX,sBAAsB,KAAK;GAC5B;;;CAIH,MAAM,MACJ,UACA,SACA,YACiB;EACjB,IAAI,SAAS;AAEb,MAAI,SAAS,SAAS,EAEpB,UAAS,KAAK,cAAc,SAAS;WAC5B,CAAC,KAAK,SACf,UAAS,KAAK,cAAc,SAAS;OAChC;AACL,OAAI,OAAO,SAAS,GAAG,YAAY,SACjC,OAAM,IAAI,MACR,wEACD;AAGH,YAAS,SAAS,GAAG;;AAGvB,MAAI;GACF,MAAM,gBAAgB;IACpB,QAAQ,QAAQ;IAChB,SAAS,OAAO,WAAqB;AACnC,aAAQ,UAAU,OAAO;AACzB,WAAM,YAAY,kBAChB,KAAK,OAAO,WAAW,OAAO,KAAK,QAAQ,IAAa,CAAC,CAC1D;;IAEH,WAAW,MAAM;IACjB,aAAa,MAAM;IACnB,MAAM,MAAM;IACZ,MAAM,MAAM;IACZ,sBAAsB,MAAM;IAC7B;AAGD,UADmB,MAAM,KAAK,SAAS,OAAO,QAAQ,cAAc;WAE7D,GAAG;AACV,OAAI,OAAO,MAAM,UAAU;IACzB,MAAM,QAAQ;AACd,QAAI,MAAM,YAAY,aACpB,OAAM;;AAGV,SAAM,IAAI,MAAM,mCAAmC;;;CAIvD,OAAO,sBACL,OACA,UACA,YACqC;EACrC,MAAM,gBAAgB;GACpB,aAAa,MAAM;GACnB,MAAM,MAAM;GACZ,MAAM,MAAM;GACb;EAED,MAAM,SAAS,KAAK,aAAa,MAAM;EACvC,MAAM,WAAW,KAAK,SAAS,aAAa;EAE5C,MAAM,SAAS,MAAM,KAAK,OAAO,KAAK,YACpC,SAAS,SAAS,KAAK,OAAO,SAAS,OAAO,EAAE,cAAc,CAC/D;AAED,aAAW,MAAM,SAAS,QAAQ;AAChC,SAAM,IAAIC,wBAAAA,oBAAoB;IAC5B,MAAM,KAAK,OAAO,WAAW,CAAC,MAAM,CAAC;IACrC,SAAS,IAAIC,yBAAAA,eAAe,EAC1B,SAAS,KAAK,OAAO,WAAW,CAAC,MAAM,CAAC,EACzC,CAAC;IACF,gBAAgB,EAAE;IACnB,CAAC;AACF,SAAM,YAAY,kBAChB,KAAK,OAAO,WAAW,CAAC,MAAM,CAAC,IAAI,GACpC;;;CAKL,cAAwB,UAAiC;EACvD,IAAI,SAAS;EACb,IAAI,aAAa;EACjB,IAAI,mBAAkC,EAAE;EACxC,IAAI,eAAkC,EAAE;AAGxC,MAAI,SAAS,WAAW,QAAQ,IAAI,SAAS,KAAK,SAAS,KAAK,IAAI;GAClE,MAAM,cAAc,SAAS,QAC1B,YAAY,QAAQ,SAAS,KAAK,SACpC;GAED,MAAM,uBAAuB,YAAY,YAAY,SAAS,GAAG;AAEjE,OAAI,OAAO,yBAAyB,SAClC,OAAM,IAAI,MACR,wEACD;AAGH,gBAAa;AAGb,sBAAmB,SAAS,QACzB,YAAY,QAAQ,SAAS,KAAK,SACpC;QAED,oBAAmB;AAIrB,MAAI,iBAAiB,SAAS,EAE5B,KAAI,iBAAiB,iBAAiB,SAAS,GAAG,SAAS,KAAK,SAAS;GACvE,MAAM,sBACJ,iBAAiB,iBAAiB,SAAS,GAAG;AAChD,OAAI,OAAO,wBAAwB,SACjC,OAAM,IAAI,MACR,wEACD;AAEH,YAAS;AACT,kBAAe,KAAK,+BAClB,iBAAiB,MAAM,GAAG,iBAAiB,SAAS,EAAE,CACvD;QAED,gBAAe,KAAK,+BAA+B,iBAAiB;OAEjE;AACL,OAAI,OAAO,iBAAiB,GAAG,YAAY,SACzC,OAAM,IAAI,MACR,wEACD;AAGH,YAAS,iBAAiB,GAAG;;AAI/B,MAAI,eAAe,MAAM,aAAa,SAAS,GAAG;AAChD,QAAK,WAAW,IAAIC,eAAAA,iBAAiB;IACnC,iBAAiB,KAAK,SAAS,aAAa;IAC5C,cAAc;IACf,CAAC;AACF,QAAK,SAAS,eAAe,aAAa;aACjC,eAAe,MAAM,aAAa,WAAW,EACtD,MAAK,WAAW,IAAIA,eAAAA,iBAAiB;GACnC,iBAAiB,KAAK,SAAS,aAAa;GAC5C,cAAc;GACf,CAAC;WACO,eAAe,MAAM,aAAa,SAAS,GAAG;AACvD,QAAK,WAAW,IAAIA,eAAAA,iBAAiB,EACnC,iBAAiB,KAAK,SAAS,aAAa,EAC7C,CAAC;AACF,QAAK,SAAS,eAAe,aAAa;QAE1C,MAAK,WAAW,IAAIA,eAAAA,iBAAiB,EACnC,iBAAiB,KAAK,SAAS,aAAa,EAC7C,CAAC;AAGJ,SAAO;;CAIT,+BACE,UACmB;EACnB,MAAM,SAA4B,EAAE;AAEpC,OAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK,EACxC,KAAI,IAAI,IAAI,SAAS,QAAQ;GAC3B,MAAM,SAAS,SAAS,GAAG;GAC3B,MAAM,WAAW,SAAS,IAAI,GAAG;AACjC,OAAI,OAAO,WAAW,YAAY,OAAO,aAAa,SACpD,OAAM,IAAI,MACR,4DACD;GAEH,MAAM,cAA+B;IAAE,MAAM;IAAQ,MAAM;IAAQ;GACnE,MAAM,gBAAmC;IACvC,MAAM;IACN,UAAU,CAAC,SAAS;IACrB;AACD,UAAO,KAAK,YAAY;AACxB,UAAO,KAAK,cAAc;;AAI9B,SAAO;;CAGT,aAAuB,OAA8B;AAwBnD,SAvBe,MACZ,KAAK,YAAY;GAChB,IAAI;AACJ,OAAI,QAAQ,SAAS,KAAK,QACxB,eAAc,UAAU,QAAQ,QAAQ;YAC/B,QAAQ,SAAS,KAAK,KAC/B,eAAc,QAAQ;YACb,QAAQ,SAAS,KAAK,SAC/B,eAAc,WAAW,QAAQ,QAAQ;YAChCC,yBAAAA,YAAY,WAAW,QAAQ,CACxC,eAAc,OAAO,QAAQ,KAAK,GAAG,aAAa,GAAG,QAAQ,KAAK,MAChE,EACD,CAAC,IAAI,QAAQ;QACT;AACL,YAAQ,KACN,kDAAkD,QAAQ,SAAS,CAAC,GACrE;AACD,kBAAc;;AAEhB,UAAO;IACP,CACD,KAAK,KAAK"}