UNPKG

@rexdug7005/nvidia-llama4

Version:

Integración de NVIDIA Llama4 con LangChain.js, con soporte para Tools Agent de n8n

github.com/Alex66688/nvidia-llama4

Alex66688/nvidia-llama4

194 lines (193 loc) • 7.46 kB

JavaScript

import { BaseLLM, } from "@langchain/core/language_models/llms";
import { GenerationChunk } from "@langchain/core/outputs";
import axios from "axios";
import { convertOptionsToNvidiaParams, } from "./utils.js";

/** Field-name prefix of a server-sent-event data line (the space after the colon is optional). */
const SSE_DATA_PREFIX = "data:";
/** Payload the API sends to mark the end of a streamed completion. */
const SSE_DONE_SENTINEL = "[DONE]";

/**
 * Interprets a single line of the event stream.
 *
 * @param {string} line - One line of the response body, without its newline.
 * @returns {{ done: boolean, chunk: (GenerationChunk|undefined) }} `done` is true
 *   when the line carries the end-of-stream sentinel; `chunk` is set only when
 *   the line holds a JSON event whose `choices[0].delta.content` is non-empty.
 */
function parseSseLine(line) {
    const trimmed = line.trim();
    if (!trimmed.startsWith(SSE_DATA_PREFIX)) {
        return { done: false, chunk: undefined };
    }
    const data = trimmed.slice(SSE_DATA_PREFIX.length).trim();
    if (data === SSE_DONE_SENTINEL) {
        return { done: true, chunk: undefined };
    }
    let parsed;
    try {
        parsed = JSON.parse(data);
    }
    catch {
        // Deliberate best effort: lines that are not valid JSON are skipped.
        return { done: false, chunk: undefined };
    }
    // In the chat/completions format the streamed text lives in choices[0].delta.content.
    const text = parsed?.choices?.[0]?.delta?.content || "";
    if (!text) {
        return { done: false, chunk: undefined };
    }
    const chunk = new GenerationChunk({
        text,
        generationInfo: {
            finishReason: parsed.choices?.[0]?.finish_reason,
        },
    });
    return { done: false, chunk };
}

/**
 * NVIDIA Llama4 language model implementation for LangChain.
 */
export class NvidiaLlama4 extends BaseLLM {
    static lc_name() {
        return "NvidiaLlama4";
    }

    /**
     * @param {object} fields - Construction options. `apiKey`, `baseUrl`, `model`
     *   and `streaming` are consumed here; every other property is kept in
     *   `defaultOptions` and merged into each request's options.
     */
    constructor(fields) {
        super(fields);
        // Declare the instance fields up front as plain own data properties.
        for (const name of ["apiKey", "baseUrl", "modelName", "defaultOptions", "streaming"]) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.apiKey = fields.apiKey;
        this.baseUrl = fields.baseUrl || "https://integrate.api.nvidia.com/v1/chat/completions";
        this.modelName = fields.model || "meta/llama-4-maverick-17b-128e-instruct";
        this.streaming = fields.streaming ?? false;
        // Extract the default options by dropping the properties that are not model options.
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        const { apiKey, baseUrl, model, streaming, ...rest } = fields;
        this.defaultOptions = rest;
    }

    _llmType() {
        return "nvidia-llama4";
    }

    /**
     * Builds the request payload for the API call.
     *
     * @param {string} prompt - Text sent as the single user message.
     * @param {object} [options] - Per-call options; they override `defaultOptions`.
     *   May be omitted.
     * @param {boolean} [streaming=false] - Value of the payload's `stream` flag.
     * @returns {object} Payload in chat format (`messages`, `stream`, converted
     *   options, and `images` when provided).
     */
    getParams(prompt, options, streaming = false) {
        // Convert the merged options to the NVIDIA parameter format.
        const baseOptions = convertOptionsToNvidiaParams({
            ...this.defaultOptions,
            ...options,
            model: this.modelName,
        });
        const payload = {
            ...baseOptions,
            messages: [{ role: "user", content: prompt }],
            stream: streaming,
        };
        // Attach images when present (multimodal input). `options` may be
        // undefined when the caller passes no per-call options.
        const images = options?.images;
        if (images && images.length > 0) {
            payload.images = images;
        }
        return payload;
    }

    /**
     * Generates completions without streaming; one request per prompt, issued in parallel.
     *
     * @param {string[]} prompts - Prompts to complete.
     * @param {object} [options] - Per-call options forwarded to `getParams`.
     * @returns {Promise<{generations: Array<Array<object>>}>} One single-element
     *   generation list per prompt, in prompt order.
     * @throws {Error} When any request fails; the original error is attached as `cause`.
     */
    async _generate(prompts, options) {
        const requestOptions = {
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
                Accept: "application/json",
            },
        };
        const generations = await Promise.all(prompts.map(async (prompt) => {
            const params = this.getParams(prompt, options, false);
            try {
                const response = await axios.post(this.baseUrl, params, requestOptions);
                const responseData = response.data;
                // In the chat/completions format the text lives in choices[0].message.content.
                const text = responseData.choices?.[0]?.message?.content || "";
                return [
                    {
                        text,
                        generationInfo: {
                            finishReason: responseData.choices?.[0]?.finish_reason,
                            tokenUsage: responseData.usage,
                        },
                    },
                ];
            }
            catch (error) {
                throw new Error(`Error al llamar a la API de NVIDIA Llama4: ${String(error)}`, { cause: error });
            }
        }));
        return {
            generations,
        };
    }

    /**
     * Streams a completion, yielding one chunk per event that carries text.
     *
     * @param {string} prompt - Prompt to complete.
     * @param {object} [options] - Per-call options forwarded to `getParams`.
     * @param {object} [runManager] - When provided, its `handleLLMNewToken` is
     *   awaited with each yielded text.
     * @yields {GenerationChunk} Text fragments in arrival order.
     * @throws {Error} When the request, the stream, or a token callback fails;
     *   the original error is attached as `cause`.
     */
    async *_streamResponseChunks(prompt, options, runManager) {
        const requestOptions = {
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
                Accept: "text/event-stream",
            },
            responseType: "stream",
        };
        const params = this.getParams(prompt, options, true);
        try {
            const response = await axios.post(this.baseUrl, params, requestOptions);
            // A streaming decoder keeps the trailing bytes of a multi-byte UTF-8
            // character that was split across two network chunks.
            const decoder = new TextDecoder("utf-8");
            // Accumulates decoded text until a complete line is available.
            let buffer = "";
            for await (const chunk of response.data) {
                buffer += typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
                let newlineIndex = buffer.indexOf("\n");
                while (newlineIndex !== -1) {
                    const line = buffer.slice(0, newlineIndex);
                    buffer = buffer.slice(newlineIndex + 1);
                    newlineIndex = buffer.indexOf("\n");
                    const { done, chunk: generationChunk } = parseSseLine(line);
                    if (done) {
                        return;
                    }
                    if (generationChunk) {
                        yield generationChunk;
                        // Notify the callback manager, if there is one.
                        if (runManager) {
                            await runManager.handleLLMNewToken(generationChunk.text);
                        }
                    }
                }
            }
            // The stream may end without a final newline: flush the decoder and
            // handle whatever is left in the buffer as one last line.
            buffer += decoder.decode();
            const { chunk: lastChunk } = parseSseLine(buffer);
            if (lastChunk) {
                yield lastChunk;
                if (runManager) {
                    await runManager.handleLLMNewToken(lastChunk.text);
                }
            }
        }
        catch (error) {
            throw new Error(`Error al procesar el stream de NVIDIA Llama4: ${String(error)}`, { cause: error });
        }
    }

    /**
     * Completes a single prompt and returns the text.
     *
     * When `this.streaming` is set, the streamed chunks are concatenated;
     * otherwise the first generation returned by `_generate` is used.
     *
     * @param {string} prompt - Prompt to complete.
     * @param {object} [options] - Per-call options.
     * @returns {Promise<string>} Completion text, or `""` when none was produced.
     */
    async _call(prompt, options) {
        if (this.streaming) {
            let responseText = "";
            for await (const chunk of this._streamResponseChunks(prompt, options)) {
                if (chunk && chunk.text) {
                    responseText += chunk.text;
                }
            }
            return responseText;
        }
        const result = await this._generate([prompt], options);
        return result.generations[0]?.[0]?.text || "";
    }
}