@rexdug7005/nvidia-llama4
Version:
Integración de NVIDIA Llama4 con LangChain.js, con soporte para Tools Agent de n8n
194 lines (193 loc) • 7.46 kB
JavaScript
import { BaseLLM, } from "@langchain/core/language_models/llms";
import { GenerationChunk } from "@langchain/core/outputs";
import axios from "axios";
import { convertOptionsToNvidiaParams, } from "./utils.js";
/**
 * LangChain LLM wrapper around the NVIDIA Llama 4 chat-completions endpoint.
 *
 * Every prompt is sent as a single `user` message. The answer is read from
 * `choices[0].message.content` for plain requests and from
 * `choices[0].delta.content` for server-sent-event (streaming) requests.
 */
export class NvidiaLlama4 extends BaseLLM {
  /**
   * @returns {string} The serialisation name of this class, "NvidiaLlama4".
   */
  static lc_name() {
    return "NvidiaLlama4";
  }

  /**
   * @param {object} fields - Construction options.
   * @param {string} fields.apiKey - Token sent as `Authorization: Bearer <apiKey>`.
   * @param {string} [fields.baseUrl] - Full URL that requests are POSTed to.
   * @param {string} [fields.model] - Model identifier placed in the payload.
   * @param {boolean} [fields.streaming=false] - Makes `_call` use the
   *   streaming path instead of `_generate`.
   *
   * Every other property of `fields` is stored verbatim in `defaultOptions`
   * and merged into each request by `getParams`.
   */
  constructor(fields) {
    super(fields);
    // Declare the instance fields explicitly (enumerable / configurable /
    // writable, initially undefined) before assigning them.
    for (const fieldName of [
      "apiKey",
      "baseUrl",
      "modelName",
      "defaultOptions",
      "streaming",
    ]) {
      Object.defineProperty(this, fieldName, {
        enumerable: true,
        configurable: true,
        writable: true,
        value: undefined,
      });
    }
    // Split the connection settings from whatever remains; the remainder is
    // treated as default per-request options.
    const { apiKey, baseUrl, model, streaming, ...modelOptions } = fields;
    this.apiKey = apiKey;
    this.baseUrl =
      baseUrl || "https://integrate.api.nvidia.com/v1/chat/completions";
    this.modelName = model || "meta/llama-4-maverick-17b-128e-instruct";
    this.streaming = streaming ?? false;
    this.defaultOptions = modelOptions;
  }

  /**
   * @returns {string} The LLM type tag, "nvidia-llama4".
   */
  _llmType() {
    return "nvidia-llama4";
  }

  /**
   * Builds the JSON payload for one request.
   *
   * @param {string} prompt - Text sent as the content of the single user message.
   * @param {object} [options] - Per-call options; they override `defaultOptions`.
   *   A non-empty `options.images` array is copied onto the payload.
   * @param {boolean} [streaming=false] - Value of the payload's `stream` flag.
   * @returns {object} The request body.
   */
  getParams(prompt, options, streaming = false) {
    // The merged options (constructor defaults, then per-call overrides, then
    // the model name) are handed to convertOptionsToNvidiaParams, whose
    // result forms the base of the payload.
    const nvidiaParams = convertOptionsToNvidiaParams({
      ...this.defaultOptions,
      ...options,
      model: this.modelName,
    });
    const payload = {
      ...nvidiaParams,
      messages: [{ role: "user", content: prompt }],
      stream: streaming,
    };
    // `options` may be omitted by callers (e.g. `_call(prompt)`), so guard
    // the lookup instead of dereferencing it directly.
    const images = options?.images;
    if (images && images.length > 0) {
      payload.images = images;
    }
    return payload;
  }

  /**
   * Sends every prompt as its own non-streaming request, all in parallel.
   *
   * @param {string[]} prompts - Prompts to complete.
   * @param {object} [options] - Per-call options forwarded to `getParams`.
   * @returns {Promise<{generations: Array<Array<object>>}>} One single-element
   *   generation list per prompt, in prompt order.
   * @throws {Error} When any request fails; the underlying error is attached
   *   as `cause`.
   */
  async _generate(prompts, options) {
    const requestOptions = {
      headers: buildRequestHeaders(this.apiKey, "application/json"),
    };
    const generations = await Promise.all(
      prompts.map(async (prompt) => {
        const params = this.getParams(prompt, options, false);
        try {
          const { data } = await axios.post(
            this.baseUrl,
            params,
            requestOptions
          );
          const firstChoice = data.choices?.[0];
          return [
            {
              text: firstChoice?.message?.content || "",
              generationInfo: {
                finishReason: firstChoice?.finish_reason,
                tokenUsage: data.usage,
              },
            },
          ];
        } catch (error) {
          throw new Error(
            `Error al llamar a la API de NVIDIA Llama4: ${String(error)}`,
            { cause: error }
          );
        }
      })
    );
    return { generations };
  }

  /**
   * Streams a completion, yielding one chunk per non-empty content delta.
   *
   * @param {string} prompt - Prompt to complete.
   * @param {object} [options] - Per-call options forwarded to `getParams`.
   * @param {object} [runManager] - When given, `handleLLMNewToken(text)` is
   *   awaited after each yielded chunk.
   * @yields {GenerationChunk} Chunk carrying the delta text and finish reason.
   * @throws {Error} When the request, the stream, or a callback fails; the
   *   underlying error is attached as `cause`.
   */
  async *_streamResponseChunks(prompt, options, runManager) {
    const requestOptions = {
      headers: buildRequestHeaders(this.apiKey, "text/event-stream"),
      responseType: "stream",
    };
    const params = this.getParams(prompt, options, true);
    try {
      const response = await axios.post(this.baseUrl, params, requestOptions);
      for await (const line of readStreamLines(response.data)) {
        const event = parseSseDataLine(line);
        if (event === null) {
          // Not a data line, or its payload was not valid JSON: skip it.
          continue;
        }
        if (event.done) {
          return;
        }
        if (!event.text) {
          continue;
        }
        yield new GenerationChunk({
          text: event.text,
          generationInfo: { finishReason: event.finishReason },
        });
        if (runManager) {
          await runManager.handleLLMNewToken(event.text);
        }
      }
    } catch (error) {
      throw new Error(
        `Error al procesar el stream de NVIDIA Llama4: ${String(error)}`,
        { cause: error }
      );
    }
  }

  /**
   * Completes a single prompt and returns the text only.
   *
   * @param {string} prompt - Prompt to complete.
   * @param {object} [options] - Per-call options.
   * @returns {Promise<string>} The full completion; when `this.streaming` is
   *   set it is assembled by concatenating the streamed chunks.
   */
  async _call(prompt, options) {
    if (!this.streaming) {
      const { generations } = await this._generate([prompt], options);
      return generations[0]?.[0]?.text || "";
    }
    let responseText = "";
    for await (const piece of this._streamResponseChunks(prompt, options)) {
      if (piece && piece.text) {
        responseText += piece.text;
      }
    }
    return responseText;
  }
}

/**
 * Builds the HTTP headers shared by both request kinds.
 *
 * @param {string} apiKey - Bearer token.
 * @param {string} accept - Value of the `Accept` header.
 * @returns {object} Header map.
 */
function buildRequestHeaders(apiKey, accept) {
  return {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
    Accept: accept,
  };
}

/**
 * Splits a byte stream into newline-terminated lines of text.
 *
 * A streaming decoder is used so that a multi-byte UTF-8 character cut in
 * two by a chunk boundary is reassembled rather than decoded as U+FFFD.
 * Text left over after the last newline is yielded as a final line.
 *
 * @param {AsyncIterable<Uint8Array|string>} stream - Source of raw chunks.
 * @yields {string} Each line without its terminating "\n".
 */
async function* readStreamLines(stream) {
  const decoder = new TextDecoder("utf-8");
  let pending = "";
  for await (const rawChunk of stream) {
    pending += decoder.decode(Buffer.from(rawChunk), { stream: true });
    let newlineIndex = pending.indexOf("\n");
    while (newlineIndex !== -1) {
      yield pending.slice(0, newlineIndex);
      pending = pending.slice(newlineIndex + 1);
      newlineIndex = pending.indexOf("\n");
    }
  }
  // Flush bytes still held by the decoder, then any unterminated last line.
  pending += decoder.decode();
  if (pending.length > 0) {
    yield pending;
  }
}

/**
 * Interprets one line of a server-sent-event stream.
 *
 * @param {string} rawLine - A single line, surrounding whitespace allowed.
 * @returns {{done: boolean, text?: string, finishReason?: *}|null}
 *   `null` when the line is not a `data:` field or its payload is not valid
 *   JSON; `{ done: true }` for the `[DONE]` sentinel; otherwise the delta
 *   text (possibly empty) and finish reason of the first choice.
 */
function parseSseDataLine(rawLine) {
  const line = rawLine.trim();
  // The space after the colon is optional in the event-stream format.
  if (!line.startsWith("data:")) {
    return null;
  }
  const data = line.slice("data:".length).trim();
  if (data === "[DONE]") {
    return { done: true };
  }
  let parsed;
  try {
    parsed = JSON.parse(data);
  } catch {
    // Deliberate best effort: an unparsable payload is skipped, not fatal.
    return null;
  }
  const firstChoice = parsed?.choices?.[0];
  return {
    done: false,
    text: firstChoice?.delta?.content || "",
    finishReason: firstChoice?.finish_reason,
  };
}