UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications...

127 lines (126 loc) 6.04 kB
import { AIProviderFactory } from "../core/factory.js";
import { PromptBuilder } from "./prompts.js";
import { logger } from "../utils/logger.js";
import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../observability/index.js";
import { withSpan } from "../telemetry/withSpan.js";
import { tracers } from "../telemetry/tracers.js";
/**
 * Implements a RAGAS-style evaluator that uses a "judge" LLM to score the
 * quality of an AI response based on rich, contextual information.
 */
export class RAGASEvaluator {
    evaluationModel;
    providerName;
    threshold;
    promptBuilder;
    promptGenerator;
    /**
     * Creates an evaluator. Every falsy argument falls back to an environment
     * variable and then to a built-in default.
     *
     * @param evaluationModel Judge model name. Falls back to
     *   `NEUROLINK_RAGAS_EVALUATION_MODEL`, then `"gemini-1.5-flash"`.
     * @param providerName Provider used to create the judge model. Falls back to
     *   `NEUROLINK_RAGAS_EVALUATION_PROVIDER`, then `"vertex"`.
     * @param threshold Minimum `finalScore` for `isPassing` to be true. Falls
     *   back to `NEUROLINK_EVALUATION_THRESHOLD` (numeric), then `7`. Because
     *   the fallback uses `||`, a threshold of `0` is treated as "not provided".
     * @param promptGenerator Optional value forwarded unchanged to
     *   `PromptBuilder.buildEvaluationPrompt`.
     */
    constructor(evaluationModel, providerName, threshold, promptGenerator) {
        this.evaluationModel =
            evaluationModel ||
                process.env.NEUROLINK_RAGAS_EVALUATION_MODEL ||
                "gemini-1.5-flash";
        this.providerName =
            providerName ||
                process.env.NEUROLINK_RAGAS_EVALUATION_PROVIDER ||
                "vertex";
        this.threshold =
            threshold ||
                Number(process.env.NEUROLINK_EVALUATION_THRESHOLD) ||
                7;
        this.promptBuilder = new PromptBuilder();
        this.promptGenerator = promptGenerator;
    }
    /**
     * Evaluates an AI-generated response using a model-based approach.
     *
     * Builds the evaluation prompt from `context`, sends it to the configured
     * judge model, parses the reply and records the outcome on both the OTel
     * span supplied by `withSpan` and the internal metrics aggregator.
     *
     * @param context The rich, contextual information for the evaluation.
     *   `context.attemptNumber` is copied onto the result.
     * @returns A promise that resolves to a detailed `EvaluationResult`:
     *   the parsed scores plus `isPassing`, `evaluationModel`,
     *   `evaluationTime` (ms), `attemptNumber` and `rawEvaluationResponse`.
     * @throws Error when the provider returns a falsy result; any error thrown
     *   while building the prompt or calling the provider is rethrown after an
     *   ERROR span has been recorded.
     */
    async evaluate(context) {
        return withSpan({
            name: "neurolink.evaluation.ragas",
            tracer: tracers.sdk,
            attributes: {
                "evaluation.provider": this.providerName,
                "evaluation.model": this.evaluationModel,
            },
        }, async (otelSpan) => {
            const span = SpanSerializer.createSpan(SpanType.EVALUATION, "evaluation.ragas", {
                "evaluation.dimension": "relevance|accuracy|completeness",
                "ai.provider": this.providerName,
                "ai.model": this.evaluationModel,
            });
            const startTime = Date.now();
            try {
                const prompt = this.promptBuilder.buildEvaluationPrompt(context, this.promptGenerator);
                const provider = await AIProviderFactory.createProvider(this.providerName, this.evaluationModel);
                const result = await provider.generate({
                    input: { text: prompt },
                });
                if (!result) {
                    throw new Error("Evaluation generation failed to return a result.");
                }
                const rawEvaluationResponse = result.content;
                const parsedResult = this.parseEvaluationResponse(rawEvaluationResponse);
                const evaluationTime = Date.now() - startTime;
                const finalResult = {
                    ...parsedResult,
                    isPassing: parsedResult.finalScore >= this.threshold, // This will be recalculated, but is needed for the type
                    evaluationModel: this.evaluationModel,
                    evaluationTime,
                    attemptNumber: context.attemptNumber,
                    rawEvaluationResponse,
                };
                // Write evaluation scores to OTel span for Langfuse visibility
                otelSpan.setAttribute("evaluation.relevance_score", finalResult.relevanceScore);
                otelSpan.setAttribute("evaluation.accuracy_score", finalResult.accuracyScore);
                otelSpan.setAttribute("evaluation.completeness_score", finalResult.completenessScore);
                otelSpan.setAttribute("evaluation.final_score", finalResult.finalScore);
                otelSpan.setAttribute("evaluation.is_passing", finalResult.isPassing);
                span.durationMs = Date.now() - startTime;
                const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
                getMetricsAggregator().recordSpan(endedSpan);
                return finalResult;
            }
            catch (error) {
                // Record the failure on the internal span, then let the caller
                // (and withSpan) see the original error.
                span.durationMs = Date.now() - startTime;
                const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR);
                endedSpan.statusMessage =
                    error instanceof Error ? error.message : String(error);
                getMetricsAggregator().recordSpan(endedSpan);
                throw error;
            }
        }); // end withSpan
    }
    /**
     * Parses the raw JSON string from the judge LLM into a structured `EvaluationResult` object.
     * It includes error handling to gracefully manage malformed JSON.
     *
     * Markdown code fences are stripped regardless of language tag, letter case
     * or line-ending style. If the cleaned text is still not valid JSON (for
     * example the model wrapped the object in prose), the outermost `{...}`
     * span is parsed instead. On any failure the error is logged and an
     * all-zero result is returned; this method never throws.
     *
     * @param rawResponse The raw string response from the evaluation model.
     * @returns A structured object containing the evaluation scores and feedback.
     *   Scores that are missing or not numeric become `0`; missing text fields
     *   get placeholder messages.
     */
    parseEvaluationResponse(rawResponse) {
        try {
            if (typeof rawResponse !== "string") {
                throw new TypeError(`Expected evaluation response to be a string, received ${typeof rawResponse}.`);
            }
            // Remove opening fences such as ```json, ```JSON or a bare ``` (with
            // LF or CRLF line endings) as well as closing fences.
            const cleanedResponse = rawResponse
                .replace(/```[a-z]*[ \t]*(?:\r?\n)?/gi, "")
                .trim();
            let parsed;
            try {
                parsed = JSON.parse(cleanedResponse);
            }
            catch (directParseError) {
                // Fallback: the model may have added commentary around the JSON
                // object, so retry on the outermost brace-delimited span.
                const objectStart = cleanedResponse.indexOf("{");
                const objectEnd = cleanedResponse.lastIndexOf("}");
                if (objectStart === -1 || objectEnd <= objectStart) {
                    throw directParseError;
                }
                parsed = JSON.parse(cleanedResponse.slice(objectStart, objectEnd + 1));
            }
            logger.debug("Parsed evaluation response for RAGAS Evaluator:", parsed);
            return {
                relevanceScore: Number(parsed.relevanceScore) || 0,
                accuracyScore: Number(parsed.accuracyScore) || 0,
                completenessScore: Number(parsed.completenessScore) || 0,
                finalScore: Number(parsed.finalScore) || 0,
                suggestedImprovements: parsed.suggestedImprovements || "No suggestions provided.",
                reasoning: parsed.reasoning || "No reasoning provided.",
            };
        }
        catch (error) {
            logger.error("Failed to parse evaluation response:", error);
            return {
                relevanceScore: 0,
                accuracyScore: 0,
                completenessScore: 0,
                finalScore: 0,
                reasoning: "Error parsing evaluation response.",
                suggestedImprovements: "Error parsing evaluation response.",
            };
        }
    }
}