@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
127 lines • 6.08 kB
JavaScript
import { AIProviderFactory } from "../core/factory.js";
import { PromptBuilder } from "./prompts.js";
import { logger } from "../utils/logger.js";
import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../observability/index.js";
import { withSpan } from "../telemetry/withSpan.js";
import { tracers } from "../telemetry/tracers.js";
/**
 * RAGAS-style evaluator that asks a "judge" LLM to score the quality of an AI
 * response and turns the judge's JSON reply into numeric scores.
 *
 * Each evaluation is wrapped in a telemetry span (via `withSpan`) and is also
 * recorded as a serialized span with the metrics aggregator.
 */
export class RAGASEvaluator {
    /** Name of the judge model passed to the provider factory. */
    evaluationModel;
    /** Name of the provider used to create the judge model. */
    providerName;
    /** Minimum `finalScore` for a result to be flagged `isPassing`. */
    threshold;
    /** Builds the evaluation prompt sent to the judge model. */
    promptBuilder;
    /** Optional generator forwarded to `promptBuilder.buildEvaluationPrompt`. */
    promptGenerator;

    /**
     * Each of the first three arguments falls back to an environment variable
     * and then to a built-in default when it is falsy.
     *
     * @param evaluationModel Judge model name. Falls back to
     *   `NEUROLINK_RAGAS_EVALUATION_MODEL`, then `"gemini-1.5-flash"`.
     * @param providerName Judge provider name. Falls back to
     *   `NEUROLINK_RAGAS_EVALUATION_PROVIDER`, then `"vertex"`.
     * @param threshold Passing score. Falls back to
     *   `NEUROLINK_EVALUATION_THRESHOLD` (parsed with `Number`), then `7`.
     *   Because `||` is used, an explicit `0` (or a non-numeric / zero
     *   environment value) is treated as "not provided".
     * @param promptGenerator Optional value handed to the prompt builder
     *   together with the evaluation context.
     */
    constructor(evaluationModel, providerName, threshold, promptGenerator) {
        this.evaluationModel =
            evaluationModel ||
                process.env.NEUROLINK_RAGAS_EVALUATION_MODEL ||
                "gemini-1.5-flash";
        this.providerName =
            providerName ||
                process.env.NEUROLINK_RAGAS_EVALUATION_PROVIDER ||
                "vertex";
        this.threshold =
            threshold || Number(process.env.NEUROLINK_EVALUATION_THRESHOLD) || 7;
        this.promptBuilder = new PromptBuilder();
        this.promptGenerator = promptGenerator;
    }

    /**
     * Evaluates an AI-generated response using a model-based approach.
     *
     * Builds the judge prompt, runs it through the configured provider, parses
     * the reply and returns the scores together with evaluation metadata.
     * A reply that cannot be parsed does not throw: it yields all-zero scores
     * (see `parseEvaluationResponse`).
     *
     * @param context The rich, contextual information for the evaluation.
     *   Its `attemptNumber` is copied onto the result.
     * @returns A promise that resolves to a detailed `EvaluationResult`.
     * @throws Rethrows any error raised while building the prompt, creating the
     *   provider or generating the judge reply, after recording an ERROR span.
     */
    async evaluate(context) {
        return withSpan({
            name: "neurolink.evaluation.ragas",
            tracer: tracers.sdk,
            attributes: {
                "evaluation.provider": this.providerName,
                "evaluation.model": this.evaluationModel,
            },
        }, async (otelSpan) => {
            const span = SpanSerializer.createSpan(SpanType.EVALUATION, "evaluation.ragas", {
                "evaluation.dimension": "relevance|accuracy|completeness",
                "ai.provider": this.providerName,
                "ai.model": this.evaluationModel,
            });
            const startTime = Date.now();
            try {
                const prompt = this.promptBuilder.buildEvaluationPrompt(context, this.promptGenerator);
                const provider = await AIProviderFactory.createProvider(this.providerName, this.evaluationModel);
                const result = await provider.generate({
                    input: { text: prompt },
                });
                if (!result) {
                    throw new Error("Evaluation generation failed to return a result.");
                }
                const rawEvaluationResponse = result.content;
                const parsedResult = this.parseEvaluationResponse(rawEvaluationResponse);
                const evaluationTime = Date.now() - startTime;
                const finalResult = {
                    ...parsedResult,
                    isPassing: parsedResult.finalScore >= this.threshold, // This will be recalculated, but is needed for the type
                    evaluationModel: this.evaluationModel,
                    evaluationTime,
                    attemptNumber: context.attemptNumber,
                    rawEvaluationResponse,
                };
                // Write evaluation scores to OTel span for Langfuse visibility
                otelSpan.setAttribute("evaluation.relevance_score", finalResult.relevanceScore);
                otelSpan.setAttribute("evaluation.accuracy_score", finalResult.accuracyScore);
                otelSpan.setAttribute("evaluation.completeness_score", finalResult.completenessScore);
                otelSpan.setAttribute("evaluation.final_score", finalResult.finalScore);
                otelSpan.setAttribute("evaluation.is_passing", finalResult.isPassing);
                span.durationMs = Date.now() - startTime;
                const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
                getMetricsAggregator().recordSpan(endedSpan);
                return finalResult;
            }
            catch (error) {
                // Record the failure on the serialized span, then let the caller see it.
                span.durationMs = Date.now() - startTime;
                const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR);
                endedSpan.statusMessage =
                    error instanceof Error ? error.message : String(error);
                getMetricsAggregator().recordSpan(endedSpan);
                throw error;
            }
        }); // end withSpan
    }

    /**
     * Parses the raw JSON string from the judge LLM into a structured `EvaluationResult` object.
     *
     * Never throws: any failure (non-string input, malformed JSON, a JSON value
     * that is not an object) is logged and reported as all-zero scores with an
     * error message in `reasoning` and `suggestedImprovements`.
     *
     * Score fields are coerced with `Number`; missing or non-numeric values
     * become `0`. Missing text fields get a placeholder sentence.
     *
     * @param rawResponse The raw string response from the evaluation model.
     * @returns A structured object containing the evaluation scores and feedback.
     */
    parseEvaluationResponse(rawResponse) {
        try {
            const parsed = RAGASEvaluator.#parseJudgeJson(rawResponse);
            logger.debug("Parsed evaluation response for RAGAS Evaluator:", parsed);
            return {
                relevanceScore: Number(parsed.relevanceScore) || 0,
                accuracyScore: Number(parsed.accuracyScore) || 0,
                completenessScore: Number(parsed.completenessScore) || 0,
                finalScore: Number(parsed.finalScore) || 0,
                suggestedImprovements: parsed.suggestedImprovements || "No suggestions provided.",
                reasoning: parsed.reasoning || "No reasoning provided.",
            };
        }
        catch (error) {
            logger.error("Failed to parse evaluation response:", error);
            return {
                relevanceScore: 0,
                accuracyScore: 0,
                completenessScore: 0,
                finalScore: 0,
                reasoning: "Error parsing evaluation response.",
                suggestedImprovements: "Error parsing evaluation response.",
            };
        }
    }

    /**
     * Extracts and parses the JSON payload from a judge reply.
     *
     * First strips Markdown code fences and parses the remainder as-is. If
     * that fails (e.g. a "```json\r\n" or "```JSON" fence left residue, or the
     * model wrapped the JSON in prose), it retries on the span from the first
     * "{" to the last "}".
     *
     * @param rawResponse The raw reply; a non-string value throws.
     * @returns The parsed JSON value.
     * @throws The JSON syntax error when no parseable payload is found.
     */
    static #parseJudgeJson(rawResponse) {
        const cleanedResponse = rawResponse.replace(/```json\n|```/g, "").trim();
        try {
            return JSON.parse(cleanedResponse);
        }
        catch (strictError) {
            const objectStart = cleanedResponse.indexOf("{");
            const objectEnd = cleanedResponse.lastIndexOf("}");
            if (objectStart === -1 || objectEnd < objectStart) {
                // Nothing that looks like a JSON object: surface the original error.
                throw strictError;
            }
            return JSON.parse(cleanedResponse.slice(objectStart, objectEnd + 1));
        }
    }
}
//# sourceMappingURL=ragasEvaluator.js.map