/**
* NeuroLink Unified Evaluation System
*/
import { logger } from "../utils/logger.js";
import { AIProviderFactory } from "./factory.js";
import { z } from "zod";
import { ProviderRegistry } from "../factories/providerRegistry.js";
import { modelConfig } from "./modelConfiguration.js";
// Zod schema for validation
const UnifiedEvaluationSchema = z.object({
relevance: z.number().min(1).max(10),
accuracy: z.number().min(1).max(10),
completeness: z.number().min(1).max(10),
overall: z.number().min(1).max(10),
domainAlignment: z.number().min(1).max(10).optional(),
terminologyAccuracy: z.number().min(1).max(10).optional(),
toolEffectiveness: z.number().min(1).max(10).optional(),
});
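/**
 * Illustrative note (not part of the original control flow): the schema above is
 * declared but not referenced elsewhere in this file. If desired, it can validate
 * a parsed evaluation before the scores are trusted, e.g.:
 *
 * @example
 * const check = UnifiedEvaluationSchema.safeParse({
 *   relevance: 8,
 *   accuracy: 7,
 *   completeness: 6,
 *   overall: 7,
 * });
 * if (!check.success) {
 *   logger.debug("Evaluation scores failed validation", { issues: check.error.issues });
 * }
 */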
/**
* Get default evaluation when evaluation fails
*/
function getDefaultUnifiedEvaluation(reason, evaluationTime, context) {
const functionTag = "getDefaultUnifiedEvaluation";
logger.debug(`[${functionTag}] Creating default evaluation`, {
reason,
evaluationTime,
hasContext: !!context,
});
return {
relevance: 1,
accuracy: 1,
completeness: 1,
overall: 1,
domainAlignment: 1,
terminologyAccuracy: 1,
toolEffectiveness: 1,
isOffTopic: false,
alertSeverity: "low",
reasoning: `Default evaluation used due to: ${reason}`,
contextUtilization: {
conversationUsed: false,
toolsUsed: false,
domainKnowledgeUsed: false,
},
evaluationContext: {
domain: context?.primaryDomain || "general",
toolsEvaluated: [],
conversationTurns: 0,
},
evaluationModel: "default",
evaluationTime,
evaluationProvider: "default",
evaluationAttempt: 1,
evaluationConfig: {
mode: "fallback",
fallbackUsed: true,
costEstimate: 0,
},
};
}
/**
* Parse unified evaluation result from text response
*/
function parseUnifiedEvaluationResult(response, context) {
const functionTag = "parseUnifiedEvaluationResult";
try {
logger.debug(`[${functionTag}] Parsing evaluation response`, {
responseLength: response.length,
});
// Try JSON parsing first (matches the first flat {...} block; nested objects are not handled)
const jsonMatch = response.match(/\{[^}]*\}/s);
if (jsonMatch) {
try {
const parsed = JSON.parse(jsonMatch[0]);
return parsed;
}
catch (parseError) {
logger.debug(`[${functionTag}] JSON parsing failed, trying regex`, {
error: parseError,
});
}
}
// Fallback to regex parsing
const result = {};
const patterns = {
relevance: /relevance[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
accuracy: /accuracy[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
completeness: /completeness[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
overall: /overall[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
reasoning: /reasoning[:\s]*(.+?)(?=\n\s*\w+:|\n\s*$|$)/is,
};
for (const [key, pattern] of Object.entries(patterns)) {
const match = response.match(pattern);
if (match) {
if (key === "reasoning") {
// Extract reasoning text
result.reasoning = match[1].trim();
}
else {
// Extract numerical scores
const value = parseFloat(match[1]);
if (value >= 1 && value <= 10) {
const roundedValue = Math.round(value);
if (key === "relevance") {
result.relevance = roundedValue;
}
else if (key === "accuracy") {
result.accuracy = roundedValue;
}
else if (key === "completeness") {
result.completeness = roundedValue;
}
else if (key === "overall") {
result.overall = roundedValue;
}
}
}
}
}
// Ensure minimum valid scores
return {
relevance: result.relevance || 1,
accuracy: result.accuracy || 1,
completeness: result.completeness || 1,
overall: result.overall || 1,
reasoning: result.reasoning || "No detailed reasoning provided",
};
}
catch (error) {
logger.error(`[${functionTag}] Failed to parse evaluation result`, {
error,
});
return {
relevance: 1,
accuracy: 1,
completeness: 1,
overall: 1,
reasoning: "Error occurred during evaluation parsing",
};
}
}
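/**
 * Worked example (illustrative): for a provider reply in the requested plain-text
 * format, the regex fallback above produces rounded 1-10 scores plus the reasoning text.
 *
 * @example
 * parseUnifiedEvaluationResult(
 *   "Relevance: 9\nAccuracy: 8\nCompleteness: 7\nOverall: 8\nReasoning: Covers the question well.",
 *   {},
 * );
 * // => { relevance: 9, accuracy: 8, completeness: 7, overall: 8,
 * //      reasoning: "Covers the question well." }
 */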
/**
* Main unified evaluation function
*/
export async function generateUnifiedEvaluation(context) {
const functionTag = "generateUnifiedEvaluation";
const startTime = Date.now();
logger.debug(`[${functionTag}] Starting evaluation`, {
hasUserQuery: !!context.userQuery,
hasAiResponse: !!context.aiResponse,
domain: context.primaryDomain,
});
try {
// Ensure providers are registered
await ProviderRegistry.registerAllProviders();
// Get evaluation provider
const evaluationProvider = process.env.NEUROLINK_EVALUATION_PROVIDER || "google-ai";
// Use configurable model selection instead of hardcoded default
const evaluationModel = process.env.NEUROLINK_EVALUATION_MODEL ||
modelConfig.getModelForTier(evaluationProvider, "fast") ||
"gemini-2.5-flash"; // Ultimate fallback
logger.debug(`[${functionTag}] Using provider: ${evaluationProvider}, model: ${evaluationModel}`);
const provider = await AIProviderFactory.createProvider(evaluationProvider, evaluationModel);
if (!provider) {
logger.debug(`[${functionTag}] No evaluation provider available, returning defaults`);
return getDefaultUnifiedEvaluation("no-provider", Date.now() - startTime, context);
}
// Create evaluation prompt
const prompt = `
Evaluate this AI response on a scale of 1-10 for each criterion:
User Query: ${context.userQuery}
AI Response: ${context.aiResponse}
Rate on these criteria (1-10 scale):
- Relevance: How well does the response address the user's question?
- Accuracy: How factually correct and precise is the information?
- Completeness: How thoroughly does it cover the topic?
- Overall: General quality assessment
Respond in this exact format:
Relevance: [score]
Accuracy: [score]
Completeness: [score]
Overall: [score]
Reasoning: [Provide a detailed explanation of your evaluation, explaining why you gave these scores. Include specific observations about the response's strengths and any areas for improvement.]
`;
// Generate evaluation
const result = await provider.generate(prompt);
if (!result) {
logger.debug(`[${functionTag}] No response from provider`);
return getDefaultUnifiedEvaluation("no-response", Date.now() - startTime, context);
}
// Extract text from result
const response = typeof result === "string" ? result : result?.content || String(result);
// Parse evaluation result
const parsed = parseUnifiedEvaluationResult(response, context);
// Validate and enhance result
const validatedResult = {
...parsed,
evaluationModel: `${evaluationProvider}/${evaluationModel}`,
evaluationTime: Date.now() - startTime,
evaluationProvider,
evaluationAttempt: 1,
evaluationConfig: {
mode: "standard",
fallbackUsed: false,
costEstimate: 0.001, // Rough estimate
},
};
logger.debug(`[${functionTag}] Evaluation completed`, {
relevance: validatedResult.relevance,
accuracy: validatedResult.accuracy,
completeness: validatedResult.completeness,
overall: validatedResult.overall,
evaluationTime: validatedResult.evaluationTime,
});
return validatedResult;
}
catch (error) {
logger.error(`[${functionTag}] Evaluation failed`, {
error: error instanceof Error ? error.message : String(error),
});
return getDefaultUnifiedEvaluation(error instanceof Error ? error.message : "unknown-error", Date.now() - startTime, context);
}
}
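/**
 * Usage sketch (assumes credentials for the configured evaluation provider are
 * available in the environment; field names follow the context object built in
 * evaluateResponse below):
 *
 * @example
 * const scores = await generateUnifiedEvaluation({
 *   userQuery: "What is the capital of France?",
 *   aiResponse: "The capital of France is Paris.",
 * });
 * console.log(scores.overall, scores.reasoning);
 */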
// Legacy compatibility function with flexible arguments.
// Trailing parameters are accepted for older call signatures but are not used.
export async function evaluateResponse(responseOrContext, contextOrUserQuery, userQuery, _providedContexts, _options, _additionalArgs) {
// Handle different call patterns for backward compatibility
let aiResponse;
let context;
if (typeof responseOrContext === "string") {
// Normal call: evaluateResponse(response, context, ...)
aiResponse = responseOrContext;
context = contextOrUserQuery;
}
else {
// Provider call pattern: evaluateResponse(contextObject, userQuery, ...)
context = responseOrContext;
aiResponse =
context?.aiResponse ||
context?.response ||
String(contextOrUserQuery || "");
}
const evalContext = {
userQuery: (typeof userQuery === "string" ? userQuery : "") ||
context?.userQuery ||
(typeof contextOrUserQuery === "string" ? contextOrUserQuery : "") ||
"Generated response",
aiResponse,
context: context,
};
return generateUnifiedEvaluation(evalContext);
}
// Export additional utilities
export { getDefaultUnifiedEvaluation, parseUnifiedEvaluationResult };
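/**
 * Backward-compatible call patterns (illustrative):
 *
 * @example
 * // Normal call: response string first, context object second
 * await evaluateResponse("Paris is the capital of France.", {
 *   userQuery: "What is the capital of France?",
 * });
 *
 * // Provider call pattern: context object first, user query string second
 * await evaluateResponse(
 *   { aiResponse: "Paris is the capital of France." },
 *   "What is the capital of France?",
 * );
 */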