@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
352 lines (351 loc) • 11 kB
JavaScript
/**
* Multi-Judge Workflow
* ====================
*
* 5-model ensemble with 3-judge voting for maximum reliability:
* - 5 diverse models generate responses
* - 3 judges independently evaluate (voting consensus)
* - Best response selected by aggregate scoring
*
* Ideal for: Critical decisions requiring high confidence
*
* @module workflow/workflows/multiJudgeWorkflow
*/
import { AIProviderName } from "../../constants/enums.js";
import { WORKFLOW_CREATION_DATE } from "../config.js";
/**
* Multi-Judge-5 Workflow Configuration
*
* Uses 5 models across different providers:
* - GPT-4o (OpenAI)
* - GPT-4o-mini (OpenAI)
* - Claude 3.5 Sonnet (Anthropic)
* - Claude 3 Haiku (Anthropic)
* - Gemini 2.0 Flash (Google)
*
* 3 independent judges vote:
* - GPT-4o evaluates accuracy & clarity
* - Claude 3.5 Sonnet evaluates reasoning & depth
* - Gemini 2.0 Flash evaluates completeness & coherence
*
* Scores are averaged across all judges for final selection
*
* @example
* ```typescript
* import { runWorkflow } from '../core/workflowRunner.js';
* import { MULTI_JUDGE_5_WORKFLOW } from './multiJudgeWorkflow.js';
*
* const result = await runWorkflow(MULTI_JUDGE_5_WORKFLOW, {
* prompt: 'Should we invest in renewable energy?',
* verbose: true,
* });
*
* console.log('Consensus score:', result.score);
* console.log('Agreement level:', result.consensus);
* ```
*/
export const MULTI_JUDGE_5_WORKFLOW = {
id: "multi-judge-5",
name: "Multi-Judge-5 Ensemble",
description: "5-model ensemble with 3-judge voting for high confidence",
version: "1.0.0",
type: "ensemble",
// 5 diverse models for comprehensive coverage
models: [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o",
label: "GPT-4o",
weight: 1.0,
temperature: 0.7,
},
{
provider: AIProviderName.OPENAI,
model: "gpt-4o-mini",
label: "GPT-4o-mini",
weight: 0.8, // Slightly lower weight
temperature: 0.7,
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-5-sonnet-20241022",
label: "Claude 3.5 Sonnet",
weight: 1.0,
temperature: 0.7,
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-haiku-20240307",
label: "Claude 3 Haiku",
weight: 0.7, // Lower weight for faster model
temperature: 0.7,
},
{
provider: AIProviderName.GOOGLE_AI,
model: "gemini-2.0-flash",
label: "Gemini 2 Flash",
weight: 0.9,
temperature: 0.7,
},
],
// 3 independent judges with different criteria focus
judges: [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o",
criteria: ["accuracy", "clarity", "factual_correctness"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Accuracy Judge",
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-5-sonnet-20241022",
criteria: ["reasoning_quality", "depth", "nuance"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Reasoning Judge",
},
{
provider: AIProviderName.GOOGLE_AI,
model: "gemini-2.0-flash",
criteria: ["completeness", "coherence", "relevance"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Completeness Judge",
},
],
// Execution configuration
execution: {
parallelism: 5, // All 5 models run simultaneously
timeout: 45000, // 45 second total timeout
modelTimeout: 30000, // 30 second per-model timeout
minResponses: 3, // Need at least 3 successful responses
costThreshold: 0.15, // Warn if cost exceeds $0.15
},
// Metadata
tags: ["ensemble", "multi-judge", "voting", "high-confidence", "critical"],
metadata: {
useCase: "Critical decisions requiring high confidence",
recommendedFor: [
"important business decisions",
"technical evaluations",
"complex analysis",
"fact-checking",
],
averageCost: 0.1,
averageLatency: 5000,
consensusThreshold: 0.7, // Expect 70%+ agreement
},
createdAt: WORKFLOW_CREATION_DATE,
};
/**
* Multi-Judge-3 Workflow (Lighter Version)
*
* 3 models with 2 judges (more cost-effective):
* - GPT-4o, Claude 3.5, Gemini 2.0
* - Judged by GPT-4o and Claude 3.5
*/
export const MULTI_JUDGE_3_WORKFLOW = {
id: "multi-judge-3",
name: "Multi-Judge-3 Ensemble",
description: "3-model ensemble with 2-judge voting",
version: "1.0.0",
type: "ensemble",
models: [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o",
label: "GPT-4o",
weight: 1.0,
temperature: 0.7,
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-5-sonnet-20241022",
label: "Claude 3.5 Sonnet",
weight: 1.0,
temperature: 0.7,
},
{
provider: AIProviderName.GOOGLE_AI,
model: "gemini-2.0-flash",
label: "Gemini 2.0 Flash",
weight: 1.0,
temperature: 0.7,
},
],
judges: [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o",
criteria: ["accuracy", "clarity", "completeness"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Primary Judge",
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-5-sonnet-20241022",
criteria: ["reasoning", "depth", "coherence"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Secondary Judge",
},
],
execution: {
parallelism: 3,
timeout: 35000,
modelTimeout: 25000,
minResponses: 2,
costThreshold: 0.08,
},
tags: ["ensemble", "multi-judge", "voting", "balanced"],
metadata: {
useCase: "Balanced multi-judge evaluation",
recommendedFor: ["important queries", "quality verification"],
averageCost: 0.04,
averageLatency: 3500,
},
createdAt: WORKFLOW_CREATION_DATE,
};
/**
* Create custom multi-judge workflow
*
* @param modelCount - Number of models (3, 5, or 7)
* @param judgeCount - Number of judges (2 or 3)
* @returns Configured workflow
*
* @example
* ```typescript
* const workflow = createMultiJudgeWorkflow(7, 3);
* const result = await runWorkflow(workflow, {
* prompt: 'Complex analysis task',
* });
* ```
*/
export function createMultiJudgeWorkflow(modelCount, judgeCount) {
// Base models (always include these)
const baseModels = [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o",
label: "GPT-4o",
weight: 1.0,
temperature: 0.7,
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-5-sonnet-20241022",
label: "Claude 3.5 Sonnet",
weight: 1.0,
temperature: 0.7,
},
{
provider: AIProviderName.GOOGLE_AI,
model: "gemini-2.0-flash",
label: "Gemini 2.0 Flash",
weight: 1.0,
temperature: 0.7,
},
];
// Additional models for larger ensembles
const additionalModels = [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o-mini",
label: "GPT-4o-mini",
weight: 0.8,
temperature: 0.7,
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-haiku-20240307",
label: "Claude 3 Haiku",
weight: 0.7,
temperature: 0.7,
},
{
provider: AIProviderName.GOOGLE_AI,
model: "gemini-1.5-flash",
label: "Gemini 1.5 Flash",
weight: 0.8,
temperature: 0.7,
},
{
provider: AIProviderName.OPENAI,
model: "gpt-3.5-turbo",
label: "GPT-3.5 Turbo",
weight: 0.6,
temperature: 0.7,
},
];
const models = [...baseModels, ...additionalModels.slice(0, modelCount - 3)];
// Base judges
const baseJudges = [
{
provider: AIProviderName.OPENAI,
model: "gpt-4o",
criteria: ["accuracy", "clarity", "completeness"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Primary Judge",
},
{
provider: AIProviderName.ANTHROPIC,
model: "claude-3-5-sonnet-20241022",
criteria: ["reasoning", "depth", "coherence"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Secondary Judge",
},
];
const thirdJudge = {
provider: AIProviderName.GOOGLE_AI,
model: "gemini-2.0-flash",
criteria: ["relevance", "factual_accuracy", "structure"],
outputFormat: "detailed",
includeReasoning: true,
temperature: 0.1,
scoreScale: { min: 0, max: 100 },
label: "Tertiary Judge",
};
const judges = judgeCount === 3 ? [...baseJudges, thirdJudge] : baseJudges;
return {
id: `multi-judge-${modelCount}-${judgeCount}`,
name: `Multi-Judge ${modelCount}x${judgeCount}`,
description: `${modelCount}-model ensemble with ${judgeCount}-judge voting`,
version: "1.0.0",
type: "ensemble",
models,
judges,
execution: {
parallelism: modelCount,
timeout: 45000,
modelTimeout: 30000,
minResponses: Math.ceil(modelCount / 2),
costThreshold: 0.2,
},
tags: ["ensemble", "multi-judge", "custom"],
metadata: {
useCase: "Custom multi-judge evaluation",
modelCount,
judgeCount,
},
createdAt: WORKFLOW_CREATION_DATE,
};
}