@clduab11/gemini-flow
Version:
Revolutionary AI agent swarm coordination platform with Google Services integration, multimedia processing, and production-ready monitoring. Features 8 Google AI services, quantum computing capabilities, and enterprise-grade security.
488 lines (413 loc) • 15.8 kB
text/typescript
/**
* Smart Routing Engine Performance Tests
*
* Comprehensive test suite proving <75ms routing overhead
* Tests LRU cache, complexity analysis, and intelligent selection
*/
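/*
 * Usage pattern exercised throughout this suite (sketch only; the decision
 * fields listed here are inferred from the assertions below rather than from
 * the router's published type definitions):
 *
 *   const router = new ModelRouter();
 *   const decision = await router.selectOptimalModel(context, availableModels);
 *   // decision.modelName, decision.confidence, decision.routingTime,
 *   // decision.fromCache, decision.reason
 */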
import {
ModelRouter,
RoutingDecision,
ComplexityAnalysis,
} from "../model-router.js";
import { ModelConfig, RoutingContext } from "../model-orchestrator.js";
describe("Smart Routing Engine Performance Tests", () => {
let router: ModelRouter;
let mockModels: Map<string, ModelConfig>;
let testContexts: RoutingContext[];
beforeEach(() => {
router = new ModelRouter();
// Setup mock models for testing
mockModels = new Map([
[
"gemini-2.0-flash",
{
name: "gemini-2.0-flash",
tier: "free",
capabilities: ["text", "code", "reasoning"],
latencyTarget: 800,
costPerToken: 0.000001,
maxTokens: 1000000,
},
],
[
"gemini-2.0-flash-thinking",
{
name: "gemini-2.0-flash-thinking",
tier: "pro",
capabilities: ["text", "code", "advanced-reasoning"],
latencyTarget: 1200,
costPerToken: 0.000002,
maxTokens: 1000000,
},
],
[
"gemini-pro-vertex",
{
name: "gemini-pro-vertex",
tier: "enterprise",
capabilities: ["text", "code", "reasoning", "enterprise-security"],
latencyTarget: 1000,
costPerToken: 0.000003,
maxTokens: 1000000,
},
],
]);
// Setup test contexts for different scenarios
testContexts = [
{
task: "Simple text generation task",
userTier: "free",
priority: "low",
latencyRequirement: 1000,
},
{
task: "Complex code analysis and refactoring implementation",
userTier: "pro",
priority: "high",
latencyRequirement: 500,
capabilities: ["code", "advanced-reasoning"],
},
{
task: "Enterprise security audit with comprehensive analysis",
userTier: "enterprise",
priority: "critical",
latencyRequirement: 300,
capabilities: ["enterprise-security", "advanced-reasoning"],
},
];
});
describe("Routing Latency Requirements", () => {
test("should meet <75ms routing target for cold start", async () => {
const context = testContexts[0];
const startTime = performance.now();
const decision = await router.selectOptimalModel(context, mockModels);
const routingTime = performance.now() - startTime;
expect(routingTime).toBeLessThan(75);
expect(decision.modelName).toBeDefined();
expect(decision.confidence).toBeGreaterThan(0);
expect(decision.routingTime).toBeLessThan(75);
});
test("should achieve <10ms routing with LRU cache hit", async () => {
const context = testContexts[1];
// First call to populate cache
await router.selectOptimalModel(context, mockModels);
// Second call should hit cache
const startTime = performance.now();
const decision = await router.selectOptimalModel(context, mockModels);
const routingTime = performance.now() - startTime;
expect(routingTime).toBeLessThan(10);
expect(decision.fromCache).toBe(true);
expect(decision.reason).toContain("cache hit");
});
test("should consistently meet target across 100 routing decisions", async () => {
const routingTimes: number[] = [];
const contexts = Array(100)
.fill(null)
.map((_, i) => ({
...testContexts[i % testContexts.length],
task: `Task variation ${i}: ${testContexts[i % testContexts.length].task}`,
}));
for (const context of contexts) {
const startTime = performance.now();
await router.selectOptimalModel(context, mockModels);
const routingTime = performance.now() - startTime;
routingTimes.push(routingTime);
}
const avgTime =
routingTimes.reduce((a, b) => a + b, 0) / routingTimes.length;
const maxTime = Math.max(...routingTimes);
// p95: value at the 95th-percentile index of the ascending-sorted times
const p95Time = routingTimes.sort((a, b) => a - b)[
Math.floor(routingTimes.length * 0.95)
];
expect(avgTime).toBeLessThan(50); // Average should be well under target
expect(p95Time).toBeLessThan(75); // 95th percentile meets target
expect(maxTime).toBeLessThan(150); // Even worst case should be reasonable
});
});
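// The p95 computation above can be factored into a small helper; this sketch
// is illustrative only and is not used by the tests in this file.
function percentileSketch(samples: number[], p: number): number {
  const sorted = [...samples].sort((a, b) => a - b);
  // Index-based percentile, matching the Math.floor(length * p) convention above
  return sorted[Math.floor(sorted.length * p)];
}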
describe("LRU Cache Performance", () => {
test("should maintain cache hit rate >70% after warmup", async () => {
const warmupSize = 50;
const testSize = 100;
// Warmup phase with repeated patterns
const warmupContexts = Array(warmupSize)
.fill(null)
.map((_, i) => ({
...testContexts[i % 3], // Repeat first 3 contexts
task: `Warmup task ${i % 3}`,
}));
for (const context of warmupContexts) {
await router.selectOptimalModel(context, mockModels);
}
// Test phase with similar patterns
let cacheHits = 0;
const testPatterns = Array(testSize)
.fill(null)
.map((_, i) => ({
...testContexts[i % 3],
task: `Test task ${i % 3}`, // Similar to warmup
}));
for (const context of testPatterns) {
const decision = await router.selectOptimalModel(context, mockModels);
if (decision.fromCache) {
cacheHits++;
}
}
const hitRate = cacheHits / testSize;
expect(hitRate).toBeGreaterThan(0.7);
});
test("should handle cache eviction properly at 1000 entry limit", async () => {
// Fill cache beyond limit
const contexts = Array(1200)
.fill(null)
.map((_, i) => ({
task: `Unique task ${i}`,
userTier: "pro" as const,
priority: "medium" as const,
latencyRequirement: 1000,
}));
for (const context of contexts) {
await router.selectOptimalModel(context, mockModels);
}
const stats = router.getRouterStats();
expect(stats.cache.size).toBeLessThanOrEqual(1000);
// Should still maintain performance
const startTime = performance.now();
await router.selectOptimalModel(contexts[0], mockModels);
const routingTime = performance.now() - startTime;
expect(routingTime).toBeLessThan(75);
});
});
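// Illustrative only: a minimal LRU cache built on Map insertion order, showing
// the behavior the eviction test above relies on (hard capacity limit plus
// oldest-entry-first eviction). The real ModelRouter cache may be implemented
// differently; this sketch is not used by the tests.
class LruSketch<K, V> {
  private entries = new Map<K, V>();
  constructor(private readonly limit: number) {}

  get(key: K): V | undefined {
    const value = this.entries.get(key);
    if (value !== undefined) {
      // Refresh recency by moving the entry to the end of the insertion order
      this.entries.delete(key);
      this.entries.set(key, value);
    }
    return value;
  }

  set(key: K, value: V): void {
    if (this.entries.has(key)) this.entries.delete(key);
    this.entries.set(key, value);
    if (this.entries.size > this.limit) {
      // Evict the least recently used entry (first key in insertion order)
      const oldest = this.entries.keys().next().value as K;
      this.entries.delete(oldest);
    }
  }

  get size(): number {
    return this.entries.size;
  }
}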
describe("Intelligent Model Selection", () => {
test("should route simple tasks to fast models", async () => {
const simpleContext: RoutingContext = {
task: "Hello world",
userTier: "free",
priority: "low",
latencyRequirement: 500,
};
const decision = await router.selectOptimalModel(
simpleContext,
mockModels,
);
expect(decision.modelName).toBe("gemini-2.0-flash");
expect(decision.confidence).toBeGreaterThan(0.8);
});
test("should route complex tasks to advanced models", async () => {
const complexContext: RoutingContext = {
task: "Implement a complex distributed system with microservices architecture, considering scalability, fault tolerance, and performance optimization strategies",
userTier: "enterprise",
priority: "high",
latencyRequirement: 2000,
capabilities: ["advanced-reasoning", "enterprise-security"],
};
const decision = await router.selectOptimalModel(
complexContext,
mockModels,
);
expect(decision.modelName).toBe("gemini-pro-vertex");
expect(decision.confidence).toBeGreaterThan(0.8);
});
test("should respect user tier restrictions", async () => {
const restrictedContext: RoutingContext = {
task: "Complex enterprise task requiring advanced capabilities",
userTier: "free", // Free tier user
priority: "high",
latencyRequirement: 1000,
capabilities: ["advanced-reasoning"],
};
const decision = await router.selectOptimalModel(
restrictedContext,
mockModels,
);
// Should not route to enterprise-only models
expect(decision.modelName).toBe("gemini-2.0-flash");
});
});
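// The tier-restriction expectation above assumes routing never selects a model
// whose tier is above the caller's tier. A hypothetical gate might look like
// this sketch (tier names taken from the mock models; not the router's actual
// logic):
const tierRank: Record<string, number> = { free: 0, pro: 1, enterprise: 2 };
const tierAllowsSketch = (userTier: string, modelTier: string): boolean =>
  tierRank[modelTier] <= tierRank[userTier];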
describe("Complexity Analysis Performance", () => {
test("should analyze complexity in <5ms", async () => {
const complexTexts = [
"Simple task",
"Complex algorithmic implementation with advanced data structures",
"function calculateComplexMetrics(data) { return data.map(item => ({ ...item, score: analyzeComplexity(item) })); }",
];
for (const text of complexTexts) {
const context: RoutingContext = {
task: text,
userTier: "pro",
priority: "medium",
latencyRequirement: 1000,
};
const startTime = performance.now();
// Access internal complexity analysis through routing
await router.selectOptimalModel(context, mockModels);
const analysisTime = performance.now() - startTime;
expect(analysisTime).toBeLessThan(75); // Total routing should be under 75ms
}
});
test("should cache complexity analysis results", async () => {
const context: RoutingContext = {
task: "Complex task for caching test",
userTier: "pro",
priority: "medium",
latencyRequirement: 1000,
};
// First analysis
const startTime1 = performance.now();
await router.selectOptimalModel(context, mockModels);
const time1 = performance.now() - startTime1;
// Second analysis with same task
const startTime2 = performance.now();
await router.selectOptimalModel(context, mockModels);
const time2 = performance.now() - startTime2;
// Second call should be faster: the identical context lets the router reuse
// its cached complexity analysis (and possibly the cached routing decision)
expect(time2).toBeLessThan(time1);
});
});
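// Complexity analysis is internal to the router; a plausible (purely
// hypothetical) heuristic scores a task by length plus code and reasoning
// markers, which is roughly what the assertions above exercise:
const complexityScoreSketch = (task: string): number => {
  const lengthScore = Math.min(task.length / 1000, 1); // longer prompts score higher
  const codeScore = /[{};()=>]|function\s|class\s/.test(task) ? 0.5 : 0;
  const reasoningScore = /\b(architecture|optimi[sz]ation|refactor|analysis)\b/i.test(task)
    ? 0.5
    : 0;
  return lengthScore + codeScore + reasoningScore; // 0 (trivial) to 2 (complex)
};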
describe("Fallback Strategies", () => {
test("should handle model unavailability with <75ms fallback", async () => {
const context = testContexts[1];
// Mark preferred model as unavailable
router.updateModelAvailability("gemini-2.0-flash-thinking", false);
const startTime = performance.now();
const decision = await router.selectFallbackModel(
"gemini-2.0-flash-thinking",
context,
mockModels,
"Model unavailable",
);
const fallbackTime = performance.now() - startTime;
expect(fallbackTime).toBeLessThan(75);
expect(decision.modelName).not.toBe("gemini-2.0-flash-thinking");
expect(decision.modelName).toBeDefined();
});
test("should gracefully degrade to lower tier models", async () => {
const context: RoutingContext = {
task: "Enterprise task",
userTier: "enterprise",
priority: "high",
latencyRequirement: 1000,
};
// Mark enterprise model as unavailable
router.updateModelAvailability("gemini-pro-vertex", false);
const decision = await router.selectFallbackModel(
"gemini-pro-vertex",
context,
mockModels,
"Service maintenance",
);
expect(decision.modelName).toBe("gemini-2.0-flash-thinking");
expect(decision.confidence).toBeGreaterThan(0.5);
});
});
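// The fallback expectations above assume the router drops the unavailable
// model and then prefers the closest remaining tier. A hypothetical way to
// gather fallback candidates (not the router's actual implementation):
const fallbackCandidatesSketch = (
  failed: string,
  models: Map<string, ModelConfig>,
  unavailable: Set<string>,
): ModelConfig[] =>
  Array.from(models.values()).filter(
    (m) => m.name !== failed && !unavailable.has(m.name),
  );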
describe("Performance Monitoring", () => {
test("should track routing performance metrics", async () => {
// Perform several routing operations
for (let i = 0; i < 20; i++) {
await router.selectOptimalModel(
testContexts[i % testContexts.length],
mockModels,
);
}
// Avoid shadowing the global `performance` used for timing elsewhere in this file
const routingPerformance = router.getRoutingPerformance();
expect(routingPerformance.averageTime).toBeGreaterThan(0);
expect(routingPerformance.p95Time).toBeLessThan(75);
expect(routingPerformance.targetMet).toBe(true);
expect(routingPerformance.cacheHitRate).toBeGreaterThanOrEqual(0);
});
test("should emit performance warnings for slow routing", async () => {
const warningPromise = new Promise((resolve) => {
router.on("routing_slow", (data) => {
expect(data.routingTime).toBeGreaterThan(data.target);
expect(data.target).toBe(75);
resolve(data);
});
});
// Simulate slow routing by creating a very complex context
const heavyContext: RoutingContext = {
task: "x".repeat(10000), // Very long task
userTier: "enterprise",
priority: "critical",
latencyRequirement: 100,
};
await router.selectOptimalModel(heavyContext, mockModels);
// Wait for the warning or fail after 5 seconds; clear the timer afterwards
// so a late rejection cannot leak past the test
let timer: ReturnType<typeof setTimeout> | undefined;
await Promise.race([
warningPromise,
new Promise((_, reject) => {
timer = setTimeout(() => reject(new Error("No warning emitted")), 5000);
}),
]).finally(() => clearTimeout(timer));
}, 10000);
});
describe("Comprehensive Statistics", () => {
test("should provide detailed router statistics", async () => {
// Generate some activity
for (let i = 0; i < 10; i++) {
await router.selectOptimalModel(
testContexts[i % testContexts.length],
mockModels,
);
// Record some performance data
router.recordPerformance(
Array.from(mockModels.keys())[i % mockModels.size], // model name (map keys double as names)
Math.random() * 1000 + 500, // latency
Math.random() > 0.1, // 90% success rate
Math.random() * 0.01, // cost
{ input: 100, output: 50, total: 150 },
);
}
const stats = router.getRouterStats();
expect(stats.performance).toBeDefined();
expect(stats.cache).toBeDefined();
expect(stats.availability).toBeDefined();
expect(stats.models).toBeDefined();
expect(stats.cache.size).toBeGreaterThan(0);
expect(stats.cache.limit).toBe(1000);
expect(stats.models.length).toBeGreaterThan(0);
});
});
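// Shape implied by the statistics assertions above (inferred from this file's
// expectations only; the router's exported types may declare more fields):
type RouterStatsSketch = {
  performance: unknown;
  cache: { size: number; limit: number };
  availability: unknown;
  models: unknown[];
};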
describe("Stress Testing", () => {
test("should maintain performance under concurrent load", async () => {
const concurrentRequests = 50;
const promises: Promise<RoutingDecision>[] = [];
const startTime = performance.now();
// Fire off concurrent requests
for (let i = 0; i < concurrentRequests; i++) {
const context = {
...testContexts[i % testContexts.length],
task: `Concurrent task ${i}`,
};
promises.push(router.selectOptimalModel(context, mockModels));
}
const results = await Promise.all(promises);
const totalTime = performance.now() - startTime;
const avgTimePerRequest = totalTime / concurrentRequests;
// Each request should still meet target even under load
expect(avgTimePerRequest).toBeLessThan(100); // Slightly higher under load
expect(results).toHaveLength(concurrentRequests);
results.forEach((result) => {
expect(result.modelName).toBeDefined();
expect(result.confidence).toBeGreaterThan(0);
});
});
test("should handle memory pressure gracefully", async () => {
// Generate a large number of unique contexts to stress memory
const largeLoad = 2000;
for (let i = 0; i < largeLoad; i++) {
const context: RoutingContext = {
task: `Memory stress test task ${i} with unique content`,
userTier: "pro",
priority: "medium",
latencyRequirement: 1000,
};
const decision = await router.selectOptimalModel(context, mockModels);
expect(decision.routingTime).toBeLessThan(75);
}
// Verify cache didn't grow unbounded
const stats = router.getRouterStats();
expect(stats.cache.size).toBeLessThanOrEqual(1000);
});
});
});