UNPKG

@aj-archipelago/cortex

Version:

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.

478 lines (430 loc) 16.5 kB
// agentic.test.js // Tests for the agentic entity system import test from 'ava'; import serverFactory from '../../../../../index.js'; import { createClient } from 'graphql-ws'; import ws from 'ws'; // Define models to test - 4.1 as default, include grok 4 const TEST_MODELS = [ 'oai-gpt41', // Default 4.1 model 'xai-grok-4-fast-reasoning' // Grok 4 model ]; let testServer; let wsClient; test.before(async () => { process.env.CORTEX_ENABLE_REST = 'true'; const { server, startServer } = await serverFactory(); startServer && await startServer(); testServer = server; // Create WebSocket client for subscriptions wsClient = createClient({ url: `ws://localhost:${process.env.CORTEX_PORT || 4000}/graphql`, webSocketImpl: ws, retryAttempts: 3, connectionParams: {}, on: { error: (error) => { console.error('WS connection error:', error); } } }); // Test the connection by making a simple subscription try { await new Promise((resolve, reject) => { const subscription = wsClient.subscribe( { query: ` subscription TestConnection { requestProgress(requestIds: ["test"]) { requestId } } ` }, { next: () => { resolve(); }, error: reject, complete: () => { resolve(); } } ); // Add a timeout to avoid hanging setTimeout(() => { resolve(); }, 2000); }); } catch (error) { console.error('Failed to establish WebSocket connection:', error); throw error; } }); test.after.always('cleanup', async () => { if (wsClient) { wsClient.dispose(); } if (testServer) { await testServer.stop(); } }); // Helper function to collect subscription events async function collectSubscriptionEvents(subscription, timeout = 30000) { const events = []; return new Promise((resolve, reject) => { const timeoutId = setTimeout(() => { if (events.length > 0) { resolve(events); } else { reject(new Error('Subscription timed out with no events')); } }, timeout); const unsubscribe = wsClient.subscribe( { query: subscription.query, variables: subscription.variables }, { next: (event) => { events.push(event); if (event?.data?.requestProgress?.progress === 1) { clearTimeout(timeoutId); unsubscribe(); resolve(events); } }, error: (error) => { clearTimeout(timeoutId); reject(error); }, complete: () => { clearTimeout(timeoutId); resolve(events); } } ); }); } // Helper function to create model-specific tests function createModelTest(testName, testFunction) { TEST_MODELS.forEach(model => { test.serial(`${testName} - ${model}`, async (t) => { await testFunction(t, model); }); }); } // Helper function to validate info object structure function validateInfoObject(t, info, testName) { if (!info || typeof info !== 'object') { t.fail(`${testName}: Info object should be a valid object`); return; } // Check if info object has any meaningful content const hasContent = Object.keys(info).length > 0; t.true(hasContent, `${testName}: Info object should have some content`); // Validate toolUsed if present if (info.toolUsed) { if (Array.isArray(info.toolUsed)) { t.true(info.toolUsed.length > 0, `${testName}: toolUsed array should not be empty`); info.toolUsed.forEach((tool, index) => { // Handle nested arrays in toolUsed if (Array.isArray(tool)) { t.true(tool.length > 0, `${testName}: toolUsed[${index}] nested array should not be empty`); tool.forEach((nestedTool, nestedIndex) => { t.true(typeof nestedTool === 'string', `${testName}: toolUsed[${index}][${nestedIndex}] should be a string`); t.truthy(nestedTool.trim(), `${testName}: toolUsed[${index}][${nestedIndex}] should not be empty`); }); } else { t.true(typeof tool === 'string', `${testName}: toolUsed[${index}] should be a string`); t.truthy(tool.trim(), `${testName}: toolUsed[${index}] should not be empty`); } }); } else { t.true(typeof info.toolUsed === 'string', `${testName}: toolUsed should be a string if not array`); t.truthy(info.toolUsed.trim(), `${testName}: toolUsed should not be empty`); } } // Validate citations if present if (info.citations) { t.true(Array.isArray(info.citations), `${testName}: citations should be an array`); info.citations.forEach((citation, index) => { t.true(typeof citation === 'object', `${testName}: citations[${index}] should be an object`); if (citation.title) { t.true(typeof citation.title === 'string', `${testName}: citations[${index}].title should be a string`); } if (citation.url) { t.true(typeof citation.url === 'string', `${testName}: citations[${index}].url should be a string`); // URLs can be empty strings, which is valid if (citation.url.trim()) { t.true(citation.url.startsWith('http'), `${testName}: citations[${index}].url should be a valid URL if not empty`); } } if (citation.content) { t.true(typeof citation.content === 'string', `${testName}: citations[${index}].content should be a string`); } }); } // Validate toolCalls if present if (info.toolCalls) { t.true(Array.isArray(info.toolCalls), `${testName}: toolCalls should be an array`); info.toolCalls.forEach((toolCall, index) => { t.true(typeof toolCall === 'object', `${testName}: toolCalls[${index}] should be an object`); if (toolCall.name) { t.true(typeof toolCall.name === 'string', `${testName}: toolCalls[${index}].name should be a string`); } }); } // Validate usage if present if (info.usage) { // Handle both single usage object and array of usage objects if (Array.isArray(info.usage)) { t.true(info.usage.length > 0, `${testName}: usage array should not be empty`); info.usage.forEach((usage, index) => { t.true(typeof usage === 'object', `${testName}: usage[${index}] should be an object`); if (usage.prompt_tokens !== undefined) { t.true(typeof usage.prompt_tokens === 'number', `${testName}: usage[${index}].prompt_tokens should be a number`); } if (usage.completion_tokens !== undefined) { t.true(typeof usage.completion_tokens === 'number', `${testName}: usage[${index}].completion_tokens should be a number`); } if (usage.total_tokens !== undefined) { t.true(typeof usage.total_tokens === 'number', `${testName}: usage[${index}].total_tokens should be a number`); } }); } else { t.true(typeof info.usage === 'object', `${testName}: usage should be an object`); if (info.usage.prompt_tokens !== undefined) { t.true(typeof info.usage.prompt_tokens === 'number', `${testName}: usage.prompt_tokens should be a number`); } if (info.usage.completion_tokens !== undefined) { t.true(typeof info.usage.completion_tokens === 'number', `${testName}: usage.completion_tokens should be a number`); } if (info.usage.total_tokens !== undefined) { t.true(typeof info.usage.total_tokens === 'number', `${testName}: usage.total_tokens should be a number`); } } } // Validate finishReason if present if (info.finishReason) { t.true(typeof info.finishReason === 'string', `${testName}: finishReason should be a string`); const validReasons = ['stop', 'length', 'tool_calls', 'content_filter', 'function_call']; t.true(validReasons.includes(info.finishReason), `${testName}: finishReason should be a valid reason`); } } // Helper function to flatten nested arrays function flattenArray(arr) { const result = []; for (const item of arr) { if (Array.isArray(item)) { result.push(...flattenArray(item)); } else { result.push(item); } } return result; } // Test basic single-step task createModelTest('sys_entity_agent handles single-step task', async (t, model) => { t.timeout(60000); // 60 second timeout const response = await testServer.executeOperation({ query: ` query TestAgentSingleStep( $text: String!, $chatHistory: [MultiMessage]!, $model: String ) { sys_entity_agent( text: $text, chatHistory: $chatHistory, model: $model, stream: true ) { result contextId tool warnings errors } } `, variables: { text: 'What is the current time in Los Angeles?', chatHistory: [{ role: "user", content: ["What is the current time in Los Angeles?"] }], model: model } }); console.log(`Single-step Agent Response (${model}):`, JSON.stringify(response, null, 2)); // Check for successful response t.falsy(response.body?.singleResult?.errors, 'Should not have GraphQL errors'); const requestId = response.body?.singleResult?.data?.sys_entity_agent?.result; t.truthy(requestId, 'Should have a requestId in the result field'); // Collect events const events = await collectSubscriptionEvents({ query: ` subscription OnRequestProgress($requestId: String!) { requestProgress(requestIds: [$requestId]) { requestId progress data info } } `, variables: { requestId } }); console.log(`Received ${events.length} events for single-step task (${model})`); t.true(events.length > 0, 'Should have received events'); // Verify we got a completion event const completionEvent = events.find(event => event.data.requestProgress.progress === 1 ); t.truthy(completionEvent, 'Should have received a completion event'); // Validate the info object in the completion event const infoString = completionEvent.data.requestProgress.info; // For single-step tasks, info might be empty or an empty object, which is acceptable if (infoString && infoString.trim()) { let infoObject; try { infoObject = JSON.parse(infoString); } catch (error) { t.fail(`Failed to parse info object: ${error.message}`); return; } // Validate the info object structure if (Object.keys(infoObject).length > 0) { validateInfoObject(t, infoObject, 'Single-step task'); } // For single-step tasks, we might not have tool usage, but we should have other properties if (infoObject.finishReason) { t.true(['stop', 'length', 'tool_calls', 'content_filter'].includes(infoObject.finishReason), 'Single-step task should have a valid finish reason'); } console.log(`Single-step info object validation passed for ${model}:`, JSON.stringify(infoObject, null, 2)); } else { console.log(`Single-step task (${model}) completed without info object - this is acceptable for simple tasks`); } }); // Test multi-step task with tool usage createModelTest('sys_entity_agent handles multi-step task with tools', async (t, model) => { t.timeout(360000); // 120 second timeout for multi-step task const response = await testServer.executeOperation({ query: ` query TestAgentMultiStep( $text: String!, $chatHistory: [MultiMessage]!, $model: String ) { sys_entity_agent( text: $text, chatHistory: $chatHistory, model: $model, stream: true ) { result contextId tool warnings errors } } `, variables: { text: 'Research the latest developments in renewable energy and summarize the key trends.', chatHistory: [{ role: "user", content: ["Research the latest developments in renewable energy and summarize the key trends."] }], model: model } }); console.log(`Multi-step Agent Response (${model}):`, JSON.stringify(response, null, 2)); // Check for successful response t.falsy(response.body?.singleResult?.errors, 'Should not have GraphQL errors'); const requestId = response.body?.singleResult?.data?.sys_entity_agent?.result; t.truthy(requestId, 'Should have a requestId in the result field'); // Collect events with a longer timeout since this is a multi-step operation const events = await collectSubscriptionEvents({ query: ` subscription OnRequestProgress($requestId: String!) { requestProgress(requestIds: [$requestId]) { requestId progress data info } } `, variables: { requestId } }, 240000); console.log(`Received ${events.length} events for multi-step task (${model})`); t.true(events.length > 0, 'Should have received events'); // Verify we got a completion event const completionEvent = events.find(event => event.data.requestProgress.progress === 1 ); t.truthy(completionEvent, 'Should have received a completion event'); const infoString = completionEvent.data.requestProgress.info; t.truthy(infoString, 'Multi-step task should have info object'); let infoObject; try { infoObject = JSON.parse(infoString); } catch (error) { t.fail(`Failed to parse info object: ${error.message}`); return; } // Validate the info object structure validateInfoObject(t, infoObject, 'Multi-step task'); // Additional specific validations for multi-step tasks if (infoObject.toolUsed) { // For multi-step tasks, we expect multiple tools to be used const toolUsedArray = Array.isArray(infoObject.toolUsed) ? infoObject.toolUsed : [infoObject.toolUsed]; t.true(toolUsedArray.length > 0, 'Multi-step task should have used at least one tool'); // Flatten nested arrays for tool counting const flattenedTools = flattenArray(toolUsedArray); t.true(flattenedTools.length > 0, 'Multi-step task should have used tools after flattening'); // Check for common tool types that should be used in research tasks const expectedToolTypes = ['Search', 'SearchInternetAgent2', 'SearchXPlatform', 'WebPageContent', 'SearchAgent']; const hasExpectedTool = flattenedTools.some(tool => expectedToolTypes.some(expectedType => tool.includes(expectedType)) ); t.true(hasExpectedTool, 'Multi-step research task should have used search tools'); } // Validate citations for research tasks if (infoObject.citations) { t.true(infoObject.citations.length > 0, 'Research task should have citations'); infoObject.citations.forEach((citation, index) => { // Citations should have either URL or content, but not necessarily both t.truthy(citation.url || citation.content, `Citation ${index} should have URL or content`); if (citation.title || citation.content) { t.truthy(citation.title || citation.content, `Citation ${index} should have title or content`); } }); } // Validate toolCalls for multi-step tasks if (infoObject.toolCalls) { t.true(infoObject.toolCalls.length > 0, 'Multi-step task should have tool calls'); } // Validate usage statistics if (infoObject.usage) { // Handle both single usage object and array of usage objects if (Array.isArray(infoObject.usage)) { t.true(infoObject.usage.length > 0, 'Multi-step task should have usage data'); const latestUsage = infoObject.usage[0]; // Most recent usage first t.truthy(latestUsage.total_tokens, 'Multi-step task should have total token usage'); t.true(latestUsage.total_tokens > 0, 'Multi-step task should have used tokens'); } else { t.truthy(infoObject.usage.total_tokens, 'Multi-step task should have total token usage'); t.true(infoObject.usage.total_tokens > 0, 'Multi-step task should have used tokens'); } } // Validate finish reason if (infoObject.finishReason) { t.true(['stop', 'length', 'tool_calls', 'content_filter'].includes(infoObject.finishReason), 'Multi-step task should have a valid finish reason'); } console.log(`Multi-step info object validation passed for ${model}:`, JSON.stringify(infoObject, null, 2)); });