UNPKG

@lobehub/chat

Version:

Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal capabilities, and an extensible Function Call plugin system. It supports one-click free deployment of your private ChatGPT/LLM web application.

1,498 lines (1,274 loc) 52.9 kB
// Vitest suite for AgentRuntime. The shared helpers declared at the top are:
//
//   MockAgent (implements Agent)
//     - `tools` / `executors` start as empty objects; `modelRuntime` is optional and unset.
//     - `runner(context, state)` picks an instruction from `context.phase`:
//         'user_input'  -> { type: 'call_llm', payload: { messages: state.messages } }
//         'llm_result'  -> 'request_human_approve' carrying `payload.result.tool_calls` when
//                          `payload.hasToolCalls` is truthy; otherwise 'finish' with
//                          reason 'completed' and reasonDetail 'Done'
//         'tool_result' -> same 'call_llm' instruction as 'user_input'
//         anything else -> 'finish' with reason 'completed' and reasonDetail 'Done'
//
//   createTestContext(phase, payload?, sessionId = 'test-session')
//     - Returns an AgentRuntimeContext whose `session` is
//       { sessionId, messageCount: 1, status: 'idle', stepCount: 0 }.
//
// NOTE(review): the statements below are not separated by newlines. Because a `//` comment
// runs to the end of the physical line, everything after `// Mock Agent for testing` on this
// line (MockAgent, createTestContext and the opening of the describe block) is comment text
// to the compiler, and the line stops mid-expression at `const agent = new`. Presumably the
// line breaks were lost when this copy was extracted; restore the original formatting before
// running or editing the suite (TODO confirm against the upstream file).
// NOTE(review): `case 'llm_result':` declares `const llmPayload` directly in the case clause
// without braces, which shares the switch's lexical scope; consider wrapping the clause in `{ }`.
import { describe, expect, it, vi } from 'vitest'; import { Agent, AgentEventError, AgentRuntimeContext, AgentState, Cost, CostCalculationContext, CostLimit, RuntimeConfig, ToolsCalling, Usage, } from '../../types'; import { AgentRuntime } from '../runtime'; // Mock Agent for testing class MockAgent implements Agent { tools = {}; executors = {}; modelRuntime?: (payload: unknown) => AsyncIterable<any>; async runner(context: AgentRuntimeContext, state: AgentState) { switch (context.phase) { case 'user_input': return { type: 'call_llm' as const, payload: { messages: state.messages } }; case 'llm_result': const llmPayload = context.payload as { result: any; hasToolCalls: boolean }; if (llmPayload.hasToolCalls) { return { type: 'request_human_approve' as const, pendingToolsCalling: llmPayload.result.tool_calls, }; } return { type: 'finish' as const, reason: 'completed' as const, reasonDetail: 'Done' }; case 'tool_result': return { type: 'call_llm' as const, payload: { messages: state.messages } }; default: return { type: 'finish' as const, reason: 'completed' as const, reasonDetail: 'Done' }; } } } // Helper function to create test context function createTestContext( phase: AgentRuntimeContext['phase'], payload?: any, sessionId: string = 'test-session', ): AgentRuntimeContext { return { phase, payload, session: { sessionId, messageCount: 1, status: 'idle', stepCount: 0, }, }; } describe('AgentRuntime', () => { describe('Constructor and Executor Priority', () => { it('should use built-in executors by default', () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); // @ts-expect-error - accessing private property for testing const executors = runtime.executors; expect(executors).toHaveProperty('call_llm'); expect(executors).toHaveProperty('call_tool'); expect(executors).toHaveProperty('finish'); expect(executors).toHaveProperty('request_human_approve'); }); it('should allow config executors to override built-in ones', () => { const agent = new 
MockAgent(); const customFinish = vi.fn(); const config: RuntimeConfig = { executors: { finish: customFinish, }, }; const runtime = new AgentRuntime(agent, config); // @ts-ignore expect(runtime.executors.finish).toBe(customFinish); }); it('should give agent executors highest priority', () => { const agent = new MockAgent(); const agentFinish = vi.fn(); const configFinish = vi.fn(); agent.executors = { finish: agentFinish }; const config: RuntimeConfig = { executors: { finish: configFinish }, }; const runtime = new AgentRuntime(agent, config); // @ts-ignore expect(runtime.executors.finish).toBe(agentFinish); }); }); describe('step method', () => { it('should execute approved tool call directly', async () => { const agent = new MockAgent(); agent.tools = { test_tool: vi.fn().mockResolvedValue({ result: 'success' }), }; const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const toolCall: ToolsCalling = { id: 'call_123', type: 'function', function: { name: 'test_tool', arguments: '{"input": "test"}', }, }; const result = await runtime.approveToolCall(state, toolCall); expect(result.events).toHaveLength(1); expect(result.events[0]).toMatchObject({ type: 'tool_result', id: 'call_123', result: { result: 'success' }, }); expect(result.newState.messages).toHaveLength(1); expect(result.newState.messages[0].role).toBe('tool'); }); it('should follow agent runner -> executor flow', async () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], }); const result = await runtime.step(state); // Should call agent runner, get call_llm instruction, but fail due to no llmProvider expect(result.events).toHaveLength(1); expect(result.events[0].type).toBe('error'); expect(result.newState.status).toBe('error'); }); it('should handle errors gracefully', async () => { const agent = 
new MockAgent(); agent.runner = vi.fn().mockImplementation(() => Promise.reject(new Error('Agent error'))); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const result = await runtime.step(state); expect(result.events).toHaveLength(1); expect(result.events[0]).toMatchObject({ type: 'error', error: expect.any(Error), }); expect(result.newState.status).toBe('error'); expect(result.newState.error).toBeInstanceOf(Error); }); }); describe('Built-in Executors', () => { describe('call_llm executor', () => { it('should require modelRuntime', async () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], }); const result = await runtime.step(state); expect(result.events[0].type).toBe('error'); expect((result.events[0] as AgentEventError).error.message).toContain( 'Model Runtime is required', ); }); it('should handle streaming LLM response', async () => { const agent = new MockAgent(); async function* mockModelRuntime(payload: unknown) { yield { content: 'Hello' }; yield { content: ' world' }; yield { content: '!' 
}; } agent.modelRuntime = mockModelRuntime; const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], }); const result = await runtime.step(state); expect(result.events).toHaveLength(5); // start + 3 streams + result expect(result.events[0]).toMatchObject({ type: 'llm_start', payload: expect.anything(), }); expect(result.events[1]).toMatchObject({ type: 'llm_stream', chunk: { content: 'Hello' }, }); expect(result.events[4]).toMatchObject({ type: 'llm_result', result: { content: 'Hello world!', tool_calls: [] }, }); // In the new architecture, call_llm executor doesn't add messages to state // It only returns events, messages should be handled by higher-level logic expect(result.newState.messages).toHaveLength(1); // Only user message expect(result.newState.status).toBe('running'); }); it('should handle LLM response with tool calls', async () => { const agent = new MockAgent(); async function* mockModelRuntime(payload: unknown) { yield { content: 'I need to use a tool' }; yield { tool_calls: [ { id: 'call_123', type: 'function' as const, function: { name: 'test_tool', arguments: '{}' }, }, ], }; } agent.modelRuntime = mockModelRuntime; const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], }); const result = await runtime.step(state); // In the new architecture, call_llm executor doesn't add messages to state // Check that the events contain the expected LLM result expect(result.events).toContainEqual( expect.objectContaining({ type: 'llm_result', result: expect.objectContaining({ content: 'I need to use a tool', tool_calls: [ { id: 'call_123', type: 'function', function: { name: 'test_tool', arguments: '{}' }, }, ], }), }), ); }); }); describe('call_tool executor', () => { it('should execute tool and add result to messages', async () => { const agent = 
new MockAgent(); agent.tools = { calculator: vi.fn().mockResolvedValue({ result: 42 }), }; const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const toolCall: ToolsCalling = { id: 'call_123', type: 'function', function: { name: 'calculator', arguments: '{"expression": "2+2"}', }, }; const result = await runtime.approveToolCall(state, toolCall); expect((agent.tools as any).calculator).toHaveBeenCalledWith({ expression: '2+2' }); expect(result.events).toHaveLength(1); expect(result.events[0]).toMatchObject({ type: 'tool_result', id: 'call_123', result: { result: 42 }, }); expect(result.newState.messages).toHaveLength(1); expect(result.newState.messages[0]).toMatchObject({ role: 'tool', tool_call_id: 'call_123', content: '{"result":42}', }); }); it('should throw error for unknown tool', async () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const toolCall: ToolsCalling = { id: 'call_123', type: 'function', function: { name: 'unknown_tool', arguments: '{}', }, }; const result = await runtime.approveToolCall(state, toolCall); expect(result.events[0].type).toBe('error'); expect((result.events[0] as AgentEventError).error.message).toContain( 'Tool not found: unknown_tool', ); }); }); describe('human interaction executors', () => { it('should handle human approve request', async () => { const agent = new MockAgent(); // Mock agent to return human approve instruction agent.runner = vi.fn().mockImplementation(() => Promise.resolve({ type: 'request_human_approve', pendingToolsCalling: [ { id: 'call_123', type: 'function', function: { name: 'test_tool', arguments: '{}' }, }, ], }), ); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const result = await runtime.step(state); expect(result.events).toHaveLength(2); 
expect(result.events[0]).toMatchObject({ type: 'human_approve_required', sessionId: 'test-session', }); expect(result.events[1]).toMatchObject({ type: 'tool_pending', }); expect(result.newState.status).toBe('waiting_for_human'); expect(result.newState.pendingToolsCalling).toBeDefined(); }); it('should handle human prompt request', async () => { const agent = new MockAgent(); agent.runner = vi.fn().mockImplementation(() => Promise.resolve({ type: 'request_human_prompt', prompt: 'Please provide input', metadata: { key: 'value' }, }), ); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const result = await runtime.step(state); expect(result.events).toHaveLength(1); expect(result.events[0]).toMatchObject({ type: 'human_prompt_required', prompt: 'Please provide input', metadata: { key: 'value' }, sessionId: 'test-session', }); expect(result.newState.status).toBe('waiting_for_human'); expect(result.newState.pendingHumanPrompt).toEqual({ prompt: 'Please provide input', metadata: { key: 'value' }, }); }); it('should handle human select request', async () => { const agent = new MockAgent(); agent.runner = vi.fn().mockImplementation(() => Promise.resolve({ type: 'request_human_select', prompt: 'Choose an option', options: [ { label: 'Option 1', value: 'opt1' }, { label: 'Option 2', value: 'opt2' }, ], multi: false, }), ); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const result = await runtime.step(state); expect(result.events).toHaveLength(1); expect(result.events[0]).toMatchObject({ type: 'human_select_required', prompt: 'Choose an option', options: [ { label: 'Option 1', value: 'opt1' }, { label: 'Option 2', value: 'opt2' }, ], multi: false, sessionId: 'test-session', }); expect(result.newState.status).toBe('waiting_for_human'); }); }); describe('finish executor', () => { it('should mark conversation as done', async () => { const 
agent = new MockAgent(); agent.runner = vi.fn().mockImplementation(() => Promise.resolve({ type: 'finish', reason: 'completed', reasonDetail: 'Task completed', }), ); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const result = await runtime.step(state); expect(result.events).toHaveLength(1); expect(result.events[0]).toMatchObject({ type: 'done', finalState: expect.objectContaining({ status: 'done', }), reason: 'completed', reasonDetail: 'Task completed', }); expect(result.newState.status).toBe('done'); }); }); }); describe('createInitialState', () => { it('should create initial state without message', () => { const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); expect(state).toMatchObject({ sessionId: 'test-session', status: 'idle', messages: [], stepCount: 0, createdAt: expect.any(String), lastModified: expect.any(String), }); }); it('should create initial state with message', () => { const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello world' }], }); expect(state.messages).toHaveLength(1); expect(state.messages[0]).toMatchObject({ role: 'user', content: 'Hello world', }); expect(state.stepCount).toBe(0); }); it('should create initial state with custom stepCount', () => { const state = AgentRuntime.createInitialState({ sessionId: 'test-session', stepCount: 5, }); expect(state.stepCount).toBe(5); }); it('should create initial state with maxSteps limit', () => { const state = AgentRuntime.createInitialState({ sessionId: 'test-session', maxSteps: 10, }); expect(state.maxSteps).toBe(10); expect(state.stepCount).toBe(0); }); }); describe('Step Count Tracking', () => { it('should increment stepCount on each step execution', async () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); let state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); 
expect(state.stepCount).toBe(0); // First step const result1 = await runtime.step(state, createTestContext('user_input')); expect(result1.newState.stepCount).toBe(1); // Second step const result2 = await runtime.step(result1.newState, createTestContext('user_input')); expect(result2.newState.stepCount).toBe(2); }); it('should respect maxSteps limit', async () => { const agent = new MockAgent(); // Add a mock modelRuntime to avoid LLM provider error agent.modelRuntime = async function* () { yield { content: 'test response' }; }; const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session', maxSteps: 3, // 允许 3 步 }); // First step - should work const result1 = await runtime.step(state, createTestContext('user_input')); expect(result1.newState.stepCount).toBe(1); expect(result1.newState.status).not.toBe('error'); // Second step - should work const result2 = await runtime.step(result1.newState, createTestContext('user_input')); expect(result2.newState.stepCount).toBe(2); expect(result2.newState.status).not.toBe('error'); // Third step - should work (at limit) const result3 = await runtime.step(result2.newState, createTestContext('user_input')); expect(result3.newState.stepCount).toBe(3); expect(result3.newState.status).not.toBe('error'); // Fourth step - should finish due to maxSteps const result4 = await runtime.step(result3.newState, createTestContext('user_input')); expect(result4.newState.stepCount).toBe(4); expect(result4.newState.status).toBe('done'); expect(result4.events[0]).toMatchObject({ type: 'done', finalState: expect.objectContaining({ status: 'done', }), reason: 'max_steps_exceeded', reasonDetail: 'Maximum steps exceeded: 3', }); }); it('should include stepCount in session context', async () => { const agent = new MockAgent(); // Mock agent to check the context it receives const runnerSpy = vi.spyOn(agent, 'runner'); const runtime = new AgentRuntime(agent); const state = 
AgentRuntime.createInitialState({ sessionId: 'test-session', stepCount: 5, // Start with step 5 messages: [{ role: 'user', content: 'test' }], }); // Don't provide context, let runtime create it with updated stepCount await runtime.step(state); // Check that agent received correct stepCount in context expect(runnerSpy).toHaveBeenCalledWith( expect.objectContaining({ session: expect.objectContaining({ stepCount: 6, // Should be incremented }), }), expect.any(Object), ); }); }); describe('Interruption Handling', () => { it('should interrupt execution with reason and metadata', () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session', stepCount: 3, }); const result = runtime.interrupt(state, 'User requested stop', true, { userAction: 'stop_button', }); expect(result.newState.status).toBe('interrupted'); expect(result.newState.interruption).toMatchObject({ reason: 'User requested stop', canResume: true, interruptedAt: expect.any(String), }); expect(result.events[0]).toMatchObject({ type: 'interrupted', reason: 'User requested stop', canResume: true, metadata: { userAction: 'stop_button' }, interruptedAt: expect.any(String), }); }); it('should resume from interrupted state', async () => { const agent = new MockAgent(); agent.modelRuntime = async function* () { yield { content: 'resumed response' }; }; const runtime = new AgentRuntime(agent); // Create interrupted state let state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const interruptResult = runtime.interrupt(state, 'Test interruption'); // Resume execution const resumeResult = await runtime.resume(interruptResult.newState, 'Test resume'); expect(resumeResult.newState.status).toBe('running'); expect(resumeResult.newState.interruption).toBeUndefined(); expect(resumeResult.events[0]).toMatchObject({ type: 'resumed', reason: 'Test resume', resumedFromStep: 0, resumedAt: expect.any(String), }); }); 
it('should not allow resume if canResume is false', async () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); let state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); const interruptResult = runtime.interrupt(state, 'Fatal error', false); await expect(runtime.resume(interruptResult.newState)).rejects.toThrow( 'Cannot resume: interruption is not resumable', ); }); it('should not allow resume from non-interrupted state', async () => { const agent = new MockAgent(); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); await expect(runtime.resume(state)).rejects.toThrow( 'Cannot resume: state is not interrupted', ); }); it('should resume with specific context', async () => { const agent = new MockAgent(); agent.modelRuntime = async function* () { yield { content: 'context-specific response' }; }; const runtime = new AgentRuntime(agent); let state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], }); const interruptResult = runtime.interrupt(state, 'Test interruption'); const resumeContext: AgentRuntimeContext = { phase: 'user_input', payload: { message: { role: 'user', content: 'Hello' } }, session: { sessionId: 'test-session', messageCount: 1, status: 'interrupted', stepCount: 0, }, }; const resumeResult = await runtime.resume( interruptResult.newState, 'Resume with context', resumeContext, ); expect(resumeResult.events.length).toBeGreaterThanOrEqual(2); // resume + llm events (start, stream, result) expect(resumeResult.events[0].type).toBe('resumed'); expect(resumeResult.newState.status).toBe('running'); // Should contain LLM execution events expect(resumeResult.events.map((e) => e.type)).toContain('llm_start'); expect(resumeResult.events.map((e) => e.type)).toContain('llm_result'); }); }); describe('Usage and Cost Tracking', () => { it('should initialize with zero usage and cost', () => { const 
state = AgentRuntime.createInitialState({ sessionId: 'test-session' }); expect(state.usage).toMatchObject({ llm: { tokens: { input: 0, output: 0, total: 0 }, apiCalls: 0, processingTimeMs: 0, }, tools: { totalCalls: 0, byTool: [], totalTimeMs: 0, }, humanInteraction: { approvalRequests: 0, promptRequests: 0, selectRequests: 0, totalWaitingTimeMs: 0, }, }); expect(state.cost).toMatchObject({ llm: { byModel: [], total: 0, currency: 'USD', }, tools: { byTool: [], total: 0, currency: 'USD', }, total: 0, currency: 'USD', calculatedAt: expect.any(String), }); }); it('should track usage and cost through agent methods', async () => { // Create agent with cost calculation methods class CostTrackingAgent implements Agent { tools = { test_tool: async () => ({ result: 'success' }), }; async runner(context: AgentRuntimeContext, state: AgentState) { switch (context.phase) { case 'user_input': return { type: 'call_llm' as const, payload: { messages: state.messages } }; default: return { type: 'finish' as const, reason: 'completed' as const, reasonDetail: 'Done', }; } } calculateUsage( operationType: 'llm' | 'tool' | 'human_interaction', operationResult: any, previousUsage: Usage, ): Usage { const newUsage = structuredClone(previousUsage); if (operationType === 'llm') { newUsage.llm.tokens.input += 100; newUsage.llm.tokens.output += 50; newUsage.llm.tokens.total += 150; newUsage.llm.apiCalls += 1; newUsage.llm.processingTimeMs += 1000; } return newUsage; } calculateCost(context: CostCalculationContext): Cost { const newCost = structuredClone(context.previousCost || (context.usage as any)); // Simple cost calculation: $0.01 per 1000 tokens const tokenCost = (context.usage.llm.tokens.total / 1000) * 0.01; newCost.llm.total = tokenCost; newCost.total = tokenCost; newCost.calculatedAt = new Date().toISOString(); return newCost; } modelRuntime = async function* () { yield { content: 'test response' }; }; } const agent = new CostTrackingAgent(); const runtime = new AgentRuntime(agent); 
const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], }); const result = await runtime.step(state, createTestContext('user_input')); // Should have updated usage expect(result.newState.usage.llm.tokens.total).toBe(150); expect(result.newState.usage.llm.apiCalls).toBe(1); // Should have calculated cost expect(result.newState.cost.total).toBe(0.0015); // 150 tokens * $0.01/1000 }); it('should respect cost limits with stop action', async () => { class CostTrackingAgent implements Agent { async runner(context: AgentRuntimeContext, state: AgentState) { return { type: 'call_llm' as const, payload: { messages: state.messages } }; } calculateUsage(operationType: string, operationResult: any, previousUsage: Usage): Usage { const newUsage = structuredClone(previousUsage); newUsage.llm.tokens.total += 1000; // High token usage return newUsage; } calculateCost(context: CostCalculationContext): Cost { const newCost = structuredClone(context.previousCost || ({} as Cost)); newCost.total = 10.0; // High cost that exceeds limit newCost.currency = 'USD'; newCost.calculatedAt = new Date().toISOString(); return newCost; } modelRuntime = async function* () { yield { content: 'test response' }; }; } const agent = new CostTrackingAgent(); const runtime = new AgentRuntime(agent); const costLimit: CostLimit = { maxTotalCost: 5.0, currency: 'USD', onExceeded: 'stop', }; const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], costLimit, }); const result = await runtime.step(state, createTestContext('user_input')); expect(result.newState.status).toBe('done'); expect(result.events[0]).toMatchObject({ type: 'done', reason: 'cost_limit_exceeded', reasonDetail: expect.stringContaining('Cost limit exceeded'), }); }); it('should handle cost limit with interrupt action', async () => { class CostTrackingAgent implements Agent { async runner(context: 
AgentRuntimeContext, state: AgentState) { return { type: 'call_llm' as const, payload: { messages: state.messages } }; } calculateCost(context: CostCalculationContext): Cost { return { llm: { byModel: [], total: 15.0, currency: 'USD' }, tools: { byTool: [], total: 0, currency: 'USD' }, total: 15.0, currency: 'USD', calculatedAt: new Date().toISOString(), }; } modelRuntime = async function* () { yield { content: 'test response' }; }; } const agent = new CostTrackingAgent(); const runtime = new AgentRuntime(agent); const costLimit: CostLimit = { maxTotalCost: 10.0, currency: 'USD', onExceeded: 'interrupt', }; const state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: 'Hello' }], costLimit, }); const result = await runtime.step(state, createTestContext('user_input')); expect(result.newState.status).toBe('interrupted'); expect(result.events[0]).toMatchObject({ type: 'interrupted', reason: expect.stringContaining('Cost limit exceeded'), metadata: expect.objectContaining({ costExceeded: true, }), }); }); }); describe('Integration Tests', () => { it('should complete a full conversation flow', async () => { const agent = new MockAgent(); agent.tools = { get_weather: vi.fn().mockResolvedValue({ temperature: 25, condition: 'sunny', }), }; // Mock agent behavior for different states agent.runner = vi .fn() .mockImplementation((context: AgentRuntimeContext, state: AgentState) => { switch (context.phase) { case 'user_input': return Promise.resolve({ type: 'call_llm', payload: { messages: state.messages } }); case 'llm_result': const llmPayload = context.payload as { result: any; hasToolCalls: boolean }; if (llmPayload.hasToolCalls) { return Promise.resolve({ type: 'request_human_approve', pendingToolsCalling: llmPayload.result.tool_calls, }); } return Promise.resolve({ type: 'finish', reason: 'completed', reasonDetail: 'Done' }); case 'tool_result': return Promise.resolve({ type: 'call_llm', payload: { messages: state.messages 
} }); default: return Promise.resolve({ type: 'finish', reason: 'completed', reasonDetail: 'Done' }); } }); async function* mockModelRuntime(payload: unknown) { const messages = (payload as any).messages; const lastMessage = messages[messages.length - 1]; if (lastMessage.role === 'user') { yield { content: "I'll check the weather for you." }; yield { tool_calls: [ { id: 'call_weather', type: 'function' as const, function: { name: 'get_weather', arguments: '{"city": "Beijing"}', }, }, ], }; } else if (lastMessage.role === 'tool') { yield { content: 'The weather in Beijing is 25°C and sunny.' }; } } agent.modelRuntime = mockModelRuntime; const runtime = new AgentRuntime(agent); // Step 1: User asks question let state = AgentRuntime.createInitialState({ sessionId: 'test-session', messages: [{ role: 'user', content: "What's the weather in Beijing?" }], }); let result = await runtime.step(state); // Should get LLM response with tool call (status is 'running' after LLM execution) expect(result.newState.status).toBe('running'); // In new architecture, call_llm doesn't add messages to state expect(result.newState.messages).toHaveLength(1); // Only user message // Check events contain the tool call result expect(result.events).toContainEqual( expect.objectContaining({ type: 'llm_result', result: expect.objectContaining({ tool_calls: expect.arrayContaining([ expect.objectContaining({ id: 'call_weather', type: 'function', }), ]), }), }), ); // Step 1.5: Agent processes assistant message with tool calls using nextContext result = await runtime.step(result.newState, result.nextContext); // Now should request human approval expect(result.newState.status).toBe('waiting_for_human'); expect(result.newState.pendingToolsCalling).toHaveLength(1); // Step 2: Approve and execute tool call const toolCall = result.newState.pendingToolsCalling![0]; result = await runtime.approveToolCall(result.newState, toolCall); // Should have executed tool expect((agent.tools as 
any).get_weather).toHaveBeenCalledWith({ city: 'Beijing' }); expect(result.newState.messages).toHaveLength(2); // user + tool result (call_tool executor adds tool message) // Step 3: LLM processes tool result using nextContext result = await runtime.step(result.newState, result.nextContext); // Should get final response in events expect(result.events).toContainEqual( expect.objectContaining({ type: 'llm_result', result: expect.objectContaining({ content: expect.stringContaining('25°C and sunny'), }), }), ); }); }); describe('Batch Tool Execution', () => { it('should execute multiple tools concurrently with call_tools_batch instruction', async () => { // Agent that returns multiple tool calls class BatchToolAgent implements Agent { tools = { tool_a: vi.fn().mockResolvedValue({ result: 'result_a' }), tool_b: vi.fn().mockResolvedValue({ result: 'result_b' }), tool_c: vi.fn().mockResolvedValue({ result: 'result_c' }), }; async runner(context: AgentRuntimeContext, _state: AgentState) { if (context.phase === 'user_input') { return { payload: [ { id: 'call_a', type: 'function' as const, function: { name: 'tool_a', arguments: '{}' }, }, { id: 'call_b', type: 'function' as const, function: { name: 'tool_b', arguments: '{}' }, }, { id: 'call_c', type: 'function' as const, function: { name: 'tool_c', arguments: '{}' }, }, ], type: 'call_tools_batch' as const, }; } return { type: 'finish' as const, reason: 'completed' as const }; } } const agent = new BatchToolAgent(); const runtime = new AgentRuntime(agent); const state = AgentRuntime.createInitialState({ sessionId: 'batch-test', messages: [{ role: 'user', content: 'Execute tools' }], }); const result = await runtime.step(state); // Should have executed all 3 tools expect(agent.tools.tool_a).toHaveBeenCalled(); expect(agent.tools.tool_b).toHaveBeenCalled(); expect(agent.tools.tool_c).toHaveBeenCalled(); // Should have 3 tool result events expect(result.events.filter((e) => e.type === 'tool_result')).toHaveLength(3); // Should 
have 3 tool messages in state
      const toolMessages = result.newState.messages.filter((m) => m.role === 'tool');
      expect(toolMessages).toHaveLength(3);

      // Should have tools_batch_result phase in nextContext
      expect(result.nextContext?.phase).toBe('tools_batch_result');
      expect(result.nextContext?.payload).toHaveProperty('toolCount', 3);
    });

    it('should support agent returning instruction array', async () => {
      // Agent that returns array of instructions
      class ArrayReturnAgent implements Agent {
        tools = {
          tool_1: vi.fn().mockResolvedValue({ result: 'tool_1_result' }),
          tool_2: vi.fn().mockResolvedValue({ result: 'tool_2_result' }),
        };

        /** Emits two `call_tool` instructions on `user_input`, otherwise finishes. */
        async runner(context: AgentRuntimeContext, _state: AgentState) {
          if (context.phase === 'user_input') {
            // Return array of instructions
            return [
              {
                payload: {
                  id: 'call_1',
                  type: 'function' as const,
                  function: { name: 'tool_1', arguments: '{}' },
                },
                type: 'call_tool' as const,
              },
              {
                payload: {
                  id: 'call_2',
                  type: 'function' as const,
                  function: { name: 'tool_2', arguments: '{}' },
                },
                type: 'call_tool' as const,
              },
            ];
          }

          return { type: 'finish' as const, reason: 'completed' as const };
        }
      }

      const agent = new ArrayReturnAgent();
      const runtime = new AgentRuntime(agent);
      const state = AgentRuntime.createInitialState({
        sessionId: 'array-test',
        messages: [{ role: 'user', content: 'Execute tools' }],
      });

      const result = await runtime.step(state);

      // Should have executed both tools sequentially
      expect(agent.tools.tool_1).toHaveBeenCalled();
      expect(agent.tools.tool_2).toHaveBeenCalled();

      // Should have 2 tool result events
      expect(result.events.filter((e) => e.type === 'tool_result')).toHaveLength(2);

      // Should have 2 tool messages in state
      const toolMessages = result.newState.messages.filter((m) => m.role === 'tool');
      expect(toolMessages).toHaveLength(2);
    });

    it('should stop execution when encountering blocking status', async () => {
      // Agent that returns mixed instructions with approval
      class BlockingAgent implements Agent {
        tools = {
          safe_tool: vi.fn().mockResolvedValue({ result: 'safe_result' }),
        };

        /** Emits a `call_tool` followed by a `request_human_approve` on `user_input`. */
        async runner(context: AgentRuntimeContext, _state: AgentState) {
          if (context.phase === 'user_input') {
            // Return array: safe tool + approval request
            return [
              {
                payload: {
                  id: 'call_safe',
                  type: 'function' as const,
                  function: { name: 'safe_tool', arguments: '{}' },
                },
                type: 'call_tool' as const,
              },
              {
                pendingToolsCalling: [
                  {
                    id: 'call_danger',
                    type: 'function' as const,
                    function: { name: 'danger_tool', arguments: '{}' },
                  },
                ],
                type: 'request_human_approve' as const,
              },
            ];
          }

          return { type: 'finish' as const, reason: 'completed' as const };
        }
      }

      const agent = new BlockingAgent();
      const runtime = new AgentRuntime(agent);
      const state = AgentRuntime.createInitialState({
        sessionId: 'blocking-test',
        messages: [{ role: 'user', content: 'Execute' }],
      });

      const result = await runtime.step(state);

      // Safe tool should have been executed
      expect(agent.tools.safe_tool).toHaveBeenCalled();

      // Should be in waiting state (blocked by approval request)
      expect(result.newState.status).toBe('waiting_for_human');

      // Should have pending tool calls
      expect(result.newState.pendingToolsCalling).toHaveLength(1);
      // Optional chaining: a missing entry fails the assertion instead of throwing a TypeError
      expect(result.newState.pendingToolsCalling?.[0]?.function.name).toBe('danger_tool');

      // Should have both tool_result and human_approve_required events
      expect(result.events).toContainEqual(expect.objectContaining({ type: 'tool_result' }));
      expect(result.events).toContainEqual(
        expect.objectContaining({ type: 'human_approve_required' }),
      );
    });

    it('should merge tool results correctly', async () => {
      // Agent that returns batch with tools that modify usage/cost
      class UsageTrackingAgent implements Agent {
        tools = {
          expensive_tool: vi.fn().mockResolvedValue({ cost: 10 }),
          cheap_tool: vi.fn().mockResolvedValue({ cost: 1 }),
        };

        /** Adds one call and 100 ms to the tool usage counters for every `tool` operation. */
        calculateUsage(
          operationType: 'llm' | 'tool' | 'human_interaction',
          _operationResult: unknown,
          previousUsage: Usage,
        ): Usage {
          if (operationType === 'tool') {
            return {
              ...previousUsage,
              tools: {
                ...previousUsage.tools,
                totalCalls: previousUsage.tools.totalCalls + 1,
                totalTimeMs: previousUsage.tools.totalTimeMs + 100,
              },
            };
          }

          return previousUsage;
        }

        /** Emits a single `call_tools_batch` with both tools on `user_input`. */
        async runner(context: AgentRuntimeContext, _state: AgentState) {
          if (context.phase === 'user_input') {
            return {
              payload: [
                {
                  id: 'call_expensive',
                  type: 'function' as const,
                  function: { name: 'expensive_tool', arguments: '{}' },
                },
                {
                  id: 'call_cheap',
                  type: 'function' as const,
                  function: { name: 'cheap_tool', arguments: '{}' },
                },
              ],
              type: 'call_tools_batch' as const,
            };
          }

          return { type: 'finish' as const, reason: 'completed' as const };
        }
      }

      const agent = new UsageTrackingAgent();
      const runtime = new AgentRuntime(agent);
      const state = AgentRuntime.createInitialState({
        sessionId: 'merge-test',
        messages: [{ role: 'user', content: 'Execute' }],
      });

      const result = await runtime.step(state);

      // Both tools should have been called
      expect(agent.tools.expensive_tool).toHaveBeenCalled();
      expect(agent.tools.cheap_tool).toHaveBeenCalled();

      // Usage should be merged (2 tools called)
      expect(result.newState.usage.tools.totalCalls).toBe(2);
    });
  });

  describe('Edge Cases and Error Handling', () => {
    it('should handle unknown instruction type', async () => {
      const agent = new MockAgent();
      // Runner yields an instruction type that no executor is registered for
      agent.runner = vi.fn().mockResolvedValue({ type: 'unknown_instruction_type' as any });

      const runtime = new AgentRuntime(agent);
      const state = AgentRuntime.createInitialState({ sessionId: 'test-session' });

      const result = await runtime.step(state);

      expect(result.events[0].type).toBe('error');
      expect((result.events[0] as AgentEventError).error.message).toContain(
        'No executor found for instruction type',
      );
    });

    it('should handle LLM errors', async () => {
      const agent = new MockAgent();
      // Model runtime throws before yielding any chunk
      agent.modelRuntime = async function* () {
        throw new Error('LLM API error');
      };

      const runtime = new AgentRuntime(agent);
      const state = AgentRuntime.createInitialState({
        messages: [{ role: 'user', content: 'test' }],
        sessionId: 'test-session',
      });

      const result = await runtime.step(state);

      expect(result.events[0].type).toBe('error');
      expect((result.events[0] as AgentEventError).error.message).toBe('LLM API error');
    });

    it('should handle cost limit with warn action', async () => {
      class WarnCostAgent implements Agent {
        /** Always asks for an LLM call, regardless of phase. */
        async runner(_context: AgentRuntimeContext, state: AgentState) {
          return { type: 'call_llm' as const, payload: { messages: state.messages } };
        }

        /** Reports a fixed total of 15.0 USD, above the 10.0 limit configured below. */
        calculateCost(_context: CostCalculationContext): Cost {
          return {
            calculatedAt: new Date().toISOString(),
            currency: 'USD',
            llm: { byModel: [], currency: 'USD', total: 15.0 },
            tools: { byTool: [], currency: 'USD', total: 0 },
            total: 15.0,
          };
        }

        modelRuntime = async function* () {
          yield { content: 'test' };
        };
      }

      const agent = new WarnCostAgent();
      const runtime = new AgentRuntime(agent);

      const costLimit: CostLimit = {
        currency: 'USD',
        maxTotalCost: 10.0,
        onExceeded: 'warn',
      };

      const state = AgentRuntime.createInitialState({
        costLimit,
        messages: [{ role: 'user', content: 'test' }],
        sessionId: 'test-session',
      });

      const result = await runtime.step(state);

      expect(result.events[0]).toMatchObject({
        type: 'error',
      });
      expect((result.events[0] as AgentEventError).error.message).toContain(
        'Warning: Cost limit exceeded',
      );
      expect(result.newState.status).toBe('running');
    });

    it('should track tool cost limits', async () => {
      class ToolCostAgent implements Agent {
        tools = {
          expensive_tool: vi.fn().mockResolvedValue({ result: 'done' }),
        };

        /** Emits one `call_tool` for `expensive_tool` on `user_input`, otherwise finishes. */
        async runner(context: AgentRuntimeContext, _state: AgentState) {
          if (context.phase === 'user_input') {
            return {
              payload: {
                apiName: 'expensive_tool',
                arguments: '{}',
                id: 'call_1',
                identifier: 'expensive_tool',
                type: 'default' as const,
              },
              type: 'call_tool' as const,
            };
          }

          return { reason: 'completed' as const, type: 'finish' as const };
        }

        /** Reports a fixed tool cost of 20.0 USD, above the 10.0 limit configured below. */
        calculateCost(_context: CostCalculationContext): Cost {
          return {
            calculatedAt: new Date().toISOString(),
            currency: 'USD',
            llm: { byModel: [], currency: 'USD', total: 0 },
            tools: { byTool: [], currency: 'USD', total: 20.0 },
            total: 20.0,
          };
        }
      }

      const agent = new ToolCostAgent();
      const runtime = new AgentRuntime(agent);

      const costLimit: CostLimit = {
        currency: 'USD',
        maxTotalCost: 10.0,
        onExceeded: 'stop',
      };

      const state = AgentRuntime.createInitialState({
        costLimit,
        messages: [{ role: 'user', content: 'test' }],
        sessionId: 'test-session',
      });

      const result = await runtime.step(state);

      expect(result.newState.status).toBe('done');
      expect(result.events[0]).toMatchObject({
        reason: 'cost_limit_exceeded',
        type: 'done',
      });
    });

    it('should merge cost statistics in batch tool execution', async () => {
      class BatchCostAgent implements Agent {
        tools = {
          tool_1: vi.fn().mockResolvedValue({ result: 'result_1' }),
          tool_2: vi.fn().mockResolvedValue({ result: 'result_2' }),
        };

        /** Adds 5.0 USD of tool cost on top of `context.previousCost` (or a zeroed cost). */
        calculateCost(context: CostCalculationContext): Cost {
          const baseCost = context.previousCost || {
            calculatedAt: new Date().toISOString(),
            currency: 'USD',
            llm: { byModel: [], currency: 'USD', total: 0 },
            tools: { byTool: [], currency: 'USD', total: 0 },
            total: 0,
          };

          return {
            ...baseCost,
            calculatedAt: new Date().toISOString(),
            tools: {
              byTool: [],
              currency: 'USD',
              total: baseCost.tools.total + 5.0,
            },
            total: baseCost.total + 5.0,
          };
        }

        /** Emits a single `call_tools_batch` with both tools on `user_input`. */
        async runner(context: AgentRuntimeContext, _state: AgentState) {
          if (context.phase === 'user_input') {
            return {
              payload: [
                {
                  apiName: 'tool_1',
                  arguments: '{}',
                  id: 'call_1',
                  identifier: 'tool_1',
                  type: 'default' as const,
                },
                {
                  apiName: 'tool_2',
                  arguments: '{}',
                  id: 'call_2',
                  identifier: 'tool_2',
                  type: 'default' as const,
                },
              ],
              type: 'call_tools_batch' as const,
            };
          }

          return { reason: 'completed' as const, type: 'finish' as const };
        }
      }

      const agent = new BatchCostAgent();
      const runtime = new AgentRuntime(agent);
      const state = AgentRuntime.createInitialState({
        messages: [{ role: 'user', content: 'Execute' }],
        sessionId: 'cost-merge-test',
      });

      const result = await runtime.step(state);

      // Cost should be merged from both tools
      expect(result.newState.cost.tools.total).toBeGreaterThan(0);
expect(result.newState.cost.total).toBeGreaterThan(0); }); it('should merge per-too