// magnitude-core
// Version: (not specified)
// Magnitude e2e testing agent
// 343 lines (342 loc) • 15.8 kB
// JavaScript
import logger from '@/logger';
import EventEmitter from "eventemitter3";
import z from "zod";
import { Observation } from '@/memory/observation';
import { AgentError } from "@/agent/errors";
import { AgentMemory } from "@/memory";
import { taskActions } from "@/actions/taskActions";
import { traceAsync } from "@/ai/baml_client";
import { telemetrifyAgent } from '@/telemetry/events';
import { isClaude } from '@/ai/util';
import { retryOnError } from '@/common';
import { renderContentParts } from '@/memory/rendering';
import { MultiModelHarness } from '@/ai/multiModelHarness';
// Options for the startAgent helper function
/**
 * Baseline agent options. The Agent constructor spreads these first and then
 * overlays whatever the caller supplied.
 */
const DEFAULT_CONFIG = (() => {
    // Start from a copy of taskActions; any further actions are contributed by connectors.
    const defaultActions = Array.from(taskActions);
    // Falls back to a placeholder string when GOOGLE_API_KEY is unset or empty.
    const googleApiKey = process.env.GOOGLE_API_KEY || "YOUR_GOOGLE_API_KEY";
    const defaultLlm = {
        provider: 'google-ai',
        options: {
            model: 'gemini-2.5-pro-preview-05-06',
            apiKey: googleApiKey
        }
    };
    return {
        actions: defaultActions,
        connectors: [],
        llm: defaultLlm,
        prompt: null,
        telemetry: true,
    };
})();
export class Agent {
// maybe remove conns/actions from options since stored sep
options; //Omit<Required<AgentOptions>, 'actions'>;
connectors;
actions; // actions from connectors + any other additional ones configured
memoryOptions;
models;
//public readonly model: ModelHarness;
//public readonly micro: GroundingService;
//public readonly events: EventEmitter<AgentEvents>;
//protected readonly _emitter: EventEmitter<AgentEvents>;
events = new EventEmitter();
//public readonly memory: AgentMemory;
doneActing;
latestTaskMemory; // | null = null;
constructor(baseConfig = {}) {
this.options = {
...DEFAULT_CONFIG,
...baseConfig,
connectors: baseConfig.connectors ?? [],
actions: [...(baseConfig.actions || DEFAULT_CONFIG.actions)],
};
this.connectors = this.options.connectors;
// Aggregate actions from connectors
//const aggregatedActions = [...this.options.actions];
this.actions = [...this.options.actions];
for (const connector of this.connectors) {
this.actions.push(...(connector.getActionSpace ? connector.getActionSpace() : []));
}
// Deduplicate actions by name
// TODO: maybe error instead, or automatically differentiate them?
//this.options.actions = Array.from(new Map(aggregatedActions.map(actDef => [actDef.name, actDef])).values());
const llms = Array.isArray(this.options.llm) ? this.options.llm : [this.options.llm];
let doPromptCaching = false;
for (const client of llms) {
// If any LLM is prompt-caching compatible, turn on prompt caching overall for memory etc.
if (isClaude(client) && (client.provider === 'anthropic' || client.provider === 'claude-code')) {
// Prompt-caching compatible client
if ('promptCaching' in client.options && client.options.promptCaching !== undefined) {
doPromptCaching = client.options.promptCaching;
}
else {
// Default to true if not specified, and override on client config to true
doPromptCaching = true;
client.options.promptCaching = true;
}
}
}
//this.model = new ModelHarness({ llm: this.options.llm });
this.models = new MultiModelHarness(llms);
this.models.events.on('tokensUsed', (usage) => this.events.emit('tokensUsed', usage), this);
this.doneActing = false;
this.memoryOptions = {
// TODO: maybe do if Gemini or other prompt caching supported providers as well
// Claude supports prompt caching but only via Anthropic, not on Bedrock
promptCaching: doPromptCaching
};
// Empty memory will get replaced on first act(), but this prevents errors from having undefined memory
this.latestTaskMemory = new AgentMemory(this.memoryOptions);
}
getConnector(connectorClass) {
return this.connectors.find(c => c instanceof connectorClass);
}
require(connectorClass) {
const connector = this.getConnector(connectorClass);
if (!connector)
throw new Error(`Missing required connector ${connectorClass}`);
return connector;
}
async start() {
// Register telemetry if enabled - do on start instead of cons to prevent weird subclass event issues
if (this.options.telemetry)
telemetrifyAgent(this);
//console.log('setting up model')
await this.models.setup();
//console.log('done setting up model')
logger.info("Agent: Starting connectors...");
for (const connector of this.connectors) {
if (connector.onStart)
await connector.onStart();
}
this.events.emit('start');
logger.info("Agent: All connectors started.");
// logger.info("Making initial observations...");
// await this._recordConnectorObservations();
// logger.info("Initial observations recorded");
// Initial observations are handled by the first getObservations call in exec
}
identifyAction(action) {
// Get definition corresponding to an action
const actionDefinition = this.actions.find(def => def.name === action.variant);
if (!actionDefinition) {
// It's possible the action name was from a connector that is no longer active,
// or the action space was not correctly aggregated.
throw new AgentError(`Undefined action type '${action.variant}'. Ensure agent is configured with appropriate action definitions from connectors.`);
}
return actionDefinition;
}
async exec(action, memory) {
/**
* Execute an action that belongs to this Agent's action space.
* Provide memory to record the action taken, its results, and any connector observations to that memory.
*/
let actionDefinition = this.identifyAction(action);
let input;
if (actionDefinition.schema instanceof z.ZodObject) {
let variant;
({ variant, ...input } = action);
}
else {
input = action.input;
}
let parsed = actionDefinition.schema.safeParse(input);
if (!parsed.success) {
throw new AgentError(`Generated action '${action.variant}' violates input schema: ${parsed.error.message}`, { adaptable: true });
}
this.events.emit('actionStarted', action);
const data = await actionDefinition.resolver({ input: parsed.data, agent: this });
this.events.emit('actionDone', action);
if (memory) {
// Record action taken
memory.recordObservation(Observation.fromActionTaken(actionDefinition.name, JSON.stringify(action)));
// Record results of action
if (data) {
memory.recordObservation(Observation.fromActionResult(actionDefinition.name, data));
}
// Collect and record observations from connectors
await this._recordConnectorObservations(memory);
}
}
async _recordConnectorObservations(memory) {
for (const connector of this.connectors) {
// could do Promise.all if matters
const connObservations = connector.collectObservations ? await connector.collectObservations() : [];
//observations.push(...connObservations);
for (const obs of connObservations) {
memory.recordObservation(obs);
}
}
}
get memory() {
//if (!this.latestTaskMemory) throw new Error("No memory available");
return this.latestTaskMemory;
}
async act(taskOrSteps, options = {}) {
const instructions = [
...(this.options.prompt ? [this.options.prompt] : []),
...(options.prompt ? [options.prompt] : []),
].join('\n');
const taskMemory = options.memory ?? new AgentMemory({ ...this.memoryOptions, instructions: instructions === '' ? undefined : instructions });
if (Array.isArray(taskOrSteps)) {
const steps = taskOrSteps;
//this.events.emit('actStarted', steps.join(', '));
// trace overall task
await (traceAsync('multistep', async (steps, options) => {
for (const step of steps) {
this.events.emit('actStarted', step, options);
await this._traceAct(step, taskMemory, options);
this.events.emit('actDone', step, options);
}
})(steps, options));
//this.events.emit('actDone', steps.join(', '));
}
else {
const task = taskOrSteps;
this.events.emit('actStarted', task, options);
await this._traceAct(task, taskMemory, options);
this.events.emit('actDone', task, options);
}
}
async _traceAct(task, memory, options = {}) {
// memory not serializable to trace so bake it
await (traceAsync('act', async (task) => {
await this._act(task, memory, options);
})(task));
}
async _buildContext(memory) {
const messages = await memory.render();
const connectorInstructions = [];
for (const connector of this.connectors) {
if (connector.getInstructions) {
const instructions = await connector.getInstructions();
if (instructions) {
connectorInstructions.push({
connectorId: connector.id,
instructions: instructions
});
}
}
}
return {
instructions: memory.instructions,
observationContent: messages,
//observationContent: content,
connectorInstructions: connectorInstructions
};
}
async _act(description, memory, options = {}) {
this.doneActing = false;
logger.info(`Act: ${description}`);
// for now simply add data to task
let dataContentParts = [];
if (options.data) {
//description += "\nUse the following data where appropriate:\n";
// description += "\n<data>\n";
// // if (typeof options.data === 'string') {
// // description += options.data;
// // } else {
// // description += Object.entries(options.data).map(([k, v]) => `${k}: ${v}`).join("\n");
// // }
// const parts = renderParts(options.data);
// description += "\n</data>";
dataContentParts = await renderContentParts(options.data, { mode: 'json', indent: 2 });
}
//this.events.emit('stepStart', description);
//const testData = convertOptionsToTestData(options);
// Initialize task memory and record initial observations
// Combine any agent-level and task-level instructions
this.latestTaskMemory = memory;
// record initial observations
logger.info("Making initial observations...");
await this._recordConnectorObservations(memory);
logger.info("Initial observations recorded");
while (true) {
// Removed direct screenshot/tabState access here; it's part of memoryContext via connectors
logger.info(`Creating partial recipe`);
let reasoning = "";
let actions = [];
try {
const memoryContext = await this._buildContext(memory);
await retryOnError(async () => {
({ reasoning, actions } = await this.models.partialAct(memoryContext, description, dataContentParts, this.actions));
if (actions.length === 0) {
// Empty action list behavior - default wait else ... err? what if not in action space?
//actions.push()
throw new AgentError(`No actions generated`);
}
},
// HTTP body is not JSON - comes from Anthropic sometimes, weird error
// Sometimes Anthropic will give 401 Unauthorized randomly even when authorized
{
mode: 'retry_on_partial_message',
errorSubstrings: ['HTTP body is not JSON', '401 Unauthorized', 'No actions generated'],
retryLimit: 3,
delayMs: 1000,
showWarnOnRetry: true
});
}
catch (error) {
logger.error(`Error planning actions: ${error instanceof Error ? error.message : String(error)}`);
/**
* (1) Failure to conform to JSON
* (2) Misconfigured BAML client / bad API key
* (3) Network error (past max retries)
*/
// this.fail({
// variant: 'misalignment',
// message: `Could not create partial recipe -> ${(error as Error).message}`
// });
throw new AgentError(`Error planning actions: ${error.message}`, { variant: 'misalignment' });
}
logger.info({ reasoning, actions }, `Partial recipe created`);
// Could be emitted in memory and bubbled up instead of recordThought was called in more places
this.events.emit('thought', reasoning);
memory.recordThought(reasoning);
// Execute partial recipe
for (const action of actions) {
await this.exec(action, memory);
// const postActionScreenshot = await this.screenshot();
// const actionDescriptor: ActionDescriptor = { ...action, screenshot: postActionScreenshot.image } as ActionDescriptor;
// this.events.emit('action', actionDescriptor);
logger.info({ action }, `Action taken`);
}
// If macro expects these actions should complete the step, break
// if (finished) {
// break;
// }
if (this.doneActing) {
break;
}
}
logger.info(`Done with step`);
//this.events.emit('stepSuccess');
//this.currentTaskMemory = null;
}
async query(query, schema) {
// Record observations in case no act() was used beforehand
await this._recordConnectorObservations(this.latestTaskMemory);
const memoryContext = await this._buildContext(this.memory); //this.memory.buildContext(this.connectors);
return await this.models.query(memoryContext, query, schema);
}
async queueDone() {
this.doneActing = true;
}
async stop() {
/**
* Stop the agent and close the browser context.
* May be called asynchronously and interrupt an agent in the middle of a action sequence.
*/
// set signal to cancelled?
logger.info("Agent: Stopping connectors...");
for (const connector of this.connectors) {
try {
if (connector.onStop)
await connector.onStop();
}
catch (error) {
logger.warn(`Agent: Error stopping connector ${connector.id}: ${error instanceof Error ? error.message : String(error)}`);
}
}
this.events.emit('stop');
logger.info("Agent: All connectors stopped.");
logger.info("Agent: Stopped successfully.");
}
}