UNPKG

browser-use-typescript

Version:

A TypeScript-based browser automation framework

1,214 lines (1,032 loc) 46.9 kB
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
import { HumanMessage, BaseMessage, AIMessage, SystemMessage } from "@langchain/core/messages";
import {Browser} from "../../browser/playwrightBrowser/browserService";
import { BrowserContext } from "../../browser/playwrightBrowser/browserContext";
import { Controller } from "../../controller/controllerContext";
import { RateLimitError } from "../types";
import { ActionResult, AgentError, AgentHistory, AgentHistoryList, AgentOutput, AgentBrain, AgentSettings, AgentState, StepMetadata, ToolCallingMethod, ValidationError } from "../types";
import { DOMHistoryElement } from "../../domHIstory/historyTypes";
import { AgentStepInfo } from "../types";
import { convertInputMessages } from "../message_manager/utils";
import { BrowserState,BrowserStateHistory } from "../../browser/playwrightBrowser/type";
import { MessageManager } from "../message_manager/services";
import { AgentMessagePrompt, PlannerPrompt, SystemPrompt } from "../prompt";
import { ActionModel, RegisteredAction } from "../../controller/registry/types";
import { HistoryTreeProcessor } from "../../domHIstory/historyTypes";

/**
 * Console logger that wraps every message in ANSI colour codes.
 *
 * Two independent switches control output:
 * - `error`, `log`, `debug`, `info`, `warn` and `trace` print only while
 *   `debugEnabled` is true.
 * - `success` and `debuggerError` print unless `isDebugger` is true.
 */
class Logger {
  private debugEnabled = false;
  private isDebugger = false;

  // ANSI color codes
  private colors = {
    // Text colors
    reset: "\x1b[0m",
    white: "\x1b[37m",
    bold: "\x1b[1m",
    italic: "\x1b[3m",
    // Background colors
    bgRed: "\x1b[41m",
    bgPurple: "\x1b[45m",
    bgBlue: "\x1b[44m",
    bgYellow: "\x1b[43m",
    bgGreen: "\x1b[42m"
  };

  /**
   * @param debugEnabled - Enables the diagnostic methods (defaults to true).
   * @param isDebugger - When true, silences `success` and `debuggerError`.
   */
  constructor(debugEnabled = true, isDebugger = false) {
    this.debugEnabled = debugEnabled;
    this.isDebugger = isDebugger;
  }

  /** Writes a red "ERROR:" line to stderr when debug output is enabled. */
  error(message: string) {
    if (!this.debugEnabled) {
      return;
    }
    const { bgRed, white, reset } = this.colors;
    console.error(`${bgRed}${white}ERROR: ${message}${reset}`);
  }

  /** Writes a blue "LOG:" line when debug output is enabled. */
  log(message: string) {
    if (!this.debugEnabled) {
      return;
    }
    const { bgBlue, white, reset } = this.colors;
    console.log(`${bgBlue}${white}LOG: ${message}${reset}`);
  }

  /** Writes a purple "DEBUG:" line when debug output is enabled. */
  debug(message: string) {
    if (!this.debugEnabled) {
      return;
    }
    const { bgPurple, white, reset } = this.colors;
    console.log(`${bgPurple}${white}DEBUG: ${message}${reset}`);
  }

  /** Writes a green "INFO:" line when debug output is enabled. */
  info(message: string) {
    if (!this.debugEnabled) {
      return;
    }
    const { bgGreen, white, reset } = this.colors;
    console.log(`${bgGreen}${white}INFO: ${message}${reset}`);
  }

  /** Writes a yellow "WARN:" line via console.warn when debug output is enabled. */
  warn(message: string) {
    if (!this.debugEnabled) {
      return;
    }
    const { bgYellow, white, reset } = this.colors;
    console.warn(`${bgYellow}${white}WARN: ${message}${reset}`);
  }

  /** Writes a blue "TRACE:" line with a stack trace when debug output is enabled. */
  trace(message: string) {
    if (!this.debugEnabled) {
      return;
    }
    const { bgBlue, white, reset } = this.colors;
    console.trace(`${bgBlue}${white}TRACE: ${message}${reset}`);
  }

  /** Writes a bold green "SUCCESS:" line unless the logger is in debugger mode. */
  success(message: string) {
    if (this.isDebugger) {
      return;
    }
    const { bold, bgGreen, white, reset } = this.colors;
    console.log(`${bold}${bgGreen}${white}SUCCESS: ${message}${reset}`);
  }

  /** Writes an italic red "ERROR:" line to stdout unless the logger is in debugger mode. */
  debuggerError(message: string) {
    if (this.isDebugger) {
      return;
    }
    const { italic, bgRed, white, reset } = this.colors;
    console.log(`${italic}${bgRed}${white}ERROR: ${message}${reset}`);
  }
}

export const logger = new Logger(false, false); // Default to debug disabled and debugger disabled

// Type for the callback functions

/** Invoked with the browser state, the model output and the step number. */
type NewStepCallback =
  | ((browserState: BrowserState, agentOutput: AgentOutput, step: number) => void)
  | ((browserState: BrowserState, agentOutput: AgentOutput, step: number) => Promise<void>)
  | null;

/** Invoked with the accumulated history once the agent reports it is done. */
type DoneCallback =
  | ((agentHistoryList: AgentHistoryList) => Promise<void>)
  | ((agentHistoryList: AgentHistoryList) => void)
  | null;

/** Polled before actions; resolving to true interrupts the agent. */
type ExternalAgentStatusCallback = (() => Promise<boolean>) | null;

// Generic type to match Python's implementation
export class Agent<Context = any> {
  // Core components
  task: string;
  llm: BaseChatModel;
  controller: Controller<Context>;
  sensitive_data: Record<string, string> | null;
  plannerModelName:string=""

  // Settings
  settings: AgentSettings;

  // State
  state: AgentState;

  // Models and Actions
  available_actions: string;
  tool_calling_method: ToolCallingMethod;
  initial_actions?:ActionModel[]
  chat_model_library?: string;
  model_name?: string;
  browser_use_version?: string;
  browser_use_source?: string;

  // Message Management
private _message_manager: MessageManager;

  // Browser
  browser: Browser | null;
  browser_context: BrowserContext | null;
  injected_browser: boolean;
  injected_browser_context: boolean;

  // Callbacks
  register_new_step_callback: NewStepCallback;
  register_done_callback: DoneCallback;
  register_external_agent_status_raise_error_callback: ExternalAgentStatusCallback;

  // Context
  context: Context | null;
  DoneActionModel: any;
  DoneAgentOutput:AgentOutput | undefined;

  // Telemetry
  // Replace with actual ProductTelemetry when implemented
  ActionModel:ActionModel | undefined
  AgentOutput: AgentOutput | undefined;

  /**
   * Agent constructor
   * Equivalent to Python's __init__ method
   *
   * Builds the settings object, the action models, the message manager and
   * the browser / browser-context pair, then stores the callbacks.
   *
   * @param task - Task description handed to the message manager.
   * @param llm - Chat model used for action selection (and for page
   *   extraction when `page_extraction_llm` is not given).
   * @param browser - Optional pre-built browser; a new `Browser` is created
   *   when neither `browser` nor `browser_context` is supplied.
   * @param browser_context - Optional pre-built context; when omitted a new
   *   `BrowserContext` is created.
   * @param controller - Action controller whose registry supplies the
   *   available actions.
   * @param sensitive_data - Values passed to the message manager and to
   *   `controller.act`.
   * @param initial_actions - Actions converted via `_convert_initial_actions`
   *   and executed before the first step.
   * @param injected_agent_state - Existing state to reuse instead of a fresh
   *   `AgentState`.
   * @param context - Opaque user context forwarded to `controller.act`.
   *   Added as the last, defaulted parameter so existing positional callers
   *   are unaffected; previously the body read an undeclared `context`.
   *
   * The remaining parameters are copied unchanged into `AgentSettings`.
   */
  constructor(
    task: string,
    llm: BaseChatModel,
    // Optional parameters
    browser: Browser | null = null,
    browser_context: BrowserContext | null = null,
    controller: Controller<Context> = new Controller<Context>(),
    // Initial agent run parameters
    sensitive_data: Record<string, string> | null = null,
    initial_actions: Array<Record<string, Record<string, any>>> | null = null,
    // Cloud Callbacks
    register_new_step_callback: NewStepCallback = null,
    register_done_callback: DoneCallback = null,
    register_external_agent_status_raise_error_callback: ExternalAgentStatusCallback = null,
    // Agent settings
    use_vision: boolean = true,
    use_vision_for_planner: boolean = false,
    save_conversation_path?: string | null,
    save_conversation_path_encoding?: string | null,
    max_failures: number = 3,
    retry_delay: number = 10,
    override_system_message?: string | null,
    extend_system_message?: string | null,
    max_input_tokens: number = 128000,
    validate_output: boolean = false,
    message_context?: string | null,
    generate_gif: boolean | string = false,
    available_file_paths?: string[] | null,
    include_attributes: string[] = [
      'title',
      'type',
      'name',
      'role',
      'aria-label',
      'placeholder',
      'value',
      'alt',
      'aria-expanded',
      'data-date-format',
    ],
    max_actions_per_step: number = 10,
    tool_calling_method: ToolCallingMethod | null = null ,
    page_extraction_llm: BaseChatModel | null = null,
    planner_llm: BaseChatModel | null = null,
    planner_interval: number = 1, // Run planner every N steps
    // Inject state
    injected_agent_state: AgentState | null = null,
    // User context
    context: Context | null = null,
  ) {
    const finalPageExtractionLlm = page_extraction_llm || llm;

    // Core components
    this.task = task;
    this.llm = llm;
    this.controller = controller;
    this.sensitive_data = sensitive_data;

    this.settings = new AgentSettings({
      use_vision,
      use_vision_for_planner,
      save_conversation_path: save_conversation_path ?? undefined,
      save_conversation_path_encoding: save_conversation_path_encoding ?? undefined,
      max_failures,
      retry_delay,
      override_system_message: override_system_message ?? undefined,
      extend_system_message: extend_system_message ?? undefined,
      max_input_tokens,
      validate_output,
      message_context: message_context ?? undefined,
      generate_gif,
      available_file_paths: available_file_paths ?? undefined,
      include_attributes,
      max_actions_per_step,
      tool_calling_method: tool_calling_method || 'auto',
      page_extraction_llm: finalPageExtractionLlm,
      planner_llm: planner_llm ?? undefined,
      planner_interval,
    });

    // Initialize state
    this.state = injected_agent_state || new AgentState();

    // Action setup
    // _setup_action_models is declared async but is not awaited here.
    this._setup_action_models();
    this.initial_actions = initial_actions ? this._convert_initial_actions(initial_actions) : undefined;

    // Model setup
    this._set_model_names();

    // For models without tool calling, add available actions to context
    this.available_actions = this.controller.registry.getPromptDescription();
    this.tool_calling_method = this._set_tool_calling_method() || 'auto' ;
    this.settings.message_context = this._set_message_context() || undefined;

    // Initialize message manager with state
    this._message_manager = new MessageManager(
      task,
      new SystemPrompt(
        this.available_actions,
        this.settings.max_actions_per_step,
        override_system_message??undefined,
        extend_system_message??undefined
      ).getSystemMessage(),
      {
        maxTokens: this.settings.max_input_tokens,
        numChatTurnsToKeep:10,
        imageTokens:800,
        includeAttributes: this.settings.include_attributes,
        messageContext: this.settings.message_context,
        sensitiveData:this.sensitive_data||undefined,
        availableFilePath: this.settings.available_file_paths,
      },
      this.state.message_manager_state
    );

    // Browser setup
    this.injected_browser = browser !== null;
    this.injected_browser_context = browser_context !== null;
    if (browser_context) {
      this.browser = browser;
      this.browser_context = browser_context;
    } else {
      this.browser = browser || new Browser();
      this.browser_context = new BrowserContext();
    }

    // Callbacks
    this.register_new_step_callback = register_new_step_callback;
    this.register_done_callback = register_done_callback;
    this.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback;

    // Context
    this.context = context;

    // Log conversation path if set
    if (this.settings.save_conversation_path) {
      console.info(`Saving conversation to ${this.settings.save_conversation_path}`);
    }
  }

  /**
   * Runs a single step and reports completion.
   *
   * @returns `[done, valid]`: `[false, false]` while the history is not done,
   *   `[true, false]` when done but output validation rejected the result,
   *   `[true, true]` when done (after invoking the done callback, if any).
   */
  async take_step(): Promise<[boolean, boolean]> {
    // The step must finish before the history is inspected; without the
    // await the isDone() check ran against the previous step's history.
    await this.step();

    if (this.state.history.isDone()) {
      if (this.settings.validate_output) {
        if (!await this._validate_output()) {
          return [true, false];
        }
      }
      if (this.register_done_callback) {
        if (typeof this.register_done_callback === 'function') {
await this.register_done_callback(this.state.history);
        }
      }
      return [true, true];
    }
    return [false, false];
  }

  /**
   * Drives the agent until the task is done, the failure limit is hit, the
   * agent is stopped, or `max_steps` steps have been taken.
   *
   * Initial actions (if any) are executed first. Errors are logged through
   * `logger.error` and not rethrown, in which case the method resolves to
   * `undefined`.
   *
   * @param max_steps - Upper bound on the number of steps.
   * @returns The agent history, or `undefined` if an error was caught.
   */
  async run(this: Agent, max_steps: number = 100) {
    try {
      if (this.initial_actions) {
        const result = await this.multi_act(this.initial_actions,);
        this.state.last_result = result;
      }

      // Stays true only when the loop ends without hitting any `break`,
      // i.e. the step budget was used up without finishing.
      let ran_out_of_steps = true;

      for (let step = 0; step < max_steps; step++) {
        if (this.state.consecutive_failures >= this.settings.max_failures) {
          logger.error(`❌ Stopping due to ${this.settings.max_failures} consecutive failures`);
          ran_out_of_steps = false;
          break;
        }

        if (this.state.stopped) {
          logger.info('Agent stopped');
          ran_out_of_steps = false;
          break;
        }

        // Poll every 200 ms while paused; a stop request ends the wait.
        while (this.state.paused) {
          await new Promise(resolve => setTimeout(resolve, 200));
          if (this.state.stopped) {
            break;
          }
        }

        const step_info = new AgentStepInfo(
          step,
          max_steps
        );
        await this.step(step_info);

        if (this.state.history.isDone()) {
          logger.info('✅ Task completed');
          if (this.settings.validate_output && step < max_steps - 1) {
            if (!await this._validate_output()) {
              continue;
            }
          }
          ran_out_of_steps = false;
          break;
        }
      }

      // Previously logged unconditionally, even after a successful run.
      if (ran_out_of_steps) {
        logger.info('❌ Failed to complete task in maximum steps');
      }
      return this.state.history;
    } catch (e:any) {
      logger.error(e);
    }
  }

  /**
   * Executes a batch of actions in order through the controller.
   *
   * The batch stops early when an action reports `isDone` or an error, or
   * when (with `check_for_new_elements`) the page shows an element whose
   * branch-path hash was not present before the batch started.
   *
   * @param actions - Actions to execute.
   * @param check_for_new_elements - Abort before an indexed action if the
   *   page now contains elements that were not there at the start.
   * @returns One result per executed action, plus a notice entry when the
   *   batch was cut short by new elements.
   * @throws Error("InterruptedError") when the agent is stopped or paused.
   */
  async multi_act(
    this: Agent,
    actions:ActionModel[],
    check_for_new_elements: boolean = true
  ): Promise<ActionResult[]> {
    const results: ActionResult[] = [];

    // Fall back to an empty map so Object.values() never receives undefined.
    const cached_selector_map = (await this.browser_context!.get_selector_map()) ?? {};
    const cached_path_hashes = new Set(
      Object.values(cached_selector_map).map((e: any) => e.hash.branch_path_hash)
    );
    await this.browser_context!.remove_highlights();

    for (let i = 0; i < actions.length; i++) {
      const action = actions[i];

      if (action.getIndex() !== null && i !== 0) {
        const new_state = await this.browser_context!.get_state();
        const new_path_hashes = new Set(
          Object.values(new_state.selectorMap).map((e:any) => e.hash.branch_path_hash)
        );
        // Subset test: is there any hash now that was not cached? The former
        // `new_path_hashes.has(cached_path_hashes)` looked up the Set object
        // itself and was therefore always false.
        const has_new_elements = Array.from(new_path_hashes).some(
          hash => !cached_path_hashes.has(hash)
        );
        if (check_for_new_elements && has_new_elements) {
          const msg = `Something new appeared after action ${i} / ${actions.length}`;
          logger.info(msg);
          results.push(new ActionResult(
            {extractedContent: msg, includeInMemory: true }
          ));
          break;
        }
      }

      await this._raise_if_stopped_or_paused();

      const result = await this.controller.act(
        action,
        this.browser_context!,
        this.settings.page_extraction_llm,
        this.sensitive_data||undefined,
        this.settings.available_file_paths,
        this.context
      );
      results.push(result);
      logger.debug(`Executed action ${i + 1} / ${actions.length}`);

      if (results[results.length - 1].isDone || results[results.length - 1].error || i === actions.length - 1) {
        break;
      }

      await new Promise(resolve => setTimeout(resolve, this.browser_context!.config.wait_between_actions));
    }

    return results;
  }

  /**
   * Asks the LLM whether the last action's output satisfies the task.
   *
   * When the verdict is negative, `state.last_result` is replaced with a
   * message carrying the validator's reason. Without a browser session the
   * output is accepted.
   *
   * @returns true when the output is accepted, false otherwise (including
   *   when the validator returned nothing parsable).
   */
  async _validate_output(): Promise<boolean> {
    const system_msg =
      'You are a validator of an agent who interacts with a browser. ' +
      'Validate if the output of last action is what the user wanted and if the task is completed. ' +
      'If the task is unclear defined, you can let it pass. But if something is missing or the image does not show what was requested dont let it pass. ' +
      'Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right. ' +
      `Task to validate: ${this.task}. Return a JSON object with 2 keys: is_valid and reason. ` +
      'is_valid is a boolean that indicates if the output is correct. ' +
      'reason is a string that explains why it is valid or not.'
      + ' example: {"is_valid": false, "reason": "The user wanted to search for \\"cat photos\\", but the agent searched for \\"dog photos\\" instead."}';

    if (!this.browser_context!.session) {
      return true;
    }

    const state = await this.browser_context!.get_state();
    const content = new AgentMessagePrompt(
      state,
      this.state.last_result || undefined,
      this.settings.include_attributes
    );
    const messages = [new SystemMessage({ content: system_msg }), content.getUserMessage(this.settings.use_vision)];

    type ValidationVerdict = { is_valid?: boolean; reason?: string };

    // withStructuredOutput expects a schema object; the previous field-less
    // class carried no property information for the model to follow.
    const validation_schema = {
      title: 'ValidationResult',
      description: 'Verdict on whether the agent output satisfies the task',
      type: 'object',
      properties: {
        is_valid: { type: 'boolean' },
        reason: { type: 'string' },
      },
      required: ['is_valid', 'reason'],
    };

    const validator = this.llm.withStructuredOutput(validation_schema, {"includeRaw": true});
    const response = await validator.invoke(messages);

    // `parsed` may be absent when the raw reply could not be parsed.
    const parsed = response['parsed'] as ValidationVerdict | null | undefined;
    const is_valid = parsed?.is_valid === true;
    const reason = parsed?.reason ?? 'validator returned no parsable verdict';

    if (!is_valid) {
      logger.info(`❌ Validator decision: ${reason}`);
      const feedback = `The output is not yet correct. ${reason}.`;
      this.state.last_result = [new ActionResult({ extractedContent: feedback, includeInMemory: true })];
    } else {
      logger.info(`✅ Validator decision: ${reason}`);
    }
    return is_valid;
  }

  /** Forwards a follow-up task to the message manager. */
  add_new_task(new_task: string): void {
    this._message_manager.add_new_task(new_task);
  }

  /**
   * Throws Error("InterruptedError") when the external status callback
   * resolves to true, or when the agent state is stopped or paused.
   */
  async _raise_if_stopped_or_paused(): Promise<void> {
    if (this.register_external_agent_status_raise_error_callback) {
      if (await this.register_external_agent_status_raise_error_callback()) {
        throw new Error("InterruptedError");
      }
    }

    if (this.state.stopped || this.state.paused) {
      logger.debug('Agent paused');
      throw new Error("InterruptedError");
    }
  }

  /**
   * Executes one agent step: read browser state, query the model, run the
   * returned actions and record a history item.
   *
   * @param step_info - Position of this step within the run, if known.
   */
  async step(this: Agent,step_info?: AgentStepInfo): Promise<void> {
    logger.info(`📍 Current step`);
    let state : BrowserState | null = null ;
    let model_output: AgentOutput| null = null;
    let result: ActionResult[] = [];
    const step_start_time = Date.now();
    let tokens = 0;

    try {
      state = await this.browser_context!.get_state();
      logger.debug("Browser COntext checked 1/5 ");
      await this._raise_if_stopped_or_paused();
      logger.debug("Resuming 2/5");
this._message_manager.add_state_message(state!, this.state.last_result, step_info!, this.settings.use_vision);
      logger.debug("State message added 3/5");

      if (this.settings.planner_llm && this.state.n_steps % this.settings.planner_interval == 0) {
        const plan = await this._run_planner();
        this._message_manager.add_plan(plan, -1);
        logger.debug("Plan added 4/5");
      }

      if (step_info && step_info.is_last_step()) {
        let msg = 'Now comes your last step. Use only the "done" action now. No other actions - so here your action sequence must have length 1.';
        msg += '\nIf the task is not yet fully finished as requested by the user, set success in "done" to false! E.g. if not all steps are fully completed.';
        msg += '\nIf the task is fully finished, set success in "done" to true.';
        msg += '\nInclude everything you found out for the ultimate task in the done text.';
        logger.info('Last step finishing up');
        this._message_manager._add_message_with_tokens(new HumanMessage({content: msg}));
      }

      const input_messages = this._message_manager.get_messages();
      tokens = this._message_manager.state.history.current_tokens;

      try {
        logger.info(`Requesting next action from model at current step`);
        model_output = await this.get_next_action(input_messages);
        logger.info(`🪜🪜 Model Output Receivved ${model_output.action}, ${model_output.current_state} `);
        logger.info(`Successfully received model output with ${model_output?.action?.length || 0} actions`);
        this.state.n_steps += 1;

        // The state message is replaced by the model output in the history.
        this._message_manager._remove_last_state_message();
        await this._raise_if_stopped_or_paused();

        if (model_output) {
          logger.info(`Adding model output to message manager`);
          this._message_manager.add_model_output(model_output);
        } else {
          logger.warn(`Model output is null, skipping add to message manager`);
        }
      } catch (e:any) {
        logger.error(`Error getting next action: ${e.message}`);
        this._message_manager._remove_last_state_message();
        throw e;
      }

      logger.debug(`Executing actions from model output`);
      if (model_output && model_output.action) {
        result = await this.multi_act(model_output.action);
        logger.info(`Completed ${result.length} actions`);
      } else {
        logger.warn(`No actions to execute, model output is null or has no actions`);
        result = [];
      }
      this.state.last_result = result;

      if (result.length > 0 && result[result.length-1].isDone) {
        logger.info(`📄 Result: ${result[result.length-1].extractedContent}`);
      }
      this.state.consecutive_failures = 0;
    } catch (e) {
      if ((e as Error).message === "InterruptedError") {
        logger.info ('Agent paused');
        this.state.last_result = [
          new ActionResult({
            error: 'The agent was paused - now continuing actions might need to be repeated',
            includeInMemory: true
          })
        ];
        return;
      } else {
        result = await this._handle_step_error(e as Error);
        this.state.last_result = result;
      }
    } finally {
      if (!result) {
        result = [];
      }
      if (state) {
        const metadata = new StepMetadata(
          this.state.n_steps,
          step_start_time,
          new Date().getTime(),
          tokens,
        );
        // Awaited so the history item is recorded before step() resolves;
        // callers inspect history.isDone() immediately afterwards.
        await this._make_history_item(model_output, state, result, metadata);
      }
    }
  }

  /**
   * Converts a step failure into an ActionResult and updates failure state.
   *
   * - ValidationError / EvalError: logs the error; on "Max token limit
   *   reached" shrinks the message manager's token budget and trims the
   *   history; on "Could not parse response" appends a JSON hint.
   * - RateLimitError (or a message containing both "429" and "quota"): logs
   *   and waits `settings.retry_delay` seconds before returning.
   * - Anything else: logs the formatted error.
   *
   * Every path increments `state.consecutive_failures` by one.
   *
   * @param error - The error thrown during the step.
   * @returns A single-element list holding the error result.
   */
  async _handle_step_error(error: Error): Promise<ActionResult[]> {
    let error_msg = AgentError.formatError(error)
    const prefix = `❌ Result failed ${this.state.consecutive_failures + 1}/${this.settings.max_failures} times:\n `;

    if (error instanceof ValidationError || error instanceof EvalError) {
      logger.error(`${prefix}${error_msg}`);
      if (error_msg.includes('Max token limit reached')) {
        this._message_manager.settings.maxTokens = this.settings.max_input_tokens - 500;
        logger.info(
          `Cutting tokens from history - new max input tokens: ${this._message_manager.settings.maxTokens}`
        );
        this._message_manager.cut_messages();
      } else if (error_msg.includes('Could not parse response')) {
        error_msg += '\n\nReturn a valid JSON object with the required fields.';
      }
    } else if (error instanceof RateLimitError || (error.message.includes('429') && error.message.includes('quota'))) {
      logger.debuggerError(`Your API key has reached its rate limit. Please buy more credits and try again later.${prefix}`);
      // retry_delay is treated as seconds (default 10); setTimeout takes
      // milliseconds, so passing the raw value waited only 10 ms.
      await new Promise(resolve => setTimeout(resolve, this.settings.retry_delay * 1000));
    } else {
      logger.debuggerError(`${prefix}${error_msg}`);
    }

    this.state.consecutive_failures += 1;
    return [new ActionResult({error: error_msg, includeInMemory: true})];
  }

  /**
   * Appends one AgentHistory entry for the step that just ran.
   *
   * @param model_output - Model output of the step, or null if none was obtained.
   * @param state - Browser state captured at the start of the step.
   * @param result - Results of the executed actions.
   * @param metadata - Optional timing / token metadata.
   */
  async _make_history_item(
    model_output: AgentOutput | null,
    state: BrowserState,
    result: ActionResult[],
    metadata?: StepMetadata,
  ): Promise<void> {
    logger.debug(`Creating history item for current agent step`);
    let interacted_elements:DOMHistoryElement[];

    if (model_output) {
      logger.debug(`Processing model output with ${model_output.action.length} actions for history`);
      interacted_elements = (await AgentHistory.getInteractedElement(model_output, state.selectorMap))! as DOMHistoryElement[] || [];
      logger.debug(`Found ${interacted_elements.filter(el => el !== null).length} interacted elements`);
    } else {
      logger.debug(`No model output, using null placeholder for interacted elements`);
      interacted_elements = [];
    }

    logger.debug(`Creating browser state history for URL: ${state.url}`);
    const state_history = new BrowserStateHistory(
      state.url,
      state.title,
      state.tab,
      interacted_elements,
      state.screenshot,
    );

    const history_item = new AgentHistory(model_output, result, state_history, metadata);
    logger.debug(`History item created with ${result.length} action results`);
    this.state.history.history.push(history_item);
    logger.info(`Added history item to agent history (total: ${this.state.history.history.length} items)`);
  }

  /**
   * For the 'raw' tool-calling method, appends the available-actions
   * description to `settings.message_context` (or creates it).
   *
   * @returns The resulting message context.
   */
  _set_message_context(): string | null {
    logger.debug(`Setting message context for tool calling method: ${this.tool_calling_method}`);
    if (this.tool_calling_method === 'raw') {
      if (this.settings.message_context) {
        logger.debug(`Appending available actions to existing message context`);
        this.settings.message_context += `\n\nAvailable actions: ${this.available_actions}`;
      } else {
        logger.debug(`Creating new message context with available actions`);
        this.settings.message_context = `Available actions: ${this.available_actions}`;
      }
    }
    logger.debug(`Final message context set (${this.settings.message_context ? 'exists' : 'null'})`);
    return this.settings.message_context!;
  }

  /**
   * Records the LLM's constructor name as `chat_model_library` and derives
   * `model_name` / `plannerModelName` from the LLM instances.
   *
   * NOTE(review): both branches store `getName()` / `.name` rather than the
   * value of the `model_name` / `model` property they test for. Other
   * methods in this class compare `model_name` against 'deepseek-*'
   * identifiers, so confirm which value is intended.
   */
  _set_model_names(): void {
    logger.debug(`Setting model names from LLM instance`);
    this.chat_model_library = this.llm.constructor.name;
    logger.debug(`Chat model library set to: ${this.chat_model_library}`);
    this.model_name = this.llm.name;

    if ('model_name' in this.llm) {
      const model = this.llm;
      this.model_name = model !== null ? model.getName() : undefined;
      logger.debug(`Model name set from model_name property: ${this.model_name}`);
    } else if ('model' in this.llm) {
      const model = this.llm;
      this.model_name = model.name !== null ? model.getName() : 'Unknown';
      logger.debug(`Model name set from model.name property: ${this.model_name}`);
    } else {
      logger.warn(`Could not determine model name from LLM instance, using default: ${this.model_name}`);
    }

    if (this.settings.planner_llm) {
      logger.debug(`Planner LLM is set, determining planner model name`);
      if ('model_name' in this.settings.planner_llm) {
        this.plannerModelName = this.settings.planner_llm.getName();
        logger.debug(`Planner model name set from model_name property: ${this.plannerModelName}`);
      } else if ('model' in this.settings.planner_llm) {
        this.plannerModelName = this.settings.planner_llm.name!;
        logger.debug(`Planner model name set from model.name property: ${this.plannerModelName}`);
      } else {
        this.plannerModelName = "";
        logger.warn(`Could not determine planner model name from planner LLM instance, using default: ${this.plannerModelName}`);
      }
    } else {
      this.plannerModelName = "";
    }
  }

  /**
   * Builds the action model and output model from the controller registry,
   * plus "done"-only variants.
   */
  async _setup_action_models(): Promise<void> {
    this.ActionModel = this.controller.registry.create_action_model({});
    // Output model instance wrapping the dynamic action model
    this.AgentOutput=new AgentOutput({action:[this.ActionModel!]})

    // used to force the done action when max_steps is reached
    this.DoneActionModel = this.controller.registry.create_action_model({ include_actions: ['done'] });
    // Output model instance restricted to the "done" action
    this.DoneAgentOutput = new AgentOutput({action:[this.DoneActionModel]})
  }

  /**
   * Resolves the tool-calling method from the settings.
   *
   * An explicit (non-'auto') setting is returned as is. For 'auto':
   * deepseek-reasoner / deepseek-r1 model names map to 'raw', ChatOpenAI and
   * AzureChatOpenAI to 'function_calling', everything else to null.
   */
  _set_tool_calling_method(): ToolCallingMethod | null {
    const tool_calling_method = this.settings.tool_calling_method;
    if (tool_calling_method !== 'auto') {
      return tool_calling_method as ToolCallingMethod;
    }

    if (this.model_name?.includes('deepseek-reasoner') || this.model_name?.includes('deepseek-r1')) {
      return 'raw';
    }
    if (this.chat_model_library === 'ChatOpenAI' || this.chat_model_library === 'AzureChatOpenAI') {
      return 'function_calling';
    }
    // Includes ChatGoogleGenerativeAI and any unrecognised library.
    return null;
  }

  /** Matches every well-formed <think>...</think> section, across lines. */
  THINK_TAGS = /<think>.*?<\/think>/gs;
  /** Matches everything from the start of the text up to a stray </think>. */
  STRAY_CLOSE_TAG = /^.*?<\/think>/s;

  /**
   * Strips reasoning sections delimited by think tags from model output.
   *
   * @param text - Raw model output.
   * @returns The text without think sections, trimmed.
   */
  _remove_think_tags(this: any, text: string): string {
    // Step 1: Remove well-formed <think>...</think>
    logger.debug(`Removing think tags from text (length: ${text.length})`);
    text = text.replace(this.THINK_TAGS, '');
    // Step 2: If there's an unmatched closing tag </think>,
    // remove everything up to and including that.
text = text.replace(this.STRAY_CLOSE_TAG, ''); return text.trim(); } _convert_input_messages(this: any, input_messages: BaseMessage[]): BaseMessage[] { logger.debug(`Converting ${input_messages.length} input messages for model: ${this.model_name}`); if (this.model_name === 'deepseek-reasoner' || this.model_name.includes('deepseek-r1')) { logger.debug(`Using specialized conversion for ${this.model_name}`); return convertInputMessages(input_messages, this.model_name); } else { return input_messages; } } extract_json_from_model_output(this: Agent, content: string): any { logger.debug(`Attempting to extract JSON from model output (length: ${content.length})`); try { const result = JSON.parse(content); logger.debug(`Successfully parsed JSON from model output`); return result; } catch (e:any) { logger.error(`Failed to parse JSON from model output: ${e.message}`); throw new Error(`Failed to parse JSON from model output: ${content}`); } } async get_next_action(this: Agent, input_messages: BaseMessage[]): Promise<AgentOutput> { const jsonSchema = this.AgentOutput!.toJson(); logger.debug(`Getting next action with ${input_messages.length} input messages`); await this._set_model_names(); input_messages = this._convert_input_messages(input_messages); let response:{ raw:AIMessage, parsed:any }; let parsed: AgentOutput | null = null; if (this.tool_calling_method === 'raw') { logger.debug(`Using raw tool calling method`); const output = await this.llm.invoke(input_messages); // TODO: currently invoke does not return reasoning_content, we should override invoke const content = this._remove_think_tags(String(output.content)); try { logger.debug(`Parsing raw model output`); const parsed_json = this.extract_json_from_model_output(content); parsed = new AgentOutput({action:[parsed_json]}); } catch (e:any) { logger.warn(`Failed to parse model output: ${e.message}`); throw new Error('Could not parse response.'); } } else if (this.tool_calling_method === null) { logger.debug(`Using 
structured output without specific tool calling method`); const structured_llm = this.llm.withStructuredOutput(jsonSchema, { includeRaw: true }); response = await structured_llm.invoke(input_messages); parsed = response['parsed'] as AgentOutput; } else { logger.debug(`Using structured output with ${this.tool_calling_method} tool calling method`); const structured_llm = this.llm.withStructuredOutput(jsonSchema, { includeRaw: true, method: this.tool_calling_method }); response = await structured_llm.invoke(input_messages); if (response.parsed.length<=0|| response.raw?.tool_calls ) { const raw_message=response.raw if (raw_message.tool_calls && raw_message.tool_calls.length > 0) { const tool_call = raw_message.tool_calls[0]; // Get action data which might be in different formats const actionData = tool_call.args.action; // For object format like [ { search: { query: 'text' } } ] if (Array.isArray(actionData) && actionData.length > 0) { const actionObj = actionData[0]; // Get the first key as the action name const tool_call_name = Object.keys(actionObj)[0]; // Get the parameters from the value of that key const tool_call_args = actionObj[tool_call_name]; logger.debug(`Found array action: ${tool_call_name}`); logger.debug(`With args: ${JSON.stringify(tool_call_args)}`); logger.success(`🤖🤖${tool_call_name}===>${JSON.stringify(tool_call_args)}`) // Create proper state and action models const current_state = new AgentBrain({ evaluation_previous_goal: "Executing action", memory: "Using Tool call", next_goal: `Execute ${tool_call_name}` }); // Create a proper ActionModel instance const action_schema = this.controller.registry.getActions()[tool_call_name]; const action = new ActionModel( {[tool_call_name]: action_schema.paramModel}, tool_call_args ); parsed = new AgentOutput({ current_state: current_state, action: [action] }); } // Handle direct object format with name/args else if (typeof actionData === 'object' && actionData !== null) { const tool_call_name = 
tool_call.args.action.name || Object.keys(actionData)[0]; const tool_call_args = tool_call.args.action.args || actionData[tool_call_name]; logger.debug(`Tool call name: ${tool_call_name}`); logger.debug(`Tool call args: ${JSON.stringify(tool_call_args)}`); logger.success(`🤖🤖${tool_call_name}===>${JSON.stringify(tool_call_args)}`) const current_state = new AgentBrain({ evaluation_previous_goal: "Executing action", memory: "Using Tool call", next_goal: `Execute ${tool_call_name}` }); // Create a proper ActionModel instance const action_schema = this.controller.registry.getActions()[tool_call_name]; const action = new ActionModel({[tool_call_name]: action_schema.paramModel}, tool_call_args); parsed = new AgentOutput({ current_state: current_state, action: [action] }); } else { logger.error(`Unsupported action format: ${JSON.stringify(actionData)}`); } } else { logger.error(`No tool calls found in response`); } } else {parsed=null} } if (parsed === null) { logger.error(`Could not parse model response after all attempts`); throw new Error('Could not parse response.'); } // cut the number of actions to max_actions_per_step if needed if (parsed.action && parsed.action.length > this.settings.max_actions_per_step) { logger.warn(`Limiting actions from ${parsed.action.length} to max ${this.settings.max_actions_per_step}`); parsed.action = parsed.action.slice(0, this.settings.max_actions_per_step); } logger.debug(`Successfully got next action with ${parsed.action?.length || 0} actions`); return parsed; } async rerun_history( history: AgentHistoryList, max_retries: number = 1, skip_failures: boolean = true, delay_between_actions: number = 2.0, ): Promise<ActionResult[]> { /* Rerun a saved history of actions with error handling and retry logic. 
Args: history: The history to replay max_retries: Maximum number of retries per action skip_failures: Whether to skip failed actions or stop execution delay_between_actions: Delay between actions in seconds Returns: List of action results */ // Execute initial actions if provided if (this.initial_actions) { logger.debug(`Executing ${this.initial_actions.length} initial actions before replay`); const result = await this.multi_act(this.initial_actions); this.state.last_result = result; } const results: ActionResult[] = []; logger.info(`Starting history replay with ${history.history.length} steps`); for (let i = 0; i < history.history.length; i++) { const history_item = history.history[i]; const goal = history_item.model_output ? history_item.model_output.current_state.next_goal : ''; logger.info(`Replaying step ${i + 1}/${history.history.length}: goal: ${goal}`); if ( !history_item.model_output || !history_item.model_output.action || history_item.model_output.action[0] === null ) { logger.error(`Step ${i + 1}: No action to replay, skipping`); results.push(new ActionResult({ error: 'No action to replay' })); continue; } let retry_count = 0; logger.debug(`Step ${i + 1}: Will attempt execution up to ${max_retries} times if needed`); while (retry_count < max_retries) { try { logger.debug(`Step ${i + 1}: Execution attempt ${retry_count + 1}`); const result = await this._execute_history_step(history_item, delay_between_actions); logger.debug(`Step ${i + 1}: Execution successful, got ${result.length} results`); results.push(...result); break; } catch (e:any) { retry_count += 1; logger.warn(`Step ${i + 1}: Execution attempt ${retry_count} failed: ${e.message}`); if (retry_count === max_retries) { const error_msg = `Step ${i + 1} failed after ${max_retries} attempts: ${(e as Error).message}`; logger.error(error_msg); if (!skip_failures) { results.push(new ActionResult({ error: error_msg })); throw new Error(error_msg); } } else { logger.error(`Step ${i + 1} failed (attempt 
${retry_count}/${max_retries}), retrying...`); await new Promise(resolve => setTimeout(resolve, delay_between_actions * 1000)); } } } } return results; }

  /**
   * Replay one recorded history step against the live page.
   *
   * Reads the current browser state, re-resolves every recorded action through
   * `_update_action_indices`, runs the resulting actions with `multi_act`, and
   * then sleeps for `delay` seconds before returning.
   *
   * @param history_item - The recorded step; its `model_output.action` list is replayed.
   * @param delay - Pause after execution, in seconds (multiplied by 1000 for `setTimeout`).
   * @returns The results produced by `multi_act` for the updated actions.
   * @throws Error when no browser state is available, when the step has no
   *   `model_output`, or when an action's element cannot be matched in the current page.
   */
  async _execute_history_step(history_item: AgentHistory, delay: number): Promise<ActionResult[]> {
    logger.debug(`Executing history step with delay: ${delay}s`);

    const state = await this.browser_context?.get_state();
    if (!state || !history_item.model_output) {
      logger.error('Invalid state or model output in _execute_history_step');
      throw new Error('Invalid state or model output');
    }

    const recorded_actions = history_item.model_output.action;
    logger.debug(`Current page URL: ${state.url}, attempting to match ${recorded_actions.length} actions`);

    // Loop-invariant: the recorded elements, indexed in parallel with the action list.
    const interacted_elements = history_item?.state?.interacted_element;

    const updated_actions: ActionModel[] = [];
    for (let i = 0; i < recorded_actions.length; i++) {
      const action = recorded_actions[i];
      logger.debug(`Processing action ${i+1}/${recorded_actions.length}: ${action.constructor.name}`);

      const updated_action = await this._update_action_indices(
        interacted_elements?.[i] ?? null,
        action,
        state,
      );
      if (updated_action === null) {
        logger.error(`Failed to find matching element for action ${i+1} in current page state`);
        throw new Error(`Could not find matching element ${i} in current page`);
      }

      logger.debug(`Successfully updated action ${i+1} indices`);
      // Narrowed to ActionModel by the null guard above; no non-null assertion needed.
      updated_actions.push(updated_action);
    }

    logger.debug(`Executing ${updated_actions.length} updated actions`);
    const result = await this.multi_act(updated_actions);
    logger.debug(`Action execution complete, got ${result.length} results`);

    logger.debug(`Waiting for ${delay}s before next step`);
    await new Promise(resolve => setTimeout(resolve, delay * 1000));
    return result;
  }

  /**
   * Update an action's element index based on the current page state.
   *
   * When there is no historical element or no current element tree, the action
   * is returned untouched. Otherwise the historical element is looked up with
   * `HistoryTreeProcessor.findHistoryElementInTree`; if the match carries a
   * different highlight index, the action is mutated in place via `setIndex`.
   *
   * @param historical_element - Element recorded for this action, or null if none.
   * @param action - The recorded action; may be mutated in place.
   * @param current_state - Current browser state whose `elementTree` is searched.
   * @returns The (possibly updated) action, or null if the element cannot be
   *   found or has no highlight index.
   */
  async _update_action_indices(
    historical_element: DOMHistoryElement | null,
    action: ActionModel, // Type this properly based on your action model
    current_state: BrowserState,
  ): Promise<ActionModel | null> {
    logger.debug(`Updating action indices for action type: ${action.constructor.name}`);

    if (!historical_element || !current_state.elementTree) {
      logger.debug(`No historical element or current element tree, returning original action`);
      return action;
    }

    logger.debug(`Searching for historical element in current page DOM structure`);
    const current_element = HistoryTreeProcessor.findHistoryElementInTree(historical_element, current_state.elementTree);

    // `== null` on purpose: rejects both null and undefined so an element without
    // a highlight index can never reach `action.setIndex(...)`.
    if (!current_element || current_element.highlightIndex == null) {
      logger.warn(`Could not find matching element in current page state`);
      return null;
    }

    logger.debug(`Found matching element with highlight index: ${current_element.highlightIndex}`);
    const old_index = action.getIndex();
    if (old_index !== current_element.highlightIndex) {
      action.setIndex(current_element.highlightIndex);
      logger.info(`Element moved in DOM, updated index from ${old_index} to ${current_element.highlightIndex}`);
    } else {
      logger.debug(`Element position unchanged, index remains: ${old_index}`);
} return action; }

  /**
   * Save the agent history to a file.
   *
   * Failures from `saveToFile` are logged and not rethrown.
   *
   * @param file_path - Destination path; a missing or empty value falls back to
   *   'AgentHistory.json'.
   */
  save_history(file_path?: string): void {
    // `||` is deliberate: an empty string is treated like a missing path.
    const target_path = file_path || 'AgentHistory.json';
    logger.debug(`Saving agent history to file: ${target_path}`);
    try {
      this.state.history.saveToFile(target_path);
      logger.info(`✅ Successfully saved history to ${target_path}`);
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error(`Failed to save history to ${target_path}: ${reason}`);
    }
  }

  /** Mark the agent as paused by setting `state.paused` to true. */
  pause(): void {
    logger.info('🔄 pausing Agent ');
    this.state.paused = true;
    logger.debug(`Agent state updated: paused=${this.state.paused}`);
  }

  /** Clear the paused flag by setting `state.paused` to false. */
  resume(): void {
    logger.info('▶️ Agent resuming');
    this.state.paused = false;
    logger.debug(`Agent state updated: paused=${this.state.paused}`);
  }

  /** Mark the agent as stopped by setting `state.stopped` to true. */
  stop(): void {
    logger.info('🛑 Agent stopping');
    this.state.stopped = true;
    logger.debug(`Agent state updated: stopped=${this.state.stopped}`);
  }

  /**
   * Convert dictionary-based actions to ActionModel instances.
   *
   * Each entry is expected to hold a single `{ actionName: params }` pair. The
   * params are validated with the `paramModel.parse` of the matching entry in
   * the controller registry.
   *
   * @param actions - Raw action dictionaries, one action per entry.
   * @returns One ActionModel per input entry, in the same order.
   * @throws Error when an entry is empty or names an action that is not in the
   *   registry; validation errors from `paramModel.parse` propagate unchanged.
   */
  _convert_initial_actions(actions: Array<Record<string, Record<string, any>>>): ActionModel[] {
    const registered_actions = this.controller.registry.getActions();
    const converted_actions: ActionModel[] = [];

    for (const action_dict of actions) {
      // Only the first key is used; each dict should carry exactly one action.
      const action_name = Object.keys(action_dict)[0];
      if (action_name === undefined) {
        throw new Error('Initial action entry is empty; expected a single action name mapped to its parameters');
      }
      const params = action_dict[action_name];

      // Fail with a readable message instead of a TypeError on `undefined.paramModel`.
      const action_info: any = registered_actions[action_name];
      if (!action_info) {
        throw new Error(`Unknown initial action '${action_name}': not found in the controller registry`);
      }

      const validated_params = action_info.paramModel.parse(params);
      converted_actions.push(
        new ActionModel(
          { [action_name]: validated_params },
          validated_params
        )
      );
    }

    return converted_actions;
  }

  /** Ask the planner LLM for a plan; resolves to null when `settings.planner_llm` is not set. */
  async _run_planner(): Promise<string | null> { logger.info('Running planner'); /*Run the planner to analyze
state and suggest next steps*/
    // Skip planning if no planner_llm is set
    const planner_llm = this.settings.planner_llm;
    if (!planner_llm) {
      return null;
    }

    // Planner history: the planner's own system prompt followed by the full
    // message history minus its first message.
    const planner_messages = [
      new PlannerPrompt(this.controller.registry.getPromptDescription()).getSystemMessage(),
      ...this._message_manager.get_messages().slice(1),
    ];

    if (!this.settings.use_vision_for_planner && this.settings.use_vision) {
      // Strip images from the last state message: keep only its text parts.
      const last_index = planner_messages.length - 1;
      const last_state_message: HumanMessage = planner_messages[last_index];
      let new_msg = '';
      if (Array.isArray(last_state_message.content)) {
        for (const msg of last_state_message.content) {
          // Parts of any other type (e.g. 'image_url') are dropped.
          if (msg['type'] === 'text') {
            new_msg += msg['text'];
          }
        }
      } else {
        new_msg = last_state_message.content;
      }
      planner_messages[last_index] = new HumanMessage({ content: new_msg });
    }

    const processed_planner_messages = convertInputMessages(planner_messages, planner_llm.name);

    // Get planner output
    const response = await planner_llm.invoke(processed_planner_messages);
    let plan = String(response.content);

    // if deepseek-reasoner, remove think tags
    const model_name = planner_llm.name;
    if (model_name && (model_name.includes('deepseek-r1') || model_name.includes('deepseek-reasoner'))) {
      plan = this._remove_think_tags(plan);
    }

    // Logging only: pretty-print the plan when it is valid JSON, otherwise log it raw.
    try {
      const plan_json = JSON.parse(plan);
      logger.info(`Planning Analysis:\n${JSON.stringify(plan_json, null, 4)}`);
    } catch (e) {
      if (e instanceof SyntaxError) {
        logger.info(`Planning Analysis:\n${plan}`);
      } else {
        logger.debug(`Error parsing planning analysis: ${(e as Error).message}`);
        logger.info(`Plan: ${plan}`);
      }
    }

    return plan;
  }

  /** The message manager backing this agent. */
  get message_manager(): MessageManager {
    return this._message_manager;
  }

  /**
   * Close the browser resources held by this agent.
   *
   * The browser context is closed first, then the browser. A resource is
   * skipped when its `injected_*` flag is set (presumably meaning the caller
   * supplied it; confirm against the constructor). Each close runs in its own
   * try/catch so a failure closing the context no longer prevents the browser
   * from being closed. Errors are logged and never rethrown.
   */
  async close(): Promise<void> {
    // First close browser resources
    if (this.browser_context && !this.injected_browser_context) {
      try {
        await this.browser_context.close();
      } catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        logger.error(`Error during cleanup: ${reason}`);
      }
    }

    if (this.browser && !this.injected_browser) {
      try {
        await this.browser.close();
      } catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        logger.error(`Error during cleanup: ${reason}`);
      }
    }
  }
}