UNPKG

browser-use-typescript

Version:

A TypeScript-based browser automation framework

955 lines (953 loc) 46.5 kB
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
import { Browser } from "../../browser/playwrightBrowser/browserService";
import { BrowserContext } from "../../browser/playwrightBrowser/browserContext";
import { Controller } from "../../controller/controllerContext";
import { RateLimitError } from "../types";
import { ActionResult, AgentError, AgentHistory, AgentOutput, AgentBrain, AgentSettings, AgentState, StepMetadata, ValidationError } from "../types";
import { AgentStepInfo } from "../types";
import { convertInputMessages } from "../message_manager/utils";
import { BrowserStateHistory } from "../../browser/playwrightBrowser/type";
import { MessageManager } from "../message_manager/services";
import { AgentMessagePrompt, PlannerPrompt, SystemPrompt } from "../prompt";
import { ActionModel } from "../../controller/registry/types";
import { HistoryTreeProcessor } from "../../domHIstory/historyTypes";

/**
 * Small console logger that decorates messages with ANSI escape sequences.
 *
 * `error`, `log`, `debug`, `info`, `warn` and `trace` only print when
 * `debugEnabled` is true. `success` and `debuggerError` are gated differently:
 * they print whenever `isDebugger` is false.
 */
class Logger {
    debugEnabled = false;
    isDebugger = false;
    // ANSI escape sequences used to decorate console output
    colors = {
        // Text color / style
        reset: "\x1b[0m",
        white: "\x1b[37m",
        bold: "\x1b[1m",
        italic: "\x1b[3m",
        // Background colors
        bgRed: "\x1b[41m",
        bgPurple: "\x1b[45m",
        bgBlue: "\x1b[44m",
        bgYellow: "\x1b[43m",
        bgGreen: "\x1b[42m"
    };
    /**
     * @param {boolean} [debugEnabled=true] - Enables the `debugEnabled`-gated methods.
     * @param {boolean} [isDebugger=false] - When true, silences `success` and `debuggerError`.
     */
    constructor(debugEnabled = true, isDebugger = false) {
        this.debugEnabled = debugEnabled;
        this.isDebugger = isDebugger;
    }
    /** Prints via `console.error` on a red background when `debugEnabled`. */
    error(message) {
        if (this.debugEnabled) {
            console.error(`${this.colors.bgRed}${this.colors.white}ERROR: ${message}${this.colors.reset}`);
        }
    }
    /** Prints via `console.log` on a blue background when `debugEnabled`. */
    log(message) {
        if (this.debugEnabled) {
            console.log(`${this.colors.bgBlue}${this.colors.white}LOG: ${message}${this.colors.reset}`);
        }
    }
    /** Prints via `console.log` on a purple background when `debugEnabled`. */
    debug(message) {
        if (this.debugEnabled) {
            console.log(`${this.colors.bgPurple}${this.colors.white}DEBUG: ${message}${this.colors.reset}`);
        }
    }
    /** Prints via `console.log` on a green background when `debugEnabled`. */
    info(message) {
        if (this.debugEnabled) {
            console.log(`${this.colors.bgGreen}${this.colors.white}INFO: ${message}${this.colors.reset}`);
        }
    }
    /** Prints via `console.warn` on a yellow background when `debugEnabled`. */
    warn(message) {
        if (this.debugEnabled) {
            console.warn(`${this.colors.bgYellow}${this.colors.white}WARN: ${message}${this.colors.reset}`);
        }
    }
    /** Prints via `console.trace` (includes a stack trace) when `debugEnabled`. */
    trace(message) {
        if (this.debugEnabled) {
            console.trace(`${this.colors.bgBlue}${this.colors.white}TRACE: ${message}${this.colors.reset}`);
        }
    }
    /** Prints a bold success line unless `isDebugger` is set (ignores `debugEnabled`). */
    success(message) {
        if (!this.isDebugger) {
            console.log(`${this.colors.bold}${this.colors.bgGreen}${this.colors.white}SUCCESS: ${message}${this.colors.reset}`);
        }
    }
    /** Prints an italic error line via `console.log` unless `isDebugger` is set (ignores `debugEnabled`). */
    debuggerError(message) {
        if (!this.isDebugger) {
            console.log(`${this.colors.italic}${this.colors.bgRed}${this.colors.white}ERROR: ${message}${this.colors.reset}`);
        }
    }
}
export const logger = new Logger(false, false); // Default to debug disabled and debugger disabled

/**
 * Drives an LLM through a browser task: each step reads the browser state,
 * asks the model for the next actions, executes them through the controller
 * and records the outcome in `state.history`.
 */
export class Agent {
    // Core components
    task;
    llm;
    controller;
    sensitive_data;
    plannerModelName = "";
    // Settings
    settings;
    // State
    state;
    // Models and Actions
    available_actions;
    tool_calling_method;
    initial_actions;
    chat_model_library;
    model_name;
    browser_use_version;
    browser_use_source;
    // Message Management
    _message_manager;
    // Browser
    browser;
    browser_context;
    injected_browser;
    injected_browser_context;
    // Callbacks
    register_new_step_callback;
    register_done_callback;
    register_external_agent_status_raise_error_callback;
    // Context
    context;
    DoneActionModel;
    DoneAgentOutput;
    // Telemetry
    // Replace with actual ProductTelemetry when implemented
    ActionModel;
    AgentOutput;
    /**
     * Builds the agent: stores the collaborators, assembles `AgentSettings`,
     * prepares the action/output models, resolves model names and the tool
     * calling method, creates the message manager and sets up the browser.
     *
     * All parameters are positional. Only `task` and `llm` are required.
     *
     * @param task - Task description handed to the message manager and the validator prompt.
     * @param llm - Chat model used for action selection (and page extraction unless `page_extraction_llm` is given).
     * @param browser - Optional pre-built browser; when given it is not closed by `close()`.
     * @param browser_context - Optional pre-built browser context; when given it is not closed by `close()`.
     * @param controller - Controller whose registry supplies the available actions.
     * @param sensitive_data - Forwarded to the message manager and to `controller.act`.
     * @param initial_actions - Array of `{ actionName: params }` objects executed before the first step.
     * @param register_new_step_callback - Stored on the instance; not invoked in this class.
     * @param register_done_callback - Called with the history by `take_step` when the task is done.
     * @param register_external_agent_status_raise_error_callback - Async predicate; a truthy result interrupts the agent.
     * @param retry_delay - Wait after a rate-limit error, interpreted as seconds.
     * @param planner_interval - The planner runs when `n_steps % planner_interval === 0`.
     * @param injected_agent_state - Existing `AgentState` to resume from.
     * @param context - Opaque value forwarded to `controller.act`.
     * The remaining parameters are copied unchanged into `AgentSettings`.
     */
    constructor(task, llm,
    // Optional parameters
    browser = null, browser_context = null, controller = new Controller(),
    // Initial agent run parameters
    sensitive_data = null, initial_actions = null,
    // Cloud Callbacks
    register_new_step_callback = null, register_done_callback = null, register_external_agent_status_raise_error_callback = null,
    // Agent settings
    use_vision = true, use_vision_for_planner = false, save_conversation_path, save_conversation_path_encoding, max_failures = 3, retry_delay = 10, override_system_message, extend_system_message, max_input_tokens = 128000, validate_output = false, message_context, generate_gif = false, available_file_paths, include_attributes = [
        'title',
        'type',
        'name',
        'role',
        'aria-label',
        'placeholder',
        'value',
        'alt',
        'aria-expanded',
        'data-date-format',
    ], max_actions_per_step = 10, tool_calling_method = null, page_extraction_llm = null, planner_llm = null, planner_interval = 1, // Run planner every N steps
    // Inject state
    injected_agent_state = null,
    //
    context = null) {
        const finalPageExtractionLlm = page_extraction_llm || llm;
        // Core components
        this.task = task;
        this.llm = llm;
        this.controller = controller;
        this.sensitive_data = sensitive_data;
        this.settings = new AgentSettings({
            use_vision,
            use_vision_for_planner,
            save_conversation_path: save_conversation_path ?? undefined,
            save_conversation_path_encoding: save_conversation_path_encoding ?? undefined,
            max_failures,
            retry_delay,
            override_system_message: override_system_message ?? undefined,
            extend_system_message: extend_system_message ?? undefined,
            max_input_tokens,
            validate_output,
            message_context: message_context ?? undefined,
            generate_gif,
            available_file_paths: available_file_paths ?? undefined,
            include_attributes,
            max_actions_per_step,
            tool_calling_method: tool_calling_method || 'auto',
            page_extraction_llm: finalPageExtractionLlm,
            planner_llm: planner_llm ?? undefined,
            planner_interval,
        });
        // Initialize state
        this.state = injected_agent_state || new AgentState();
        // Action setup
        this._setup_action_models();
        this.initial_actions = initial_actions ? this._convert_initial_actions(initial_actions) : undefined;
        // Model setup
        this._set_model_names();
        // For models without tool calling, add available actions to context
        this.available_actions = this.controller.registry.getPromptDescription();
        // NOTE(review): `|| 'auto'` turns a null result into 'auto', so the
        // `tool_calling_method === null` branch of get_next_action is never taken — confirm intent.
        this.tool_calling_method = this._set_tool_calling_method() || 'auto';
        this.settings.message_context = this._set_message_context() || undefined;
        // Initialize message manager with state
        const system_message = new SystemPrompt(this.available_actions, this.settings.max_actions_per_step, override_system_message ?? undefined, extend_system_message ?? undefined).getSystemMessage();
        this._message_manager = new MessageManager(task, system_message, {
            maxTokens: this.settings.max_input_tokens,
            numChatTurnsToKeep: 10,
            imageTokens: 800,
            includeAttributes: this.settings.include_attributes,
            messageContext: this.settings.message_context,
            sensitiveData: this.sensitive_data || undefined,
            availableFilePath: this.settings.available_file_paths,
        }, this.state.message_manager_state);
        // Browser setup: remember what was injected so close() leaves it alone
        this.injected_browser = browser !== null;
        this.injected_browser_context = browser_context !== null;
        if (browser_context) {
            this.browser = browser;
            this.browser_context = browser_context;
        }
        else {
            this.browser = browser || new Browser();
            this.browser_context = new BrowserContext();
        }
        // Callbacks
        this.register_new_step_callback = register_new_step_callback;
        this.register_done_callback = register_done_callback;
        this.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback;
        // Context
        this.context = context;
        // Log conversation path if set
        if (this.settings.save_conversation_path) {
            console.info(`Saving conversation to ${this.settings.save_conversation_path}`);
        }
    }
    /**
     * Runs a single step and reports completion.
     *
     * @returns {Promise<[boolean, boolean]>} `[is_done, is_valid]`: `[false, false]`
     *   while the task is unfinished, `[true, false]` when it is done but the
     *   validator rejected the output, `[true, true]` otherwise.
     */
    async take_step() {
        // The step must finish before the history is inspected.
        await this.step();
        if (!this.state.history.isDone()) {
            return [false, false];
        }
        if (this.settings.validate_output && !(await this._validate_output())) {
            return [true, false];
        }
        if (typeof this.register_done_callback === 'function') {
            await this.register_done_callback(this.state.history);
        }
        return [true, true];
    }
    /**
     * Executes the initial actions (if any) and then up to `max_steps` steps.
     *
     * The loop ends early when the failure limit is reached, the agent is
     * stopped, or the task is done (and, when enabled, validated).
     *
     * @param {number} [max_steps=100] - Upper bound on the number of steps.
     * @returns {Promise<object>} The agent history (`this.state.history`), also
     *   when an unexpected error interrupted the run.
     */
    async run(max_steps = 100) {
        try {
            if (this.initial_actions) {
                const result = await this.multi_act(this.initial_actions);
                this.state.last_result = result;
            }
            let step = 0;
            for (; step < max_steps; step++) {
                if (this.state.consecutive_failures >= this.settings.max_failures) {
                    logger.error(`❌ Stopping due to ${this.settings.max_failures} consecutive failures`);
                    break;
                }
                if (this.state.stopped) {
                    logger.info('Agent stopped');
                    break;
                }
                // Poll while paused; a stop request ends the wait.
                while (this.state.paused) {
                    await new Promise(resolve => setTimeout(resolve, 200));
                    if (this.state.stopped) {
                        break;
                    }
                }
                const step_info = new AgentStepInfo(step, max_steps);
                await this.step(step_info);
                if (this.state.history.isDone()) {
                    logger.info('✅ Task completed');
                    if (this.settings.validate_output && step < max_steps - 1) {
                        if (!await this._validate_output()) {
                            continue;
                        }
                    }
                    break;
                }
            }
            // Only report exhaustion when the loop ran to its end; every
            // `break` above leaves `step` below `max_steps`.
            if (step >= max_steps) {
                logger.info('❌ Failed to complete task in maximum steps');
            }
            return this.state.history;
        }
        catch (e) {
            logger.error(e);
            // Hand back what was recorded so far instead of `undefined`.
            return this.state.history;
        }
    }
    /**
     * Executes a sequence of actions through the controller.
     *
     * Execution stops early when an action reports `isDone` or an error, or
     * when (with `check_for_new_elements`) the page shows elements that were
     * not present when the sequence started.
     *
     * @param {Array} actions - Action models exposing `getIndex()`.
     * @param {boolean} [check_for_new_elements=true] - Abort before an indexed
     *   action if the page gained elements since the sequence started.
     * @returns {Promise<Array>} One `ActionResult` per executed action, plus an
     *   informational result when the sequence was cut short by new elements.
     * @throws {Error} "InterruptedError" when the agent is stopped or paused.
     */
    async multi_act(actions, check_for_new_elements = true) {
        const results = [];
        const cached_selector_map = (await this.browser_context.get_selector_map()) || {};
        const cached_path_hashes = new Set(Object.values(cached_selector_map).map((e) => e.hash.branch_path_hash));
        await this.browser_context.remove_highlights();
        for (let i = 0; i < actions.length; i++) {
            const action = actions[i];
            if (action.getIndex() !== null && i !== 0) {
                const new_state = await this.browser_context.get_state();
                const new_path_hashes = Object.values(new_state.selectorMap).map((e) => e.hash.branch_path_hash);
                // New elements exist when the current hashes are not a subset of the cached ones.
                const has_new_elements = new_path_hashes.some((hash) => !cached_path_hashes.has(hash));
                if (check_for_new_elements && has_new_elements) {
                    const msg = `Something new appeared after action ${i} / ${actions.length}`;
                    logger.info(msg);
                    results.push(new ActionResult({ extractedContent: msg, includeInMemory: true }));
                    break;
                }
            }
            await this._raise_if_stopped_or_paused();
            const result = await this.controller.act(action, this.browser_context, this.settings.page_extraction_llm, this.sensitive_data || undefined, this.settings.available_file_paths, this.context);
            results.push(result);
            logger.debug(`Executed action ${i + 1} / ${actions.length}`);
            if (result.isDone || result.error || i === actions.length - 1) {
                break;
            }
            // NOTE(review): the config value is passed to setTimeout unchanged,
            // i.e. treated as milliseconds — confirm the unit of wait_between_actions.
            await new Promise(resolve => setTimeout(resolve, this.browser_context.config.wait_between_actions));
        }
        return results;
    }
    /**
     * Asks the LLM whether the last result satisfies the task.
     *
     * On rejection the validator's reason is stored in `state.last_result` so
     * the next step sees it.
     *
     * @returns {Promise<boolean>} The validator's verdict; `true` when there is
     *   no browser session to inspect.
     * @throws {Error} When the validator reply could not be parsed.
     */
    async _validate_output() {
        const system_msg = 'You are a validator of an agent who interacts with a browser. ' +
            'Validate if the output of last action is what the user wanted and if the task is completed. ' +
            'If the task is unclear defined, you can let it pass. But if something is missing or the image does not show what was requested dont let it pass. ' +
            'Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right. ' +
            `Task to validate: ${this.task}. Return a JSON object with 2 keys: is_valid and reason. ` +
            'is_valid is a boolean that indicates if the output is correct. ' +
            'reason is a string that explains why it is valid or not.' +
            ' example: {"is_valid": false, "reason": "The user wanted to search for \\"cat photos\\", but the agent searched for \\"dog photos\\" instead."}';
        if (!this.browser_context.session) {
            return true;
        }
        const state = await this.browser_context.get_state();
        const content = new AgentMessagePrompt(state, this.state.last_result || undefined, this.settings.include_attributes);
        const messages = [new SystemMessage({ content: system_msg }), content.getUserMessage(this.settings.use_vision)];
        // A JSON schema describes the expected fields to the model; an empty
        // class carries no field information.
        const validation_schema = {
            type: 'object',
            properties: {
                is_valid: { type: 'boolean' },
                reason: { type: 'string' },
            },
            required: ['is_valid', 'reason'],
        };
        const validator = this.llm.withStructuredOutput(validation_schema, { includeRaw: true, name: 'ValidationResult' });
        const response = await validator.invoke(messages);
        const parsed = response['parsed'];
        if (parsed == null) {
            throw new Error('Validator returned no parsable output');
        }
        const is_valid = parsed.is_valid;
        if (!is_valid) {
            logger.info(`❌ Validator decision: ${parsed.reason}`);
            const feedback = `The output is not yet correct. ${parsed.reason}.`;
            this.state.last_result = [new ActionResult({ extractedContent: feedback, includeInMemory: true })];
        }
        else {
            logger.info(`✅ Validator decision: ${parsed.reason}`);
        }
        return is_valid;
    }
    /** Forwards a follow-up task to the message manager. */
    add_new_task(new_task) {
        this._message_manager.add_new_task(new_task);
    }
    /**
     * Throws when the agent should not continue.
     *
     * @throws {Error} "InterruptedError" when the external status callback
     *   returns a truthy value or the agent is stopped or paused.
     */
    async _raise_if_stopped_or_paused() {
        if (this.register_external_agent_status_raise_error_callback) {
            if (await this.register_external_agent_status_raise_error_callback()) {
                throw new Error("InterruptedError");
            }
        }
        if (this.state.stopped || this.state.paused) {
            logger.debug('Agent paused');
            throw new Error("InterruptedError");
        }
    }
    /**
     * Executes one agent step: read browser state, query the model, run the
     * returned actions and append a history item.
     *
     * Errors are converted into an error `ActionResult` by
     * `_handle_step_error`; an interruption stores a "paused" result instead.
     *
     * @param {object} [step_info] - `AgentStepInfo`; on the last step the model
     *   is told to use only the "done" action.
     */
    async step(step_info) {
        logger.info(`📍 Current step`);
        let state = null;
        let model_output = null;
        let result = [];
        const step_start_time = Date.now();
        let tokens = 0;
        try {
            state = await this.browser_context.get_state();
            logger.debug("Browser COntext checked 1/5 ");
            await this._raise_if_stopped_or_paused();
            logger.debug("Resuming 2/5");
            this._message_manager.add_state_message(state, this.state.last_result, step_info, this.settings.use_vision);
            logger.debug("State message added 3/5");
            if (this.settings.planner_llm && this.state.n_steps % this.settings.planner_interval === 0) {
                const plan = await this._run_planner();
                this._message_manager.add_plan(plan, -1);
                logger.debug("Plan added 4/5");
            }
            if (step_info && step_info.is_last_step()) {
                let msg = 'Now comes your last step. Use only the "done" action now. No other actions - so here your action sequence must have length 1.';
                msg += '\nIf the task is not yet fully finished as requested by the user, set success in "done" to false! E.g. if not all steps are fully completed.';
                msg += '\nIf the task is fully finished, set success in "done" to true.';
                msg += '\nInclude everything you found out for the ultimate task in the done text.';
                logger.info('Last step finishing up');
                this._message_manager._add_message_with_tokens(new HumanMessage({ content: msg }));
            }
            const input_messages = this._message_manager.get_messages();
            tokens = this._message_manager.state.history.current_tokens;
            try {
                logger.info(`Requesting next action from model at current step`);
                model_output = await this.get_next_action(input_messages);
                logger.info(`🪜🪜 Model Output Receivved ${model_output.action}, ${model_output.current_state} `);
                logger.info(`Successfully received model output with ${model_output?.action?.length || 0} actions`);
                this.state.n_steps += 1;
                this._message_manager._remove_last_state_message();
                await this._raise_if_stopped_or_paused();
                if (model_output) {
                    logger.info(`Adding model output to message manager`);
                    this._message_manager.add_model_output(model_output);
                }
                else {
                    logger.warn(`Model output is null, skipping add to message manager`);
                }
            }
            catch (e) {
                logger.error(`Error getting next action: ${e.message}`);
                this._message_manager._remove_last_state_message();
                throw e;
            }
            logger.debug(`Executing actions from model output`);
            if (model_output && model_output.action) {
                result = await this.multi_act(model_output.action);
                logger.info(`Completed ${result.length} actions`);
            }
            else {
                logger.warn(`No actions to execute, model output is null or has no actions`);
                result = [];
            }
            this.state.last_result = result;
            if (result.length > 0 && result[result.length - 1].isDone) {
                logger.info(`📄 Result: ${result[result.length - 1].extractedContent}`);
            }
            this.state.consecutive_failures = 0;
        }
        catch (e) {
            if (e?.message === "InterruptedError") {
                logger.info('Agent paused');
                this.state.last_result = [
                    new ActionResult({ error: 'The agent was paused - now continuing actions might need to be repeated', includeInMemory: true })
                ];
                return;
            }
            else {
                result = await this._handle_step_error(e);
                this.state.last_result = result;
            }
        }
        finally {
            if (!result) {
                result = [];
            }
            if (state) {
                const metadata = new StepMetadata(this.state.n_steps, step_start_time, new Date().getTime(), tokens);
                // Awaited so the history item exists before callers check isDone().
                await this._make_history_item(model_output, state, result, metadata);
            }
        }
    }
    /**
     * Turns a step error into an error `ActionResult` and bumps the
     * consecutive-failure counter.
     *
     * Validation errors mentioning the token limit shrink the message history;
     * rate-limit errors wait `settings.retry_delay` seconds before returning.
     *
     * @param {Error} error - The error thrown during the step.
     * @returns {Promise<Array>} A single-element array with the error result.
     */
    async _handle_step_error(error) {
        let error_msg = AgentError.formatError(error);
        const prefix = `❌ Result failed ${this.state.consecutive_failures + 1}/${this.settings.max_failures} times:\n `;
        const raw_message = String(error?.message ?? '');
        if (error instanceof ValidationError || error instanceof EvalError) {
            logger.error(`${prefix}${error_msg}`);
            if (error_msg.includes('Max token limit reached')) {
                this._message_manager.settings.maxTokens = this.settings.max_input_tokens - 500;
                logger.info(`Cutting tokens from history - new max input tokens: ${this._message_manager.settings.maxTokens}`);
                this._message_manager.cut_messages();
            }
            else if (error_msg.includes('Could not parse response')) {
                error_msg += '\n\nReturn a valid JSON object with the required fields.';
            }
        }
        else if (error instanceof RateLimitError || (raw_message.includes('429') && raw_message.includes('quota'))) {
            logger.debuggerError(`Your API key has reached its rate limit. Please buy more credits and try again later.${prefix}`);
            // retry_delay is in seconds (like delay_between_actions); setTimeout expects milliseconds.
            await new Promise(resolve => setTimeout(resolve, this.settings.retry_delay * 1000));
        }
        else {
            logger.debuggerError(`${prefix}${error_msg}`);
        }
        this.state.consecutive_failures += 1;
        return [new ActionResult({ error: error_msg, includeInMemory: true })];
    }
    /**
     * Appends an `AgentHistory` entry for the finished step to `state.history.history`.
     *
     * @param model_output - Model output of the step, or null when none was obtained.
     * @param state - Browser state captured at the start of the step.
     * @param {Array} result - Action results of the step.
     * @param metadata - `StepMetadata` of the step.
     */
    async _make_history_item(model_output, state, result, metadata) {
        logger.debug(`Creating history item for current agent step`);
        let interacted_elements;
        if (model_output) {
            logger.debug(`Processing model output with ${model_output.action.length} actions for history`);
            interacted_elements = (await AgentHistory.getInteractedElement(model_output, state.selectorMap)) || [];
            logger.debug(`Found ${interacted_elements.filter(el => el !== null).length} interacted elements`);
        }
        else {
            logger.debug(`No model output, using null placeholder for interacted elements`);
            interacted_elements = [];
        }
        logger.debug(`Creating browser state history for URL: ${state.url}`);
        const state_history = new BrowserStateHistory(state.url, state.title, state.tab, interacted_elements, state.screenshot);
        const history_item = new AgentHistory(model_output, result, state_history, metadata);
        logger.debug(`History item created with ${result.length} action results`);
        this.state.history.history.push(history_item);
        logger.info(`Added history item to agent history (total: ${this.state.history.history.length} items)`);
    }
    /**
     * For the 'raw' tool calling method, appends the available actions to
     * `settings.message_context` (creating it when absent).
     *
     * @returns The resulting `settings.message_context`.
     */
    _set_message_context() {
        logger.debug(`Setting message context for tool calling method: ${this.tool_calling_method}`);
        if (this.tool_calling_method === 'raw') {
            if (this.settings.message_context) {
                logger.debug(`Appending available actions to existing message context`);
                this.settings.message_context += `\n\nAvailable actions: ${this.available_actions}`;
            }
            else {
                logger.debug(`Creating new message context with available actions`);
                this.settings.message_context = `Available actions: ${this.available_actions}`;
            }
        }
        logger.debug(`Final message context set (${this.settings.message_context ? 'exists' : 'null'})`);
        return this.settings.message_context;
    }
    /**
     * Resolves `chat_model_library`, `model_name` and `plannerModelName` from
     * the LLM instances.
     *
     * The model id is read from the `model_name` property, falling back to
     * `model`; when that property holds null/undefined the name is 'Unknown'.
     */
    _set_model_names() {
        logger.debug(`Setting model names from LLM instance`);
        this.chat_model_library = this.llm.constructor.name;
        logger.debug(`Chat model library set to: ${this.chat_model_library}`);
        this.model_name = this.llm.name;
        if ('model_name' in this.llm) {
            // Read the model id itself; getName()/name yield the runnable's name instead.
            this.model_name = this.llm.model_name ?? 'Unknown';
            logger.debug(`Model name set from model_name property: ${this.model_name}`);
        }
        else if ('model' in this.llm) {
            this.model_name = this.llm.model ?? 'Unknown';
            logger.debug(`Model name set from model.name property: ${this.model_name}`);
        }
        else {
            logger.warn(`Could not determine model name from LLM instance, using default: ${this.model_name}`);
        }
        const planner_llm = this.settings.planner_llm;
        if (!planner_llm) {
            this.plannerModelName = "";
            return;
        }
        logger.debug(`Planner LLM is set, determining planner model name`);
        if ('model_name' in planner_llm) {
            this.plannerModelName = planner_llm.model_name ?? 'Unknown';
            logger.debug(`Planner model name set from model_name property: ${this.plannerModelName}`);
        }
        else if ('model' in planner_llm) {
            this.plannerModelName = planner_llm.model ?? 'Unknown';
            logger.debug(`Planner model name set from model.name property: ${this.plannerModelName}`);
        }
        else {
            this.plannerModelName = "";
            logger.warn(`Could not determine planner model name from planner LLM instance, using default: ${this.plannerModelName}`);
        }
    }
    /**
     * Creates the action and output models from the controller registry.
     *
     * Synchronous on purpose: it is called from the constructor, where a
     * rejected promise would go unnoticed.
     */
    _setup_action_models() {
        this.ActionModel = this.controller.registry.create_action_model({});
        // Output model wrapping the dynamic action model
        this.AgentOutput = new AgentOutput({ action: [this.ActionModel] });
        // Used to force the done action when max_steps is reached
        this.DoneActionModel = this.controller.registry.create_action_model({ include_actions: ['done'] });
        this.DoneAgentOutput = new AgentOutput({ action: [this.DoneActionModel] });
    }
    /**
     * Picks the tool calling method. With the 'auto' setting the choice depends
     * on the model name / chat model class; any other setting is returned as is.
     *
     * @returns {string|null} 'raw', 'function_calling', the configured value, or null.
     */
    _set_tool_calling_method() {
        const tool_calling_method = this.settings.tool_calling_method;
        if (tool_calling_method !== 'auto') {
            return tool_calling_method;
        }
        if (this.model_name?.includes('deepseek-reasoner') || this.model_name?.includes('deepseek-r1')) {
            return 'raw';
        }
        if (this.chat_model_library === 'ChatOpenAI' || this.chat_model_library === 'AzureChatOpenAI') {
            return 'function_calling';
        }
        // ChatGoogleGenerativeAI and every other library: no explicit method
        return null;
    }
    THINK_TAGS = /<think>.*?<\/think>/gs;
    STRAY_CLOSE_TAG = /^.*?<\/think>/s;
    /**
     * Strips `<think>…</think>` sections from model text.
     *
     * @param {string} text - Raw model output.
     * @returns {string} The trimmed text without think sections.
     */
    _remove_think_tags(text) {
        // Step 1: Remove well-formed <think>...</think>
        logger.debug(`Removing think tags from text (length: ${text.length})`);
        text = text.replace(this.THINK_TAGS, '');
        // Step 2: If there's an unmatched closing tag </think>,
        // remove everything up to and including that.
        text = text.replace(this.STRAY_CLOSE_TAG, '');
        return text.trim();
    }
    /**
     * Applies `convertInputMessages` for deepseek reasoner/r1 models and
     * returns the messages untouched for every other model.
     */
    _convert_input_messages(input_messages) {
        logger.debug(`Converting ${input_messages.length} input messages for model: ${this.model_name}`);
        // Optional chaining: model_name may be undefined when it could not be determined.
        if (this.model_name === 'deepseek-reasoner' || this.model_name?.includes('deepseek-r1')) {
            logger.debug(`Using specialized conversion for ${this.model_name}`);
            return convertInputMessages(input_messages, this.model_name);
        }
        return input_messages;
    }
    /**
     * Parses JSON from model output.
     *
     * The content is parsed as is first; only when that fails is the body of
     * the first Markdown code fence (``` or ```json) tried.
     *
     * @param {string} content - Model output text.
     * @returns {*} The parsed JSON value.
     * @throws {Error} When no JSON could be parsed; the original parse error is attached as `cause`.
     */
    extract_json_from_model_output(content) {
        logger.debug(`Attempting to extract JSON from model output (length: ${content.length})`);
        try {
            const result = JSON.parse(content);
            logger.debug(`Successfully parsed JSON from model output`);
            return result;
        }
        catch (e) {
            const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(content);
            if (fenced) {
                try {
                    const result = JSON.parse(fenced[1]);
                    logger.debug(`Successfully parsed JSON from model output`);
                    return result;
                }
                catch (fence_error) {
                    logger.debug(`Fenced block is not valid JSON either: ${fence_error.message}`);
                }
            }
            logger.error(`Failed to parse JSON from model output: ${e.message}`);
            throw new Error(`Failed to parse JSON from model output: ${content}`, { cause: e });
        }
    }
    /**
     * Builds an `AgentOutput` holding a single action reconstructed from a tool call.
     *
     * @param {string} tool_call_name - Name of the registered action.
     * @param {*} tool_call_args - Arguments supplied by the model.
     * @returns {object|null} The output, or null when the action is not registered.
     */
    _build_output_from_tool_call(tool_call_name, tool_call_args) {
        logger.success(`🤖🤖${tool_call_name}===>${JSON.stringify(tool_call_args)}`);
        const action_schema = this.controller.registry.getActions()[tool_call_name];
        if (!action_schema) {
            logger.error(`Unknown action in tool call: ${tool_call_name}`);
            return null;
        }
        const current_state = new AgentBrain({
            evaluation_previous_goal: "Executing action",
            memory: "Using Tool call",
            next_goal: `Execute ${tool_call_name}`
        });
        const action = new ActionModel({ [tool_call_name]: action_schema.paramModel }, tool_call_args);
        return new AgentOutput({ current_state: current_state, action: [action] });
    }
    /**
     * Queries the LLM for the next actions.
     *
     * Depending on `tool_calling_method` the reply is parsed from raw text,
     * taken from the structured output, or rebuilt from the first tool call of
     * the raw message. The action list is cut to `max_actions_per_step`.
     *
     * @param {Array} input_messages - Messages to send to the model.
     * @returns {Promise<object>} The parsed agent output.
     * @throws {Error} 'Could not parse response.' when no output could be built.
     */
    async get_next_action(input_messages) {
        const jsonSchema = this.AgentOutput.toJson();
        logger.debug(`Getting next action with ${input_messages.length} input messages`);
        this._set_model_names();
        input_messages = this._convert_input_messages(input_messages);
        let response;
        let parsed = null;
        if (this.tool_calling_method === 'raw') {
            logger.debug(`Using raw tool calling method`);
            const output = await this.llm.invoke(input_messages);
            // TODO: currently invoke does not return reasoning_content, we should override invoke
            const content = this._remove_think_tags(String(output.content));
            try {
                logger.debug(`Parsing raw model output`);
                const parsed_json = this.extract_json_from_model_output(content);
                parsed = new AgentOutput({ action: [parsed_json] });
            }
            catch (e) {
                logger.warn(`Failed to parse model output: ${e.message}`);
                throw new Error('Could not parse response.');
            }
        }
        else if (this.tool_calling_method === null) {
            logger.debug(`Using structured output without specific tool calling method`);
            const structured_llm = this.llm.withStructuredOutput(jsonSchema, { includeRaw: true });
            response = await structured_llm.invoke(input_messages);
            parsed = response['parsed'];
        }
        else {
            logger.debug(`Using structured output with ${this.tool_calling_method} tool calling method`);
            const structured_llm = this.llm.withStructuredOutput(jsonSchema, { includeRaw: true, method: this.tool_calling_method });
            response = await structured_llm.invoke(input_messages);
            const raw_message = response.raw;
            // `parsed` is null when the library could not parse the reply.
            const parsed_is_empty = response.parsed == null || response.parsed.length <= 0;
            if (parsed_is_empty || raw_message?.tool_calls) {
                if (raw_message?.tool_calls && raw_message.tool_calls.length > 0) {
                    // Only the first tool call (and its first action) is used.
                    const tool_call = raw_message.tool_calls[0];
                    // Get action data which might be in different formats
                    const actionData = tool_call.args?.action;
                    if (Array.isArray(actionData) && actionData.length > 0) {
                        // Array format like [ { search: { query: 'text' } } ]:
                        // the first key is the action name, its value the parameters.
                        const actionObj = actionData[0];
                        const tool_call_name = Object.keys(actionObj)[0];
                        const tool_call_args = actionObj[tool_call_name];
                        logger.debug(`Found array action: ${tool_call_name}`);
                        logger.debug(`With args: ${JSON.stringify(tool_call_args)}`);
                        parsed = this._build_output_from_tool_call(tool_call_name, tool_call_args);
                    }
                    else if (typeof actionData === 'object' && actionData !== null) {
                        // Direct object format with name/args, or { actionName: params }
                        const tool_call_name = actionData.name || Object.keys(actionData)[0];
                        const tool_call_args = actionData.args || actionData[tool_call_name];
                        logger.debug(`Tool call name: ${tool_call_name}`);
                        logger.debug(`Tool call args: ${JSON.stringify(tool_call_args)}`);
                        parsed = this._build_output_from_tool_call(tool_call_name, tool_call_args);
                    }
                    else {
                        logger.error(`Unsupported action format: ${JSON.stringify(actionData)}`);
                    }
                }
                else {
                    logger.error(`No tool calls found in response`);
                }
            }
            else {
                // NOTE(review): a non-empty `response.parsed` without tool calls is
                // discarded here and reported as unparsable — confirm this is intended.
                parsed = null;
            }
        }
        if (parsed === null) {
            logger.error(`Could not parse model response after all attempts`);
            throw new Error('Could not parse response.');
        }
        // cut the number of actions to max_actions_per_step if needed
        if (parsed.action && parsed.action.length > this.settings.max_actions_per_step) {
            logger.warn(`Limiting actions from ${parsed.action.length} to max ${this.settings.max_actions_per_step}`);
            parsed.action = parsed.action.slice(0, this.settings.max_actions_per_step);
        }
        logger.debug(`Successfully got next action with ${parsed.action?.length || 0} actions`);
        return parsed;
    }
    /**
     * Reruns a saved history of actions with error handling and retry logic.
     *
     * @param history - The history to replay (`history.history` is the list of steps).
     * @param {number} [max_retries=1] - Maximum number of attempts per step.
     * @param {boolean} [skip_failures=true] - Skip failed steps instead of stopping.
     * @param {number} [delay_between_actions=2.0] - Delay between actions in seconds.
     * @returns {Promise<Array>} List of action results.
     * @throws {Error} When a step fails after all attempts and `skip_failures` is false.
     */
    async rerun_history(history, max_retries = 1, skip_failures = true, delay_between_actions = 2.0) {
        // Execute initial actions if provided
        if (this.initial_actions) {
            logger.debug(`Executing ${this.initial_actions.length} initial actions before replay`);
            const result = await this.multi_act(this.initial_actions);
            this.state.last_result = result;
        }
        const results = [];
        logger.info(`Starting history replay with ${history.history.length} steps`);
        for (let i = 0; i < history.history.length; i++) {
            const history_item = history.history[i];
            const goal = history_item.model_output ? history_item.model_output.current_state.next_goal : '';
            logger.info(`Replaying step ${i + 1}/${history.history.length}: goal: ${goal}`);
            if (!history_item.model_output || !history_item.model_output.action || history_item.model_output.action[0] === null) {
                logger.error(`Step ${i + 1}: No action to replay, skipping`);
                results.push(new ActionResult({ error: 'No action to replay' }));
                continue;
            }
            let retry_count = 0;
            logger.debug(`Step ${i + 1}: Will attempt execution up to ${max_retries} times if needed`);
            while (retry_count < max_retries) {
                try {
                    logger.debug(`Step ${i + 1}: Execution attempt ${retry_count + 1}`);
                    const result = await this._execute_history_step(history_item, delay_between_actions);
                    logger.debug(`Step ${i + 1}: Execution successful, got ${result.length} results`);
                    results.push(...result);
                    break;
                }
                catch (e) {
                    retry_count += 1;
                    logger.warn(`Step ${i + 1}: Execution attempt ${retry_count} failed: ${e.message}`);
                    if (retry_count === max_retries) {
                        const error_msg = `Step ${i + 1} failed after ${max_retries} attempts: ${e.message}`;
                        logger.error(error_msg);
                        if (!skip_failures) {
                            results.push(new ActionResult({ error: error_msg }));
                            throw new Error(error_msg);
                        }
                    }
                    else {
                        logger.error(`Step ${i + 1} failed (attempt ${retry_count}/${max_retries}), retrying...`);
                        await new Promise(resolve => setTimeout(resolve, delay_between_actions * 1000));
                    }
                }
            }
        }
        return results;
    }
    /**
     * Executes a single step from history after re-locating its elements on
     * the current page.
     *
     * @param history_item - The recorded step.
     * @param {number} delay - Seconds to wait after the actions ran.
     * @returns {Promise<Array>} The action results of the step.
     * @throws {Error} When there is no browser state / model output, or an
     *   element of the step cannot be found on the current page.
     */
    async _execute_history_step(history_item, delay) {
        logger.debug(`Executing history step with delay: ${delay}s`);
        const state = await this.browser_context?.get_state();
        if (!state || !history_item.model_output) {
            logger.error('Invalid state or model output in _execute_history_step');
            throw new Error('Invalid state or model output');
        }
        const recorded_actions = history_item.model_output.action;
        logger.debug(`Current page URL: ${state.url}, attempting to match ${recorded_actions.length} actions`);
        const interacted_elements = history_item?.state?.interacted_element;
        const updated_actions = [];
        for (let i = 0; i < recorded_actions.length; i++) {
            const action = recorded_actions[i];
            logger.debug(`Processing action ${i + 1}/${recorded_actions.length}: ${action.constructor.name}`);
            const updated_action = await this._update_action_indices(interacted_elements?.[i] ?? null, action, state);
            if (updated_action === null) {
                logger.error(`Failed to find matching element for action ${i + 1} in current page state`);
                throw new Error(`Could not find matching element ${i} in current page`);
            }
            logger.debug(`Successfully updated action ${i + 1} indices`);
            updated_actions.push(updated_action);
        }
        logger.debug(`Executing ${updated_actions.length} updated actions`);
        const result = await this.multi_act(updated_actions);
        logger.debug(`Action execution complete, got ${result.length} results`);
        logger.debug(`Waiting for ${delay}s before next step`);
        await new Promise(resolve => setTimeout(resolve, delay * 1000));
        return result;
    }
    /**
     * Updates an action's element index to match the current page state.
     *
     * @param historical_element - Element recorded for the action, or null.
     * @param action - Action exposing `getIndex()` / `setIndex()`; mutated in place.
     * @param current_state - Current browser state (uses `elementTree`).
     * @returns {Promise<object|null>} The action (unchanged when there is no
     *   historical element or element tree), or null if the element cannot be found.
     */
    async _update_action_indices(historical_element, action, current_state) {
        logger.debug(`Updating action indices for action type: ${action.constructor.name}`);
        if (!historical_element || !current_state.elementTree) {
            logger.debug(`No historical element or current element tree, returning original action`);
            return action;
        }
        logger.debug(`Searching for historical element in current page DOM structure`);
        const current_element = HistoryTreeProcessor.findHistoryElementInTree(historical_element, current_state.elementTree);
        if (!current_element || current_element.highlightIndex === null) {
            logger.warn(`Could not find matching element in current page state`);
            return null;
        }
        logger.debug(`Found matching element with highlight index: ${current_element.highlightIndex}`);
        const old_index = action.getIndex();
        if (old_index !== current_element.highlightIndex) {
            action.setIndex(current_element.highlightIndex);
            logger.info(`Element moved in DOM, updated index from ${old_index} to ${current_element.highlightIndex}`);
        }
        else {
            logger.debug(`Element position unchanged, index remains: ${old_index}`);
        }
        return action;
    }
    /**
     * Saves the history to a file; failures are logged, not thrown.
     *
     * @param {string} [file_path] - Target path; falls back to 'AgentHistory.json' when falsy.
     */
    save_history(file_path) {
        if (!file_path) {
            file_path = 'AgentHistory.json';
        }
        logger.debug(`Saving agent history to file: ${file_path}`);
        try {
            this.state.history.saveToFile(file_path);
            logger.info(`✅ Successfully saved history to ${file_path}`);
        }
        catch (error) {
            logger.error(`Failed to save history to ${file_path}: ${error.message}`);
        }
    }
    /** Pauses the agent before the next step. */
    pause() {
        logger.info('🔄 pausing Agent ');
        this.state.paused = true;
        logger.debug(`Agent state updated: paused=${this.state.paused}`);
    }
    /** Resumes a paused agent. */
    resume() {
        logger.info('▶️ Agent resuming');
        this.state.paused = false;
        logger.debug(`Agent state updated: paused=${this.state.paused}`);
    }
    /** Stops the agent from executing any more steps. */
    stop() {
        logger.info('🛑 Agent stopping');
        this.state.stopped = true;
        logger.debug(`Agent state updated: stopped=${this.state.stopped}`);
    }
    /**
     * Converts dictionary-based actions to `ActionModel` instances.
     *
     * @param {Array<object>} actions - Objects with a single `actionName: params` pair.
     * @returns {Array} The converted action models.
     * @throws {Error} When an action name is not registered; parameter
     *   validation errors from the action's param model propagate as well.
     */
    _convert_initial_actions(actions) {
        const registered_actions = this.controller.registry.getActions();
        const converted_actions = [];
        for (const action_dict of actions) {
            // Each action_dict should have a single key-value pair
            const action_name = Object.keys(action_dict)[0];
            const params = action_dict[action_name];
            // Get the parameter model for this action from registry
            const action_info = registered_actions[action_name];
            if (!action_info) {
                throw new Error(`Unknown initial action: ${action_name}`);
            }
            // Create validated parameters using the appropriate param model
            const validated_params = action_info.paramModel.parse(params);
            // Create ActionModel instance with the validated parameters
            converted_actions.push(new ActionModel({ [action_name]: validated_params }, validated_params));
        }
        return converted_actions;
    }
    /**
     * Runs the planner LLM over the message history to suggest next steps.
     *
     * The system message is replaced by the planner prompt; when vision is on
     * for the agent but off for the planner, images are dropped from the last
     * message.
     *
     * @returns {Promise<string|null>} The plan text, or null when no planner LLM is set.
     */
    async _run_planner() {
        logger.info('Running planner');
        // Skip planning if no planner_llm is set
        if (!this.settings.planner_llm) {
            return null;
        }
        // Create planner message history using full message history
        const planner_messages = [
            new PlannerPrompt(this.controller.registry.getPromptDescription()).getSystemMessage(),
            ...this._message_manager.get_messages().slice(1), // Use full message history except the first
        ];
        if (!this.settings.use_vision_for_planner && this.settings.use_vision) {
            const last_index = planner_messages.length - 1;
            const last_state_message = planner_messages[last_index];
            // Remove image from last state message: keep only the text parts
            let new_msg = '';
            if (Array.isArray(last_state_message.content)) {
                for (const part of last_state_message.content) {
                    if (part['type'] === 'text') {
                        new_msg += part['text'];
                    }
                }
            }
            else {
                new_msg = last_state_message.content;
            }
            planner_messages[last_index] = new HumanMessage({ content: new_msg });
        }
        // Prefer the resolved model id; fall back to the instance's `name`.
        const planner_model_name = this.plannerModelName || this.settings.planner_llm.name;
        const processed_planner_messages = convertInputMessages(planner_messages, planner_model_name);
        // Get planner output
        const response = await this.settings.planner_llm.invoke(processed_planner_messages);
        let plan = String(response.content);
        // if deepseek-reasoner, remove think tags
        if (planner_model_name && (planner_model_name.includes('deepseek-r1') || planner_model_name.includes('deepseek-reasoner'))) {
            plan = this._remove_think_tags(plan);
        }
        try {
            const plan_json = JSON.parse(plan);
            logger.info(`Planning Analysis:\n${JSON.stringify(plan_json, null, 4)}`);
        }
        catch (e) {
            if (e instanceof SyntaxError) {
                // Plan is not JSON: log it verbatim
                logger.info(`Planning Analysis:\n${plan}`);
            }
            else {
                logger.debug(`Error parsing planning analysis: ${e.message}`);
                logger.info(`Plan: ${plan}`);
            }
        }
        return plan;
    }
    /** The message manager created in the constructor. */
    get message_manager() {
        return this._message_manager;
    }
    /**
     * Closes the browser context and browser unless they were injected by the
     * caller; errors are logged, not thrown.
     */
    async close() {
        try {
            // First close browser resources
            if (this.browser_context && !this.injected_browser_context) {
                await this.browser_context.close();
            }
            if (this.browser && !this.injected_browser) {
                await this.browser.close();
            }
        }
        catch (e) {
            logger.error(`Error during cleanup: ${e.message}`);
        }
    }
}
//# sourceMappingURL=agentClass.js.map