browser-use-typescript
Version:
A TypeScript-based browser automation framework
955 lines (953 loc) • 46.5 kB
JavaScript
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
import { Browser } from "../../browser/playwrightBrowser/browserService";
import { BrowserContext } from "../../browser/playwrightBrowser/browserContext";
import { Controller } from "../../controller/controllerContext";
import { RateLimitError } from "../types";
import { ActionResult, AgentError, AgentHistory, AgentOutput, AgentBrain, AgentSettings, AgentState, StepMetadata, ValidationError } from "../types";
import { AgentStepInfo } from "../types";
import { convertInputMessages } from "../message_manager/utils";
import { BrowserStateHistory } from "../../browser/playwrightBrowser/type";
import { MessageManager } from "../message_manager/services";
import { AgentMessagePrompt, PlannerPrompt, SystemPrompt } from "../prompt";
import { ActionModel } from "../../controller/registry/types";
import { HistoryTreeProcessor } from "../../domHIstory/historyTypes";
/**
 * Small console logger that wraps every line in ANSI colour codes.
 *
 * The level methods (`error`, `log`, `debug`, `info`, `warn`, `trace`) print
 * only while `debugEnabled` is true. `success` and `debuggerError` are gated
 * separately: they print only while `isDebugger` is false.
 */
class Logger {
  debugEnabled = false;
  isDebugger = false;
  // ANSI escape sequences: text attributes first, then background colours.
  colors = {
    reset: "\x1b[0m",
    white: "\x1b[37m",
    bold: "\x1b[1m",
    italic: "\x1b[3m",
    bgRed: "\x1b[41m",
    bgPurple: "\x1b[45m",
    bgBlue: "\x1b[44m",
    bgYellow: "\x1b[43m",
    bgGreen: "\x1b[42m"
  };

  /**
   * @param {boolean} debugEnabled - turns the level methods on.
   * @param {boolean} isDebugger - when true, silences `success` and `debuggerError`.
   */
  constructor(debugEnabled = true, isDebugger = false) {
    this.debugEnabled = debugEnabled;
    this.isDebugger = isDebugger;
  }

  /**
   * Build one decorated line: the named styles, "<LABEL>: <message>", then reset.
   * Colours are looked up at call time so changes to `colors` take effect.
   */
  #paint(styleKeys, label, message) {
    const opening = styleKeys.reduce((acc, key) => `${acc}${this.colors[key]}`, "");
    return `${opening}${label}: ${message}${this.colors.reset}`;
  }

  error(message) {
    if (!this.debugEnabled) {
      return;
    }
    console.error(this.#paint(["bgRed", "white"], "ERROR", message));
  }

  log(message) {
    if (!this.debugEnabled) {
      return;
    }
    console.log(this.#paint(["bgBlue", "white"], "LOG", message));
  }

  debug(message) {
    if (!this.debugEnabled) {
      return;
    }
    console.log(this.#paint(["bgPurple", "white"], "DEBUG", message));
  }

  info(message) {
    if (!this.debugEnabled) {
      return;
    }
    console.log(this.#paint(["bgGreen", "white"], "INFO", message));
  }

  warn(message) {
    if (!this.debugEnabled) {
      return;
    }
    console.warn(this.#paint(["bgYellow", "white"], "WARN", message));
  }

  trace(message) {
    if (!this.debugEnabled) {
      return;
    }
    console.trace(this.#paint(["bgBlue", "white"], "TRACE", message));
  }

  success(message) {
    if (this.isDebugger) {
      return;
    }
    console.log(this.#paint(["bold", "bgGreen", "white"], "SUCCESS", message));
  }

  debuggerError(message) {
    if (this.isDebugger) {
      return;
    }
    console.log(this.#paint(["italic", "bgRed", "white"], "ERROR", message));
  }
}
// Shared module logger: level output off, success/debuggerError output on.
export const logger = new Logger(false, false);
/**
 * Browser automation agent.
 *
 * Holds the task, the LLM, the controller, the browser handles and the
 * mutable run state. The fields below are declared up front and populated
 * by the constructor and its setup helpers.
 */
export class Agent {
// Core components
task;
llm;
controller;
sensitive_data;
// Planner model name; reset to "" by _set_model_names when no planner LLM is configured.
plannerModelName = "";
// Settings
settings;
// State
state;
// Models and Actions
// Prompt description obtained from the controller registry.
available_actions;
tool_calling_method;
initial_actions;
// Constructor name of the LLM instance (set in _set_model_names).
chat_model_library;
model_name;
browser_use_version;
browser_use_source;
// Message Management
_message_manager;
// Browser
browser;
browser_context;
// True when the constructor received a non-null browser / browser_context.
injected_browser;
injected_browser_context;
// Callbacks
register_new_step_callback;
register_done_callback;
register_external_agent_status_raise_error_callback;
// Context
context;
// Action model / output restricted to the 'done' action (built in _setup_action_models).
DoneActionModel;
DoneAgentOutput;
// Telemetry
// Replace with actual ProductTelemetry when implemented
// Action model / output covering every registered action (built in _setup_action_models).
ActionModel;
AgentOutput;
/**
 * Agent constructor
 * Equivalent to Python's __init__ method
 *
 * All parameters are positional. The order of the setup calls in the body
 * matters: action models are built before the initial actions are
 * converted, and the tool calling method is resolved before the message
 * context is computed.
 *
 * @param task - task description; stored and handed to the MessageManager.
 * @param llm - main chat model; also used for page extraction when
 *   `page_extraction_llm` is not given.
 * @param browser - optional browser; a new Browser is created when neither
 *   a browser nor a browser context is supplied.
 * @param browser_context - optional browser context; when supplied it is
 *   used as-is, otherwise a new BrowserContext is created.
 * @param controller - action controller (defaults to a new Controller).
 * @param sensitive_data - stored and forwarded to the MessageManager.
 * @param initial_actions - converted with _convert_initial_actions when given.
 * @param register_new_step_callback - stored as-is.
 * @param register_done_callback - stored as-is.
 * @param register_external_agent_status_raise_error_callback - stored as-is.
 * @param injected_agent_state - used instead of a fresh AgentState when given.
 * @param context - stored as-is.
 *
 * The parameters from `use_vision` through `planner_interval` are copied
 * into a new AgentSettings instance.
 */
constructor(task, llm,
// Optional parameters
browser = null, browser_context = null, controller = new Controller(),
// Initial agent run parameters
sensitive_data = null, initial_actions = null,
// Cloud Callbacks
register_new_step_callback = null, register_done_callback = null, register_external_agent_status_raise_error_callback = null,
// Agent settings
use_vision = true, use_vision_for_planner = false, save_conversation_path, save_conversation_path_encoding, max_failures = 3, retry_delay = 10, override_system_message, extend_system_message, max_input_tokens = 128000, validate_output = false, message_context, generate_gif = false, available_file_paths, include_attributes = [
'title',
'type',
'name',
'role',
'aria-label',
'placeholder',
'value',
'alt',
'aria-expanded',
'data-date-format',
], max_actions_per_step = 10, tool_calling_method = null, page_extraction_llm = null, planner_llm = null, planner_interval = 1, // Run planner every N steps
// Inject state
injected_agent_state = null,
//
context = null) {
// Fall back to the main LLM for page extraction.
const finalPageExtractionLlm = page_extraction_llm || llm;
// Core components
this.task = task;
this.llm = llm;
this.controller = controller;
this.sensitive_data = sensitive_data;
// Null-ish optional settings are normalised to undefined.
this.settings = new AgentSettings({
use_vision,
use_vision_for_planner,
save_conversation_path: save_conversation_path ?? undefined,
save_conversation_path_encoding: save_conversation_path_encoding ?? undefined,
max_failures,
retry_delay,
override_system_message: override_system_message ?? undefined,
extend_system_message: extend_system_message ?? undefined,
max_input_tokens,
validate_output,
message_context: message_context ?? undefined,
generate_gif,
available_file_paths: available_file_paths ?? undefined,
include_attributes,
max_actions_per_step,
tool_calling_method: tool_calling_method || 'auto',
page_extraction_llm: finalPageExtractionLlm,
planner_llm: planner_llm ?? undefined,
planner_interval,
});
// Initialize state
this.state = injected_agent_state || new AgentState();
// Action setup
// _setup_action_models is declared async but is not awaited here.
this._setup_action_models();
this.initial_actions = initial_actions ? this._convert_initial_actions(initial_actions) : undefined;
// Model setup
this._set_model_names();
// For models without tool calling, add available actions to context
this.available_actions = this.controller.registry.getPromptDescription();
// NOTE(review): `|| 'auto'` turns a null result from _set_tool_calling_method
// back into 'auto', so the `=== null` branch in get_next_action looks
// unreachable — confirm whether that is intended.
this.tool_calling_method = this._set_tool_calling_method() || 'auto';
this.settings.message_context = this._set_message_context() || undefined;
// Initialize message manager with state
this._message_manager = new MessageManager(task, new SystemPrompt(this.available_actions, this.settings.max_actions_per_step, override_system_message ?? undefined, extend_system_message ?? undefined).getSystemMessage(), {
maxTokens: this.settings.max_input_tokens,
numChatTurnsToKeep: 10,
imageTokens: 800,
includeAttributes: this.settings.include_attributes,
messageContext: this.settings.message_context,
sensitiveData: this.sensitive_data || undefined,
availableFilePath: this.settings.available_file_paths,
}, this.state.message_manager_state);
// Browser setup
this.injected_browser = browser !== null;
this.injected_browser_context = browser_context !== null;
if (browser_context) {
// A supplied context is used as-is; `browser` may still be null here.
this.browser = browser;
this.browser_context = browser_context;
}
else {
// NOTE(review): the new BrowserContext is created without a reference to
// this.browser — confirm that BrowserContext does not need one.
this.browser = browser || new Browser();
this.browser_context = new BrowserContext();
}
// Callbacks
this.register_new_step_callback = register_new_step_callback;
this.register_done_callback = register_done_callback;
this.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback;
// Context
this.context = context;
// Log conversation path if set
// Uses console.info directly, so this prints even though the module logger is disabled by default.
if (this.settings.save_conversation_path) {
console.info(`Saving conversation to ${this.settings.save_conversation_path}`);
}
}
async take_step() {
this.step();
if (this.state.history.isDone()) {
if (this.settings.validate_output) {
if (!await this._validate_output()) {
return [true, false];
}
}
if (this.register_done_callback) {
if (typeof this.register_done_callback === 'function') {
await this.register_done_callback(this.state.history);
}
}
return [true, true];
}
return [false, false];
}
async run(max_steps = 100) {
try {
if (this.initial_actions) {
const result = await this.multi_act(this.initial_actions);
this.state.last_result = result;
}
for (let step = 0; step < max_steps; step++) {
if (this.state.consecutive_failures >= this.settings.max_failures) {
logger.error(`❌ Stopping due to ${this.settings.max_failures} consecutive failures`);
break;
}
if (this.state.stopped) {
logger.info('Agent stopped');
break;
}
while (this.state.paused) {
await new Promise(resolve => setTimeout(resolve, 200));
if (this.state.stopped) {
break;
}
}
const step_info = new AgentStepInfo(step, max_steps);
await this.step(step_info);
if (this.state.history.isDone()) {
logger.info('✅ Task completed');
if (this.settings.validate_output && step < max_steps - 1) {
if (!await this._validate_output()) {
continue;
}
}
break;
}
}
logger.info('❌ Failed to complete task in maximum steps');
return this.state.history;
}
catch (e) {
logger.error(e);
}
}
async multi_act(actions, check_for_new_elements = true) {
const results = [];
const cached_selector_map = await this.browser_context.get_selector_map() || undefined;
const cached_path_hashes = new Set(Array.from(Object.values(cached_selector_map)).map(e => e.hash.branch_path_hash));
await this.browser_context.remove_highlights();
for (let i = 0; i < actions.length; i++) {
const action = actions[i];
if (action.getIndex() !== null && i !== 0) {
const new_state = await this.browser_context.get_state();
const new_path_hashes = new Set(Array.from(Object.values(new_state.selectorMap)).map((e) => e.hash.branch_path_hash));
if (check_for_new_elements && !new_path_hashes.has(cached_path_hashes)) {
const msg = `Something new appeared after action ${i} / ${actions.length}`;
logger.info(msg);
results.push(new ActionResult({ extractedContent: msg, includeInMemory: true }));
break;
}
}
await this._raise_if_stopped_or_paused();
const result = await this.controller.act(action, this.browser_context, this.settings.page_extraction_llm, this.sensitive_data || undefined, this.settings.available_file_paths, this.context);
results.push(result);
logger.debug(`Executed action ${i + 1} / ${actions.length}`);
if (results[results.length - 1].isDone || results[results.length - 1].error || i === actions.length - 1) {
break;
}
await new Promise(resolve => setTimeout(resolve, this.browser_context.config.wait_between_actions));
}
return results;
}
async _validate_output() {
const system_msg = 'You are a validator of an agent who interacts with a browser. ' +
'Validate if the output of last action is what the user wanted and if the task is completed. ' +
'If the task is unclear defined, you can let it pass. But if something is missing or the image does not show what was requested dont let it pass. ' +
'Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right. ' +
`Task to validate: ${this.task}. Return a JSON object with 2 keys: is_valid and reason. ` +
'is_valid is a boolean that indicates if the output is correct. ' +
'reason is a string that explains why it is valid or not.' +
' example: {"is_valid": false, "reason": "The user wanted to search for \\"cat photos\\", but the agent searched for \\"dog photos\\" instead."}';
if (this.browser_context.session) {
const state = await this.browser_context.get_state();
const content = new AgentMessagePrompt(state, this.state.last_result || undefined, this.settings.include_attributes);
const msg = [new SystemMessage({ content: system_msg }), content.getUserMessage(this.settings.use_vision)];
class ValidationResult {
is_valid;
reason;
}
const validator = this.llm.withStructuredOutput(ValidationResult, { "includeRaw": true });
const response = await validator.invoke(msg);
const parsed = response['parsed'];
const is_valid = parsed.is_valid;
if (!is_valid) {
logger.info(`❌ Validator decision: ${parsed.reason}`);
const msg = `The output is not yet correct. ${parsed.reason}.`;
this.state.last_result = [new ActionResult({ extractedContent: msg, includeInMemory: true })];
}
else {
logger.info(`✅ Validator decision: ${parsed.reason}`);
}
return is_valid;
}
else {
return true;
}
}
add_new_task(new_task) {
this._message_manager.add_new_task(new_task);
}
async _raise_if_stopped_or_paused() {
if (this.register_external_agent_status_raise_error_callback) {
if (await this.register_external_agent_status_raise_error_callback()) {
throw new Error("InterruptedError");
}
}
if (this.state.stopped || this.state.paused) {
logger.debug('Agent paused');
throw new Error("InterruptedError");
}
}
async step(step_info) {
logger.info(`📍 Current step`);
let state = null;
let model_output = null;
let result = [];
const step_start_time = Date.now();
let tokens = 0;
try {
state = await this.browser_context.get_state();
logger.debug("Browser COntext checked 1/5 ");
await this._raise_if_stopped_or_paused();
logger.debug("Resuming 2/5");
this._message_manager.add_state_message(state, this.state.last_result, step_info, this.settings.use_vision);
logger.debug("State message added 3/5");
if (this.settings.planner_llm && this.state.n_steps % this.settings.planner_interval == 0) {
const plan = await this._run_planner();
this._message_manager.add_plan(plan, -1);
logger.debug("Plan added 4/5");
}
if (step_info && step_info.is_last_step()) {
let msg = 'Now comes your last step. Use only the "done" action now. No other actions - so here your action sequence must have length 1.';
msg += '\nIf the task is not yet fully finished as requested by the user, set success in "done" to false! E.g. if not all steps are fully completed.';
msg += '\nIf the task is fully finished, set success in "done" to true.';
msg += '\nInclude everything you found out for the ultimate task in the done text.';
logger.info('Last step finishing up');
this._message_manager._add_message_with_tokens(new HumanMessage({ content: msg }));
}
const input_messages = this._message_manager.get_messages();
tokens = this._message_manager.state.history.current_tokens;
try {
logger.info(`Requesting next action from model at current step`);
model_output = await this.get_next_action(input_messages);
logger.info(`🪜🪜 Model Output Receivved ${model_output.action}, ${model_output.current_state} `);
logger.info(`Successfully received model output with ${model_output?.action?.length || 0} actions`);
this.state.n_steps += 1;
this._message_manager._remove_last_state_message();
await this._raise_if_stopped_or_paused();
if (model_output) {
logger.info(`Adding model output to message manager`);
this._message_manager.add_model_output(model_output);
}
else {
logger.warn(`Model output is null, skipping add to message manager`);
}
}
catch (e) {
logger.error(`Error getting next action: ${e.message}`);
this._message_manager._remove_last_state_message();
throw e;
}
logger.debug(`Executing actions from model output`);
if (model_output && model_output.action) {
result = await this.multi_act(model_output.action);
logger.info(`Completed ${result.length} actions`);
}
else {
logger.warn(`No actions to execute, model output is null or has no actions`);
result = [];
}
this.state.last_result = result;
if (result.length > 0 && result[result.length - 1].isDone) {
logger.info(`📄 Result: ${result[result.length - 1].extractedContent}`);
}
this.state.consecutive_failures = 0;
}
catch (e) {
if (e.message === "InterruptedError") {
logger.info('Agent paused');
this.state.last_result = [
new ActionResult({
error: 'The agent was paused - now continuing actions might need to be repeated', includeInMemory: true
})
];
return;
}
else {
result = await this._handle_step_error(e);
this.state.last_result = result;
}
}
finally {
if (!result) {
result = [];
}
if (state) {
const metadata = new StepMetadata(this.state.n_steps, step_start_time, new Date().getTime(), tokens);
this._make_history_item(model_output, state, result, metadata);
}
}
}
/**
 * Turn a step failure into an ActionResult and update failure bookkeeping.
 *
 * Every path increments `state.consecutive_failures` by one. Validation
 * errors that hit the token limit shrink the message history; rate-limit
 * errors wait `settings.retry_delay` seconds before returning.
 *
 * @param error - the value thrown by the step.
 * @returns {Promise<Array>} a single-element list holding the error result.
 */
async _handle_step_error(error) {
  let error_msg = AgentError.formatError(error);
  const prefix = `❌ Result failed ${this.state.consecutive_failures + 1}/${this.settings.max_failures} times:\n `;
  // Non-Error values may be thrown; never assume `.message` is a string.
  const raw_message = String(error?.message ?? '');
  const formatted = typeof error_msg === 'string' ? error_msg : '';
  const hit_token_limit = formatted.includes('Max token limit reached');
  const parse_failed = !hit_token_limit && formatted.includes('Could not parse response');
  if (error instanceof ValidationError || error instanceof EvalError) {
    logger.error(`${prefix}${error_msg}`);
    if (hit_token_limit) {
      this._message_manager.settings.maxTokens = this.settings.max_input_tokens - 500;
      logger.info(`Cutting tokens from history - new max input tokens: ${this._message_manager.settings.maxTokens}`);
      this._message_manager.cut_messages();
    }
  }
  else if (error instanceof RateLimitError || (raw_message.includes('429') && raw_message.includes('quota'))) {
    logger.debuggerError(`Your API key has reached its rate limit. Please buy more credits and try again later.${prefix}`);
    // retry_delay is treated as seconds (default 10), matching the seconds
    // convention used by rerun_history; setTimeout expects milliseconds.
    await new Promise(resolve => setTimeout(resolve, this.settings.retry_delay * 1000));
  }
  else {
    logger.debuggerError(`${prefix}${error_msg}`);
  }
  if (parse_failed) {
    // get_next_action throws a plain Error for parse failures, so the hint
    // must not depend on the error class.
    error_msg += '\n\nReturn a valid JSON object with the required fields.';
  }
  this.state.consecutive_failures += 1;
  return [new ActionResult({ error: error_msg, includeInMemory: true })];
}
async _make_history_item(model_output, state, result, metadata) {
logger.debug(`Creating history item for current agent step`);
let interacted_elements;
if (model_output) {
logger.debug(`Processing model output with ${model_output.action.length} actions for history`);
interacted_elements = (await AgentHistory.getInteractedElement(model_output, state.selectorMap)) || [];
logger.debug(`Found ${interacted_elements.filter(el => el !== null).length} interacted elements`);
}
else {
logger.debug(`No model output, using null placeholder for interacted elements`);
interacted_elements = [];
}
logger.debug(`Creating browser state history for URL: ${state.url}`);
const state_history = new BrowserStateHistory(state.url, state.title, state.tab, interacted_elements, state.screenshot);
const history_item = new AgentHistory(model_output, result, state_history, metadata);
logger.debug(`History item created with ${result.length} action results`);
this.state.history.history.push(history_item);
logger.info(`Added history item to agent history (total: ${this.state.history.history.length} items)`);
}
_set_message_context() {
logger.debug(`Setting message context for tool calling method: ${this.tool_calling_method}`);
if (this.tool_calling_method === 'raw') {
if (this.settings.message_context) {
logger.debug(`Appending available actions to existing message context`);
this.settings.message_context += `\n\nAvailable actions: ${this.available_actions}`;
}
else {
logger.debug(`Creating new message context with available actions`);
this.settings.message_context = `Available actions: ${this.available_actions}`;
}
}
logger.debug(`Final message context set (${this.settings.message_context ? 'exists' : 'null'})`);
return this.settings.message_context;
}
_set_model_names() {
logger.debug(`Setting model names from LLM instance`);
this.chat_model_library = this.llm.constructor.name;
logger.debug(`Chat model library set to: ${this.chat_model_library}`);
this.model_name = this.llm.name;
if ('model_name' in this.llm) {
const model = this.llm;
this.model_name = model !== null ? model.getName() : undefined;
logger.debug(`Model name set from model_name property: ${this.model_name}`);
}
else if ('model' in this.llm) {
const model = this.llm;
this.model_name = model.name !== null ? model.getName() : 'Unknown';
logger.debug(`Model name set from model.name property: ${this.model_name}`);
}
else {
logger.warn(`Could not determine model name from LLM instance, using default: ${this.model_name}`);
}
if (this.settings.planner_llm) {
logger.debug(`Planner LLM is set, determining planner model name`);
if ('model_name' in this.settings.planner_llm) {
this.plannerModelName = this.settings.planner_llm.getName();
logger.debug(`Planner model name set from model_name property: ${this.plannerModelName}`);
}
else if ('model' in this.settings.planner_llm) {
this.plannerModelName = this.settings.planner_llm.name;
logger.debug(`Planner model name set from model.name property: ${this.plannerModelName}`);
}
else {
this.plannerModelName = "";
logger.warn(`Could not determine planner model name from planner LLM instance, using default: ${this.plannerModelName}`);
}
}
else {
this.plannerModelName = "";
}
}
async _setup_action_models() {
this.ActionModel = this.controller.registry.create_action_model({});
// Create output model with the dynamic actions
// Create a new class that extends AgentOutput instead of creating an instance
this.AgentOutput = new AgentOutput({ action: [this.ActionModel] });
// used to force the done action when max_steps is reached
this.DoneActionModel = this.controller.registry.create_action_model({ include_actions: ['done'] });
// Create a new class for DoneAgentOutput instead of using an instance
this.DoneAgentOutput = new AgentOutput({ action: [this.DoneActionModel] });
}
_set_tool_calling_method() {
const tool_calling_method = this.settings.tool_calling_method;
if (tool_calling_method === 'auto') {
if (this.model_name?.includes('deepseek-reasoner') || this.model_name?.includes('deepseek-r1')) {
return 'raw';
}
else if (this.chat_model_library === 'ChatGoogleGenerativeAI') {
return null;
}
else if (this.chat_model_library === 'ChatOpenAI') {
return 'function_calling';
}
else if (this.chat_model_library === 'AzureChatOpenAI') {
return 'function_calling';
}
else {
return null;
}
}
else {
return tool_calling_method;
}
}
THINK_TAGS = /<think>.*?<\/think>/gs;
STRAY_CLOSE_TAG = /^.*?<\/think>/s;
_remove_think_tags(text) {
// Step 1: Remove well-formed <think>...</think>
logger.debug(`Removing think tags from text (length: ${text.length})`);
text = text.replace(this.THINK_TAGS, '');
// Step 2: If there's an unmatched closing tag </think>,
// remove everything up to and including that.
text = text.replace(this.STRAY_CLOSE_TAG, '');
return text.trim();
}
_convert_input_messages(input_messages) {
logger.debug(`Converting ${input_messages.length} input messages for model: ${this.model_name}`);
if (this.model_name === 'deepseek-reasoner' || this.model_name.includes('deepseek-r1')) {
logger.debug(`Using specialized conversion for ${this.model_name}`);
return convertInputMessages(input_messages, this.model_name);
}
else {
return input_messages;
}
}
extract_json_from_model_output(content) {
logger.debug(`Attempting to extract JSON from model output (length: ${content.length})`);
try {
const result = JSON.parse(content);
logger.debug(`Successfully parsed JSON from model output`);
return result;
}
catch (e) {
logger.error(`Failed to parse JSON from model output: ${e.message}`);
throw new Error(`Failed to parse JSON from model output: ${content}`);
}
}
async get_next_action(input_messages) {
const jsonSchema = this.AgentOutput.toJson();
logger.debug(`Getting next action with ${input_messages.length} input messages`);
await this._set_model_names();
input_messages = this._convert_input_messages(input_messages);
let response;
let parsed = null;
if (this.tool_calling_method === 'raw') {
logger.debug(`Using raw tool calling method`);
const output = await this.llm.invoke(input_messages);
// TODO: currently invoke does not return reasoning_content, we should override invoke
const content = this._remove_think_tags(String(output.content));
try {
logger.debug(`Parsing raw model output`);
const parsed_json = this.extract_json_from_model_output(content);
parsed = new AgentOutput({ action: [parsed_json] });
}
catch (e) {
logger.warn(`Failed to parse model output: ${e.message}`);
throw new Error('Could not parse response.');
}
}
else if (this.tool_calling_method === null) {
logger.debug(`Using structured output without specific tool calling method`);
const structured_llm = this.llm.withStructuredOutput(jsonSchema, { includeRaw: true });
response = await structured_llm.invoke(input_messages);
parsed = response['parsed'];
}
else {
logger.debug(`Using structured output with ${this.tool_calling_method} tool calling method`);
const structured_llm = this.llm.withStructuredOutput(jsonSchema, {
includeRaw: true,
method: this.tool_calling_method
});
response = await structured_llm.invoke(input_messages);
if (response.parsed.length <= 0 || response.raw?.tool_calls) {
const raw_message = response.raw;
if (raw_message.tool_calls && raw_message.tool_calls.length > 0) {
const tool_call = raw_message.tool_calls[0];
// Get action data which might be in different formats
const actionData = tool_call.args.action;
// For object format like [ { search: { query: 'text' } } ]
if (Array.isArray(actionData) && actionData.length > 0) {
const actionObj = actionData[0];
// Get the first key as the action name
const tool_call_name = Object.keys(actionObj)[0];
// Get the parameters from the value of that key
const tool_call_args = actionObj[tool_call_name];
logger.debug(`Found array action: ${tool_call_name}`);
logger.debug(`With args: ${JSON.stringify(tool_call_args)}`);
logger.success(`🤖🤖${tool_call_name}===>${JSON.stringify(tool_call_args)}`);
// Create proper state and action models
const current_state = new AgentBrain({
evaluation_previous_goal: "Executing action",
memory: "Using Tool call",
next_goal: `Execute ${tool_call_name}`
});
// Create a proper ActionModel instance
const action_schema = this.controller.registry.getActions()[tool_call_name];
const action = new ActionModel({ [tool_call_name]: action_schema.paramModel }, tool_call_args);
parsed = new AgentOutput({
current_state: current_state,
action: [action]
});
}
// Handle direct object format with name/args
else if (typeof actionData === 'object' && actionData !== null) {
const tool_call_name = tool_call.args.action.name || Object.keys(actionData)[0];
const tool_call_args = tool_call.args.action.args || actionData[tool_call_name];
logger.debug(`Tool call name: ${tool_call_name}`);
logger.debug(`Tool call args: ${JSON.stringify(tool_call_args)}`);
logger.success(`🤖🤖${tool_call_name}===>${JSON.stringify(tool_call_args)}`);
const current_state = new AgentBrain({
evaluation_previous_goal: "Executing action",
memory: "Using Tool call",
next_goal: `Execute ${tool_call_name}`
});
// Create a proper ActionModel instance
const action_schema = this.controller.registry.getActions()[tool_call_name];
const action = new ActionModel({ [tool_call_name]: action_schema.paramModel }, tool_call_args);
parsed = new AgentOutput({
current_state: current_state,
action: [action]
});
}
else {
logger.error(`Unsupported action format: ${JSON.stringify(actionData)}`);
}
}
else {
logger.error(`No tool calls found in response`);
}
}
else {
parsed = null;
}
}
if (parsed === null) {
logger.error(`Could not parse model response after all attempts`);
throw new Error('Could not parse response.');
}
// cut the number of actions to max_actions_per_step if needed
if (parsed.action && parsed.action.length > this.settings.max_actions_per_step) {
logger.warn(`Limiting actions from ${parsed.action.length} to max ${this.settings.max_actions_per_step}`);
parsed.action = parsed.action.slice(0, this.settings.max_actions_per_step);
}
logger.debug(`Successfully got next action with ${parsed.action?.length || 0} actions`);
return parsed;
}
async rerun_history(history, max_retries = 1, skip_failures = true, delay_between_actions = 2.0) {
/*
Rerun a saved history of actions with error handling and retry logic.
Args:
history: The history to replay
max_retries: Maximum number of retries per action
skip_failures: Whether to skip failed actions or stop execution
delay_between_actions: Delay between actions in seconds
Returns:
List of action results
*/
// Execute initial actions if provided
if (this.initial_actions) {
logger.debug(`Executing ${this.initial_actions.length} initial actions before replay`);
const result = await this.multi_act(this.initial_actions);
this.state.last_result = result;
}
const results = [];
logger.info(`Starting history replay with ${history.history.length} steps`);
for (let i = 0; i < history.history.length; i++) {
const history_item = history.history[i];
const goal = history_item.model_output ? history_item.model_output.current_state.next_goal : '';
logger.info(`Replaying step ${i + 1}/${history.history.length}: goal: ${goal}`);
if (!history_item.model_output
|| !history_item.model_output.action
|| history_item.model_output.action[0] === null) {
logger.error(`Step ${i + 1}: No action to replay, skipping`);
results.push(new ActionResult({ error: 'No action to replay' }));
continue;
}
let retry_count = 0;
logger.debug(`Step ${i + 1}: Will attempt execution up to ${max_retries} times if needed`);
while (retry_count < max_retries) {
try {
logger.debug(`Step ${i + 1}: Execution attempt ${retry_count + 1}`);
const result = await this._execute_history_step(history_item, delay_between_actions);
logger.debug(`Step ${i + 1}: Execution successful, got ${result.length} results`);
results.push(...result);
break;
}
catch (e) {
retry_count += 1;
logger.warn(`Step ${i + 1}: Execution attempt ${retry_count} failed: ${e.message}`);
if (retry_count === max_retries) {
const error_msg = `Step ${i + 1} failed after ${max_retries} attempts: ${e.message}`;
logger.error(error_msg);
if (!skip_failures) {
results.push(new ActionResult({ error: error_msg }));
throw new Error(error_msg);
}
}
else {
logger.error(`Step ${i + 1} failed (attempt ${retry_count}/${max_retries}), retrying...`);
await new Promise(resolve => setTimeout(resolve, delay_between_actions * 1000));
}
}
}
}
return results;
}
async _execute_history_step(history_item, delay) {
/*Execute a single step from history with element validation*/
logger.debug(`Executing history step with delay: ${delay}s`);
const state = await this.browser_context?.get_state();
if (!state || !history_item.model_output) {
logger.error('Invalid state or model output in _execute_history_step');
throw new Error('Invalid state or model output');
}
logger.debug(`Current page URL: ${state.url}, attempting to match ${history_item.model_output.action.length} actions`);
const updated_actions = [];
for (let i = 0; i < history_item.model_output.action.length; i++) {
const action = history_item.model_output.action[i];
logger.debug(`Processing action ${i + 1}/${history_item.model_output.action.length}: ${action.constructor.name}`);
const element = history_item?.state?.interacted_element;
const updated_action = await this._update_action_indices(element?.[i] ?? null, action, state);
if (updated_action === null) {
logger.error(`Failed to find matching element for action ${i + 1} in current page state`);
throw new Error(`Could not find matching element ${i} in current page`);
}
logger.debug(`Successfully updated action ${i + 1} indices`);
updated_actions.push(updated_action);
}
logger.debug(`Executing ${updated_actions.length} updated actions`);
const result = await this.multi_act(updated_actions);
logger.debug(`Action execution complete, got ${result.length} results`);
logger.debug(`Waiting for ${delay}s before next step`);
await new Promise(resolve => setTimeout(resolve, delay * 1000));
return result;
}
async _update_action_indices(historical_element, action, // Type this properly based on your action model
current_state) {
/*
Update action indices based on current page state.
Returns updated action or null if element cannot be found.
*/
logger.debug(`Updating action indices for action type: ${action.constructor.name}`);
if (!historical_element || !current_state.elementTree) {
logger.debug(`No historical element or current element tree, returning original action`);
return action;
}
logger.debug(`Searching for historical element in current page DOM structure`);
const current_element = HistoryTreeProcessor.findHistoryElementInTree(historical_element, current_state.elementTree);
if (!current_element || current_element.highlightIndex === null) {
logger.warn(`Could not find matching element in current page state`);
return null;
}
logger.debug(`Found matching element with highlight index: ${current_element.highlightIndex}`);
const old_index = action.getIndex();
if (old_index !== current_element.highlightIndex) {
action.setIndex(current_element.highlightIndex);
logger.info(`Element moved in DOM, updated index from ${old_index} to ${current_element.highlightIndex}`);
}
else {
logger.debug(`Element position unchanged, index remains: ${old_index}`);
}
return action;
}
save_history(file_path) {
/*Save the history to a file*/
if (!file_path) {
file_path = 'AgentHistory.json';
}
logger.debug(`Saving agent history to file: ${file_path}`);
try {
this.state.history.saveToFile(file_path);
logger.info(`✅ Successfully saved history to ${file_path}`);
}
catch (error) {
logger.error(`Failed to save history to ${file_path}: ${error.message}`);
}
}
pause() {
/*Pause the agent before the next step*/
logger.info('🔄 pausing Agent ');
this.state.paused = true;
logger.debug(`Agent state updated: paused=${this.state.paused}`);
}
resume() {
/*Resume the agent*/
logger.info('▶️ Agent resuming');
this.state.paused = false;
logger.debug(`Agent state updated: paused=${this.state.paused}`);
}
stop() {
/*Stop the agent from executing any more steps*/
logger.info('🛑 Agent stopping');
this.state.stopped = true;
logger.debug(`Agent state updated: stopped=${this.state.stopped}`);
}
_convert_initial_actions(actions) {
/*Convert dictionary-based actions to ActionModel instances*/
const converted_actions = [];
let action_model = this.ActionModel;
for (const action_dict of actions) {
// Each action_dict should have a single key-value pair
const action_name = Object.keys(action_dict)[0];
const params = action_dict[action_name];
// Get the parameter model for this action from registry
const action_info = this.controller.registry.getActions()[action_name];
const param_model = action_info.paramModel;
// Create validated parameters using the appropriate param model
const validated_params = param_model.parse(params);
// Create ActionModel instance with the validated parameters
action_model = new ActionModel({ [action_name]: validated_params }, validated_params);
converted_actions.push(action_model);
}
return converted_actions;
}
async _run_planner() {
logger.info('Running planner');
/*Run the planner to analyze state and suggest next steps*/
// Skip planning if no planner_llm is set
if (!this.settings.planner_llm) {
return null;
}
// Create planner message history using full message history
const planner_messages = [
new PlannerPrompt(this.controller.registry.getPromptDescription()).getSystemMessage(),
...this._message_manager.get_messages().slice(1), // Use full message history except the first
];
if (!this.settings.use_vision_for_planner && this.settings.use_vision) {
const last_state_message = planner_messages[planner_messages.length - 1];
// remove image from last state message
let new_msg = '';
if (Array.isArray(last_state_message.content)) {
for (const msg of last_state_message.content) {
if (msg['type'] === 'text') {
new_msg += msg['text'];
}
else if (msg['type'] === 'image_url') {
continue;
}
}
}
else {
new_msg = last_state_message.content;
}
planner_messages[planner_messages.length - 1] = new HumanMessage({ content: new_msg });
}
const processed_planner_messages = convertInputMessages(planner_messages, this.settings.planner_llm.name);
// Get planner output
const response = await this.settings.planner_llm.invoke(processed_planner_messages);
let plan = String(response.content);
// if deepseek-reasoner, remove think tags
if (this.settings.planner_llm.name && (this.settings.planner_llm.name.includes('deepseek-r1') || this.settings.planner_llm.name.includes('deepseek-reasoner'))) {
plan = this._remove_think_tags(plan);
}
try {
const plan_json = JSON.parse(plan);
logger.info(`Planning Analysis:\n${JSON.stringify(plan_json, null, 4)}`);
}
catch (e) {
if (e instanceof SyntaxError) {
logger.info(`Planning Analysis:\n${plan}`);
}
else {
logger.debug(`Error parsing planning analysis: ${e.message}`);
logger.info(`Plan: ${plan}`);
}
}
return plan;
}
/**
 * Read-only accessor that returns the instance stored in `_message_manager`.
 */
get message_manager() {
return this._message_manager;
}
async close() {
/*Close all resources*/
try {
// First close browser resources
if (this.browser_context && !this.injected_browser_context) {
await this.browser_context.close();
}
if (this.browser && !this.injected_browser) {
await this.browser.close();
}
}
catch (e) {
logger.error(`Error during cleanup: ${e.message}`);
}
}
}
//# sourceMappingURL=agentClass.js.map