// browser-use-typescript
// Version:
// A TypeScript-based browser automation framework
// 1,214 lines (1,032 loc) • 46.9 kB
// text/typescript
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
import { HumanMessage, BaseMessage, AIMessage, SystemMessage } from "@langchain/core/messages";
import {Browser} from "../../browser/playwrightBrowser/browserService";
import { BrowserContext } from "../../browser/playwrightBrowser/browserContext";
import { Controller } from "../../controller/controllerContext";
import { RateLimitError } from "../types";
import {
ActionResult,
AgentError,
AgentHistory,
AgentHistoryList,
AgentOutput,
AgentBrain,
AgentSettings,
AgentState,
StepMetadata,
ToolCallingMethod,
ValidationError
} from "../types";
import { DOMHistoryElement } from "../../domHIstory/historyTypes";
import { AgentStepInfo } from "../types";
import { convertInputMessages } from "../message_manager/utils";
import { BrowserState,BrowserStateHistory } from "../../browser/playwrightBrowser/type";
import { MessageManager } from "../message_manager/services";
import { AgentMessagePrompt, PlannerPrompt, SystemPrompt } from "../prompt";
import { ActionModel, RegisteredAction } from "../../controller/registry/types";
import { HistoryTreeProcessor } from "../../domHIstory/historyTypes";
/**
 * Small console logger that decorates every message with ANSI colours.
 *
 * Two independent switches control output:
 * - `debugEnabled` gates `error`, `log`, `debug`, `info`, `warn` and `trace`.
 * - `isDebugger` gates `success` and `debuggerError`, which print only while
 *   it is `false`.
 */
class Logger {
  /** ANSI escape sequences used to build each decorated line. */
  private colors = {
    // Text styles
    reset: "\x1b[0m",
    white: "\x1b[37m",
    bold: "\x1b[1m",
    italic: "\x1b[3m",
    // Background colours
    bgRed: "\x1b[41m",
    bgPurple: "\x1b[45m",
    bgBlue: "\x1b[44m",
    bgYellow: "\x1b[43m",
    bgGreen: "\x1b[42m"
  };
  private debugEnabled: boolean;
  private isDebugger: boolean;

  /**
   * @param debugEnabled When true, the diagnostic methods write to the console.
   * @param isDebugger When true, `success` and `debuggerError` are silenced.
   */
  constructor(debugEnabled = true, isDebugger = false) {
    this.debugEnabled = debugEnabled;
    this.isDebugger = isDebugger;
  }

  /** Builds `<styles><white><label>: <message><reset>`. */
  private paint(styles: string, label: string, message: string): string {
    return `${styles}${this.colors.white}${label}: ${message}${this.colors.reset}`;
  }

  error(message: string) {
    if (!this.debugEnabled) return;
    console.error(this.paint(this.colors.bgRed, "ERROR", message));
  }

  log(message: string) {
    if (!this.debugEnabled) return;
    console.log(this.paint(this.colors.bgBlue, "LOG", message));
  }

  debug(message: string) {
    if (!this.debugEnabled) return;
    console.log(this.paint(this.colors.bgPurple, "DEBUG", message));
  }

  info(message: string) {
    if (!this.debugEnabled) return;
    console.log(this.paint(this.colors.bgGreen, "INFO", message));
  }

  warn(message: string) {
    if (!this.debugEnabled) return;
    console.warn(this.paint(this.colors.bgYellow, "WARN", message));
  }

  trace(message: string) {
    if (!this.debugEnabled) return;
    // console.trace is called directly here so the printed stack starts at this method.
    console.trace(this.paint(this.colors.bgBlue, "TRACE", message));
  }

  success(message: string) {
    if (this.isDebugger) return;
    console.log(this.paint(`${this.colors.bold}${this.colors.bgGreen}`, "SUCCESS", message));
  }

  debuggerError(message: string) {
    if (this.isDebugger) return;
    console.log(this.paint(`${this.colors.italic}${this.colors.bgRed}`, "ERROR", message));
  }
}
// Shared module-level logger, built with debugEnabled=false and isDebugger=false.
// With these flags the Logger class in this file silences error/log/debug/info/warn/trace,
// while success() and debuggerError() still print.
export const logger = new Logger(false, false); // Default to debug disabled and debugger disabled
// Callback types accepted by the Agent constructor. Each may be null to disable the hook.
// NewStepCallback: receives the browser state, the agent output and a step number;
// may be synchronous or return a Promise.
type NewStepCallback =
| ((browserState: BrowserState, agentOutput: AgentOutput, step: number) => void)
| ((browserState: BrowserState, agentOutput: AgentOutput, step: number) => Promise<void>)
| null;
// DoneCallback: receives the agent history list; take_step() awaits it once the
// history reports done (and output validation, if enabled, has passed).
type DoneCallback =
| ((agentHistoryList: AgentHistoryList) => Promise<void>)
| ((agentHistoryList: AgentHistoryList) => void)
| null;
// ExternalAgentStatusCallback: awaited by _raise_if_stopped_or_paused(); a truthy
// result makes the agent throw Error("InterruptedError").
type ExternalAgentStatusCallback = (() => Promise<boolean>) | null;
// Generic type to match Python's implementation
export class Agent<Context = any> {
// Core components
// Task text handed to the MessageManager and embedded in the validator prompt.
task: string;
// Chat model used for choosing the next action and for output validation.
llm: BaseChatModel;
// Executes actions (controller.act) and exposes the action registry.
controller: Controller<Context>;
// Forwarded to the MessageManager and to controller.act; null when not provided.
sensitive_data: Record<string, string> | null;
// Assigned by _set_model_names(); stays "" when no planner LLM is configured.
plannerModelName:string=""
// Settings
// Built in the constructor from its individual parameters.
settings: AgentSettings;
// State
// Either the injected state or a fresh AgentState (see constructor).
state: AgentState;
// Models and Actions
// Prompt description of the registered actions (registry.getPromptDescription()).
available_actions: string;
// Result of _set_tool_calling_method(), with null replaced by 'auto' in the constructor.
tool_calling_method: ToolCallingMethod;
// Actions executed via multi_act() before the first step of run()/rerun_history().
initial_actions?:ActionModel[]
// Constructor name of the llm instance (set by _set_model_names()).
chat_model_library?: string;
// Model identifier derived from the llm instance (set by _set_model_names()).
model_name?: string;
// NOTE(review): not assigned anywhere in the visible part of this file.
browser_use_version?: string;
browser_use_source?: string;
// Message Management
private _message_manager: MessageManager;
// Browser
browser: Browser | null;
browser_context: BrowserContext | null;
// True when the corresponding object was supplied by the caller rather than created here.
injected_browser: boolean;
injected_browser_context: boolean;
// Callbacks
register_new_step_callback: NewStepCallback;
register_done_callback: DoneCallback;
register_external_agent_status_raise_error_callback: ExternalAgentStatusCallback;
// Context
// Opaque user context forwarded to controller.act.
context: Context | null;
// Action model restricted to the 'done' action, and an AgentOutput wrapping it
// (both populated by _setup_action_models()).
DoneActionModel: any;
DoneAgentOutput:AgentOutput | undefined;
// Telemetry
// Replace with actual ProductTelemetry when implemented
// NOTE: the two fields below are not telemetry; they hold the action model created
// from the full registry and the AgentOutput whose toJson() provides the
// structured-output schema used by get_next_action().
ActionModel:ActionModel | undefined
AgentOutput: AgentOutput | undefined;
/**
 * Agent constructor
 * Equivalent to Python's __init__ method
 *
 * Wires together settings, state, action models, the message manager and the
 * browser objects. The order of the setup calls below matters: model names are
 * resolved before the tool-calling method, which is resolved before the
 * message context, which must be final before the MessageManager is created.
 *
 * Nullable parameters (`... | null`) are normalised to `undefined` when copied
 * into AgentSettings.
 */
constructor(
task: string,
llm: BaseChatModel,
// Optional parameters
browser: Browser | null = null,
browser_context: BrowserContext | null = null,
controller: Controller<Context> = new Controller<Context>(),
// Initial agent run parameters
sensitive_data: Record<string, string> | null = null,
initial_actions: Array<Record<string, Record<string, any>>> | null = null,
// Cloud Callbacks
register_new_step_callback: NewStepCallback = null,
register_done_callback: DoneCallback = null,
register_external_agent_status_raise_error_callback: ExternalAgentStatusCallback = null,
// Agent settings
use_vision: boolean = true,
use_vision_for_planner: boolean = false,
save_conversation_path?: string | null,
save_conversation_path_encoding?: string | null,
max_failures: number = 3,
// NOTE(review): presumably seconds (default 10) — confirm against _handle_step_error.
retry_delay: number = 10,
override_system_message?: string | null,
extend_system_message?: string | null,
max_input_tokens: number = 128000,
validate_output: boolean = false,
message_context?: string | null,
generate_gif: boolean | string = false,
available_file_paths?: string[] | null,
include_attributes: string[] = [
'title',
'type',
'name',
'role',
'aria-label',
'placeholder',
'value',
'alt',
'aria-expanded',
'data-date-format',
],
max_actions_per_step: number = 10,
tool_calling_method: ToolCallingMethod | null = null ,
page_extraction_llm: BaseChatModel | null = null,
planner_llm: BaseChatModel | null = null,
planner_interval: number = 1, // Run planner every N steps
// Inject state
injected_agent_state: AgentState | null = null,
// Opaque user context forwarded to controller.act
context: Context | null = null,
) {
// Page extraction falls back to the main llm when no dedicated model is given.
const finalPageExtractionLlm = page_extraction_llm || llm;
// Core components
this.task = task;
this.llm = llm;
this.controller = controller;
this.sensitive_data = sensitive_data;
this.settings = new AgentSettings({
use_vision,
use_vision_for_planner,
save_conversation_path: save_conversation_path ?? undefined,
save_conversation_path_encoding: save_conversation_path_encoding ?? undefined,
max_failures,
retry_delay,
override_system_message: override_system_message ?? undefined,
extend_system_message: extend_system_message ?? undefined,
max_input_tokens,
validate_output,
message_context: message_context ?? undefined,
generate_gif,
available_file_paths: available_file_paths ?? undefined,
include_attributes,
max_actions_per_step,
tool_calling_method: tool_calling_method || 'auto',
page_extraction_llm: finalPageExtractionLlm,
planner_llm: planner_llm ?? undefined,
planner_interval,
});
// Initialize state
this.state = injected_agent_state || new AgentState();
// Action setup
// _setup_action_models is declared async but contains no await, so its
// assignments have completed by the time this call returns; the returned
// promise is intentionally not awaited (constructors cannot await).
this._setup_action_models();
this.initial_actions = initial_actions ? this._convert_initial_actions(initial_actions) : undefined;
// Model setup
this._set_model_names();
// For models without tool calling, add available actions to context
this.available_actions = this.controller.registry.getPromptDescription();
// NOTE(review): a null result from _set_tool_calling_method() is replaced by
// 'auto' here, so the `tool_calling_method === null` branch in
// get_next_action() cannot be reached through this assignment — confirm intent.
this.tool_calling_method = this._set_tool_calling_method() || 'auto' ;
this.settings.message_context = this._set_message_context() || undefined;
// Initialize message manager with state
this._message_manager = new MessageManager(
task,
new SystemPrompt(
this.available_actions,
this.settings.max_actions_per_step,
override_system_message??undefined,
extend_system_message??undefined
).getSystemMessage(),
{
maxTokens: this.settings.max_input_tokens,
numChatTurnsToKeep:10,
imageTokens:800,
includeAttributes: this.settings.include_attributes,
messageContext: this.settings.message_context,
sensitiveData:this.sensitive_data||undefined,
availableFilePath: this.settings.available_file_paths,
},
this.state.message_manager_state
);
// Browser setup
this.injected_browser = browser !== null;
this.injected_browser_context = browser_context !== null;
if (browser_context) {
// A supplied context is used as-is; this.browser may remain null in this case.
this.browser = browser;
this.browser_context = browser_context;
} else {
this.browser = browser || new Browser();
// NOTE(review): the new BrowserContext is constructed without arguments and is
// not given this.browser — confirm it attaches to the intended browser.
this.browser_context = new BrowserContext();
}
// Callbacks
this.register_new_step_callback = register_new_step_callback;
this.register_done_callback = register_done_callback;
this.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback;
// Context
this.context = context;
// Log conversation path if set
// Uses console.info directly, so this line prints regardless of the module logger's flags.
if (this.settings.save_conversation_path) {
console.info(`Saving conversation to ${this.settings.save_conversation_path}`);
}
}
/**
 * Runs exactly one agent step and reports the outcome.
 *
 * @returns A `[done, valid]` tuple:
 *   - `[false, false]` when the task is not finished yet,
 *   - `[true, false]` when the task reported done but output validation failed,
 *   - `[true, true]` when the task is done (and validated, if validation is enabled).
 *   The done callback, if registered, is awaited before `[true, true]` is returned.
 */
async take_step(): Promise<[boolean, boolean]> {
  // step() is async: it must be awaited, otherwise the done-check below inspects
  // the history before the step has run and a rejection would go unhandled.
  await this.step();
  if (!this.state.history.isDone()) {
    return [false, false];
  }
  if (this.settings.validate_output && !(await this._validate_output())) {
    return [true, false];
  }
  if (typeof this.register_done_callback === 'function') {
    await this.register_done_callback(this.state.history);
  }
  return [true, true];
}
/**
 * Drives the agent until the task is done, the agent is stopped, too many
 * consecutive failures occurred, or `max_steps` steps have been executed.
 *
 * Initial actions (if any) are executed first and stored as the last result.
 *
 * @param max_steps Upper bound on the number of steps to execute.
 * @returns The agent history, or `undefined` when an unexpected error escaped
 *   the loop (the error is logged, not rethrown, as before).
 */
async run(this: Agent, max_steps: number = 100) {
  try {
    if (this.initial_actions) {
      const result = await this.multi_act(this.initial_actions);
      this.state.last_result = result;
    }
    // True only when the loop ran out of steps. Every early exit clears it so the
    // "maximum steps" message is no longer logged after success, stop or failure.
    let step_budget_exhausted = true;
    for (let step = 0; step < max_steps; step++) {
      if (this.state.consecutive_failures >= this.settings.max_failures) {
        logger.error(`❌ Stopping due to ${this.settings.max_failures} consecutive failures`);
        step_budget_exhausted = false;
        break;
      }
      if (this.state.stopped) {
        logger.info('Agent stopped');
        step_budget_exhausted = false;
        break;
      }
      // Poll while paused; a stop request ends the wait (step() then raises the interruption).
      while (this.state.paused) {
        await new Promise(resolve => setTimeout(resolve, 200));
        if (this.state.stopped) {
          break;
        }
      }
      const step_info = new AgentStepInfo( step, max_steps );
      await this.step(step_info);
      if (this.state.history.isDone()) {
        logger.info('✅ Task completed');
        // On the very last step there is no room for another attempt, so validation is skipped.
        if (this.settings.validate_output && step < max_steps - 1) {
          if (!await this._validate_output()) {
            continue;
          }
        }
        step_budget_exhausted = false;
        break;
      }
    }
    if (step_budget_exhausted) {
      logger.info('❌ Failed to complete task in maximum steps');
    }
    return this.state.history;
  } catch (e: unknown) {
    // Best-effort contract kept from the original: log and resolve to undefined.
    const details = e instanceof Error ? (e.stack ?? e.message) : String(e);
    logger.error(details);
    return undefined;
  }
}
/**
 * Executes a list of actions in order through the controller.
 *
 * Before every indexed action after the first, the page is re-read; if it now
 * contains elements whose branch-path hash was not present when the batch
 * started, execution stops and a note is returned so the model can re-plan.
 * Execution also stops after an action that reports `isDone` or an `error`.
 *
 * @param actions Actions to execute.
 * @param check_for_new_elements When false, the "new elements" abort is disabled.
 * @returns The results collected so far (one per executed action, plus the
 *   optional "something new appeared" note).
 * @throws Error("InterruptedError") via _raise_if_stopped_or_paused().
 */
async multi_act(
  this: Agent,
  actions:ActionModel[],
  check_for_new_elements: boolean = true
): Promise<ActionResult[]> {
  const results: ActionResult[] = [];
  // A missing selector map is treated as an empty page instead of crashing in Object.values.
  const cached_selector_map = (await this.browser_context!.get_selector_map()) ?? {};
  const cached_path_hashes = new Set(
    Object.values(cached_selector_map).map((e: any) => e.hash.branch_path_hash)
  );
  await this.browser_context!.remove_highlights();
  for (let i = 0; i < actions.length; i++) {
    const action = actions[i];
    // `!= null` covers both null and undefined for "this action targets no element".
    if (action.getIndex() != null && i !== 0) {
      const new_state = await this.browser_context!.get_state();
      const new_path_hashes = Object.values(new_state.selectorMap).map((e: any) => e.hash.branch_path_hash);
      // Subset test: the page changed only if some current hash was not cached.
      // (The previous `Set.has(otherSet)` was always false and aborted every time.)
      const has_new_elements = new_path_hashes.some(hash => !cached_path_hashes.has(hash));
      if (check_for_new_elements && has_new_elements) {
        const msg = `Something new appeared after action ${i} / ${actions.length}`;
        logger.info(msg);
        results.push(new ActionResult( {extractedContent: msg, includeInMemory: true } ));
        break;
      }
    }
    await this._raise_if_stopped_or_paused();
    const result = await this.controller.act(
      action,
      this.browser_context!,
      this.settings.page_extraction_llm,
      this.sensitive_data||undefined,
      this.settings.available_file_paths,
      this.context
    );
    results.push(result);
    logger.debug(`Executed action ${i + 1} / ${actions.length}`);
    if (result.isDone || result.error || i === actions.length - 1) {
      break;
    }
    // NOTE(review): setTimeout expects milliseconds — confirm the unit of wait_between_actions.
    await new Promise(resolve => setTimeout(resolve, this.browser_context!.config.wait_between_actions));
  }
  return results;
}
/**
 * Asks the LLM to judge whether the last action's output satisfies the task.
 *
 * When the verdict is negative, the reason is stored in `state.last_result` so
 * the next step can react to it.
 *
 * @returns `true` when the output is judged valid, when there is no browser
 *   session to inspect, or when the validator reply could not be parsed
 *   (validation is then skipped with a warning); `false` otherwise.
 */
async _validate_output(): Promise<boolean> {
  const system_msg =
    'You are a validator of an agent who interacts with a browser. ' +
    'Validate if the output of last action is what the user wanted and if the task is completed. ' +
    'If the task is unclear defined, you can let it pass. But if something is missing or the image does not show what was requested dont let it pass. ' +
    'Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right. ' +
    `Task to validate: ${this.task}. Return a JSON object with 2 keys: is_valid and reason. ` +
    'is_valid is a boolean that indicates if the output is correct. ' +
    'reason is a string that explains why it is valid or not.' +
    ' example: {"is_valid": false, "reason": "The user wanted to search for \\"cat photos\\", but the agent searched for \\"dog photos\\" instead."}';
  if (!this.browser_context!.session) {
    return true;
  }
  const state = await this.browser_context!.get_state();
  const content = new AgentMessagePrompt(
    state,
    this.state.last_result || undefined,
    this.settings.include_attributes
  );
  const messages = [new SystemMessage({ content: system_msg }), content.getUserMessage(this.settings.use_vision)];
  type ValidationResult = { is_valid?: boolean; reason?: string };
  // A TypeScript class has no runtime shape, so passing one as the schema gave the
  // model nothing to fill in. Describe the verdict as a JSON schema instead.
  const validation_schema = {
    title: 'ValidationResult',
    description: 'Verdict on whether the agent output satisfies the task.',
    type: 'object',
    properties: {
      is_valid: { type: 'boolean', description: 'Whether the output is correct.' },
      reason: { type: 'string', description: 'Why the output is valid or not.' },
    },
    required: ['is_valid', 'reason'],
  };
  const validator = this.llm.withStructuredOutput(validation_schema, { includeRaw: true });
  const response = await validator.invoke(messages);
  // With includeRaw, `parsed` is null when the reply could not be parsed.
  const parsed = response['parsed'] as ValidationResult | null;
  if (parsed === null || parsed === undefined) {
    logger.warn('Validator returned no parsable verdict, skipping validation');
    return true;
  }
  const is_valid = parsed.is_valid === true;
  if (!is_valid) {
    logger.info(`❌ Validator decision: ${parsed.reason}`);
    const feedback = `The output is not yet correct. ${parsed.reason}.`;
    this.state.last_result = [new ActionResult({ extractedContent: feedback, includeInMemory: true })];
  } else {
    logger.info(`✅ Validator decision: ${parsed.reason}`);
  }
  return is_valid;
}
/**
 * Hands a follow-up task to the message manager.
 *
 * @param new_task Text of the new task.
 */
add_new_task(new_task: string): void {
  const manager = this._message_manager;
  manager.add_new_task(new_task);
}
/**
 * Throws `Error("InterruptedError")` when the agent must not continue.
 *
 * The external status callback (if registered) is consulted first; afterwards
 * the local `stopped` / `paused` flags are checked.
 *
 * @throws Error with message "InterruptedError".
 */
async _raise_if_stopped_or_paused(): Promise<void> {
  const externally_interrupted = this.register_external_agent_status_raise_error_callback
    ? await this.register_external_agent_status_raise_error_callback()
    : false;
  if (externally_interrupted) {
    throw new Error("InterruptedError");
  }
  const halted = this.state.stopped || this.state.paused;
  if (!halted) {
    return;
  }
  logger.debug('Agent paused');
  throw new Error("InterruptedError");
}
/**
 * Executes one agent step: read the browser state, ask the model for the next
 * actions, execute them and record the outcome in the history.
 *
 * An interruption (pause/stop) stores an explanatory error result and returns
 * early; any other error is converted to a result by `_handle_step_error`.
 * Whenever a browser state was obtained, a history item is recorded.
 *
 * @param step_info Optional step counter information; on the last step the
 *   model is instructed to answer with the "done" action only.
 */
async step(this: Agent,step_info?: AgentStepInfo): Promise<void> {
  logger.info(`📍 Current step`);
  let state : BrowserState | null = null ;
  let model_output: AgentOutput| null = null;
  let result: ActionResult[] = [];
  const step_start_time = Date.now();
  let tokens = 0;
  try {
    state = await this.browser_context!.get_state();
    logger.debug("Browser COntext checked 1/5 ");
    await this._raise_if_stopped_or_paused();
    logger.debug("Resuming 2/5");
    this._message_manager.add_state_message(state!, this.state.last_result, step_info!, this.settings.use_vision);
    logger.debug("State message added 3/5");
    if (this.settings.planner_llm && this.state.n_steps % this.settings.planner_interval === 0) {
      const plan = await this._run_planner();
      this._message_manager.add_plan(plan, -1);
      logger.debug("Plan added 4/5");
    }
    if (step_info && step_info.is_last_step()) {
      let msg = 'Now comes your last step. Use only the "done" action now. No other actions - so here your action sequence must have length 1.';
      msg += '\nIf the task is not yet fully finished as requested by the user, set success in "done" to false! E.g. if not all steps are fully completed.';
      msg += '\nIf the task is fully finished, set success in "done" to true.';
      msg += '\nInclude everything you found out for the ultimate task in the done text.';
      logger.info('Last step finishing up');
      this._message_manager._add_message_with_tokens(new HumanMessage({content: msg}));
    }
    const input_messages = this._message_manager.get_messages();
    tokens = this._message_manager.state.history.current_tokens;
    try {
      logger.info(`Requesting next action from model at current step`);
      model_output = await this.get_next_action(input_messages);
      logger.info(`🪜🪜 Model Output Receivved ${model_output.action}, ${model_output.current_state} `);
      logger.info(`Successfully received model output with ${model_output?.action?.length || 0} actions`);
      this.state.n_steps += 1;
      // The bulky state message is only needed for this request; drop it from the history.
      this._message_manager._remove_last_state_message();
      await this._raise_if_stopped_or_paused();
      if (model_output) {
        logger.info(`Adding model output to message manager`);
        this._message_manager.add_model_output(model_output);
      } else {
        logger.warn(`Model output is null, skipping add to message manager`);
      }
    } catch (e:any) {
      logger.error(`Error getting next action: ${e.message}`);
      this._message_manager._remove_last_state_message();
      throw e;
    }
    logger.debug(`Executing actions from model output`);
    if (model_output && model_output.action) {
      result = await this.multi_act(model_output.action);
      logger.info(`Completed ${result.length} actions`);
    } else {
      logger.warn(`No actions to execute, model output is null or has no actions`);
      result = [];
    }
    this.state.last_result = result;
    if (result.length > 0 && result[result.length-1].isDone) {
      logger.info(`📄 Result: ${result[result.length-1].extractedContent}`);
    }
    this.state.consecutive_failures = 0;
  } catch (e) {
    if ((e as Error).message === "InterruptedError") {
      logger.info ('Agent paused');
      this.state.last_result = [
        new ActionResult({
          error: 'The agent was paused - now continuing actions might need to be repeated', includeInMemory: true
        })
      ];
      return;
    } else {
      result = await this._handle_step_error(e as Error);
      this.state.last_result = result;
    }
  } finally {
    if (state) {
      const metadata = new StepMetadata(
        this.state.n_steps,
        step_start_time,
        new Date().getTime(),
        tokens,
      );
      // Awaited so the history item exists before callers test history.isDone();
      // previously the promise was dropped and the push could land after the check.
      await this._make_history_item(model_output, state, result, metadata);
    }
  }
}
/**
 * Converts an error raised during a step into an ActionResult and updates the
 * consecutive-failure counter.
 *
 * - Validation errors: logged; on "Max token limit reached" the message history
 *   is cut, on "Could not parse response" a hint for the model is appended.
 * - Rate-limit errors: logged, then the agent waits `settings.retry_delay`
 *   seconds before returning.
 * - Anything else: logged.
 *
 * @param error The error caught by `step`.
 * @returns A single-element list holding the error result (kept in memory).
 */
async _handle_step_error(error: Error): Promise<ActionResult[]> {
  let error_msg = AgentError.formatError(error);
  const prefix = `❌ Result failed ${this.state.consecutive_failures + 1}/${this.settings.max_failures} times:\n `;
  if (error instanceof ValidationError || error instanceof EvalError) {
    logger.error(`${prefix}${error_msg}`);
    if (error_msg.includes('Max token limit reached')) {
      this._message_manager.settings.maxTokens = this.settings.max_input_tokens - 500;
      logger.info(
        `Cutting tokens from history - new max input tokens: ${this._message_manager.settings.maxTokens}`
      );
      this._message_manager.cut_messages();
    } else if (error_msg.includes('Could not parse response')) {
      error_msg += '\n\nReturn a valid JSON object with the required fields.';
    }
  } else if (error instanceof RateLimitError || (error.message.includes('429') && error.message.includes('quota'))) {
    logger.debuggerError(`Your API key has reached its rate limit. Please buy more credits and try again later.${prefix}`);
    // retry_delay is expressed in seconds (default 10) while setTimeout takes
    // milliseconds; without the conversion the back-off lasted only ~10 ms.
    await new Promise(resolve => setTimeout(resolve, this.settings.retry_delay * 1000));
  } else {
    logger.debuggerError(`${prefix}${error_msg}`);
  }
  // Every branch counts as one more consecutive failure.
  this.state.consecutive_failures += 1;
  return [new ActionResult({error: error_msg, includeInMemory: true})];
}
/**
 * Appends one entry to the agent history for the step that just ran.
 *
 * @param model_output Output produced by the model for this step, or null.
 * @param state Browser state the step was based on.
 * @param result Results of the executed actions.
 * @param metadata Optional timing/token metadata for the step.
 */
async _make_history_item(
  model_output: AgentOutput | null,
  state: BrowserState,
  result: ActionResult[],
  metadata?: StepMetadata,
): Promise<void> {
  logger.debug(`Creating history item for current agent step`);
  let interacted_elements:DOMHistoryElement[];
  if (!model_output) {
    logger.debug(`No model output, using null placeholder for interacted elements`);
    interacted_elements = [];
  } else {
    logger.debug(`Processing model output with ${model_output.action.length} actions for history`);
    const located = await AgentHistory.getInteractedElement(model_output, state.selectorMap);
    interacted_elements = (located! as DOMHistoryElement[]) || [];
    const hit_count = interacted_elements.filter(el => el !== null).length;
    logger.debug(`Found ${hit_count} interacted elements`);
  }
  logger.debug(`Creating browser state history for URL: ${state.url}`);
  const { url, title, tab, screenshot } = state;
  const state_history = new BrowserStateHistory(url, title, tab, interacted_elements, screenshot);
  const entries = this.state.history.history;
  const history_item = new AgentHistory(model_output, result, state_history, metadata);
  logger.debug(`History item created with ${result.length} action results`);
  entries.push(history_item);
  logger.info(`Added history item to agent history (total: ${this.state.history.history.length} items)`);
}
/**
 * Finalises the message context stored in the settings.
 *
 * For the 'raw' tool-calling method the list of available actions is appended
 * to (or becomes) the message context; for every other method the context is
 * left untouched.
 *
 * @returns The resulting `settings.message_context`.
 */
_set_message_context(): string | null {
  logger.debug(`Setting message context for tool calling method: ${this.tool_calling_method}`);
  if (this.tool_calling_method === 'raw') {
    const existing = this.settings.message_context;
    if (!existing) {
      logger.debug(`Creating new message context with available actions`);
      this.settings.message_context = `Available actions: ${this.available_actions}`;
    } else {
      logger.debug(`Appending available actions to existing message context`);
      this.settings.message_context = `${existing}\n\nAvailable actions: ${this.available_actions}`;
    }
  }
  const presence = this.settings.message_context ? 'exists' : 'null';
  logger.debug(`Final message context set (${presence})`);
  return this.settings.message_context!;
}
/**
 * Derives `chat_model_library`, `model_name` and `plannerModelName` from the
 * configured LLM instances.
 *
 * The model identifier is read from the instance's `model_name`, `modelName`
 * or `model` property. Previously `getName()` was stored, which yields the
 * runnable/class name rather than the model id, so the name-based checks for
 * deepseek models could never match. `getName()` is kept as a fallback when
 * one of those properties exists but does not hold a usable string.
 */
_set_model_names(): void {
  // Returns the first non-empty string among the conventional model-id properties.
  const read_model_id = (model: object): string | undefined => {
    const fields = model as Record<string, unknown>;
    for (const key of ['model_name', 'modelName', 'model']) {
      const value = fields[key];
      if (typeof value === 'string' && value.length > 0) {
        return value;
      }
    }
    return undefined;
  };
  const has_model_field = (model: object): boolean => 'model_name' in model || 'model' in model;

  logger.debug(`Setting model names from LLM instance`);
  this.chat_model_library = this.llm.constructor.name;
  logger.debug(`Chat model library set to: ${this.chat_model_library}`);
  const llm_model_id = read_model_id(this.llm);
  if (llm_model_id !== undefined) {
    this.model_name = llm_model_id;
    logger.debug(`Model name set from model property: ${this.model_name}`);
  } else if (has_model_field(this.llm)) {
    this.model_name = this.llm.getName();
    logger.debug(`Model name set from getName(): ${this.model_name}`);
  } else {
    this.model_name = this.llm.name;
    logger.warn(`Could not determine model name from LLM instance, using default: ${this.model_name}`);
  }

  const planner = this.settings.planner_llm;
  if (!planner) {
    this.plannerModelName = "";
    return;
  }
  logger.debug(`Planner LLM is set, determining planner model name`);
  const planner_model_id = read_model_id(planner);
  if (planner_model_id !== undefined) {
    this.plannerModelName = planner_model_id;
    logger.debug(`Planner model name set from model property: ${this.plannerModelName}`);
  } else if (has_model_field(planner)) {
    this.plannerModelName = planner.getName();
    logger.debug(`Planner model name set from getName(): ${this.plannerModelName}`);
  } else {
    this.plannerModelName = "";
    logger.warn(`Could not determine planner model name from planner LLM instance, using default: ${this.plannerModelName}`);
  }
}
/**
 * Builds the action models and the AgentOutput objects derived from them.
 *
 * Although declared async, the body contains no await, so all four fields are
 * assigned before the call returns.
 */
async _setup_action_models(): Promise<void> {
  const registry = this.controller.registry;
  // Action model covering every registered action, wrapped in an AgentOutput instance.
  const full_action_model = registry.create_action_model({});
  this.ActionModel = full_action_model;
  this.AgentOutput = new AgentOutput({ action: [this.ActionModel!] });
  // Variant restricted to 'done', used to force the done action when max_steps is reached.
  const done_only_model = registry.create_action_model({ include_actions: ['done'] });
  this.DoneActionModel = done_only_model;
  this.DoneAgentOutput = new AgentOutput({ action: [this.DoneActionModel] });
}
/**
 * Resolves the tool-calling method to use.
 *
 * An explicit setting is returned unchanged. For 'auto' the choice depends on
 * the model: deepseek reasoner/r1 models use 'raw', ChatOpenAI and
 * AzureChatOpenAI use 'function_calling', everything else yields null.
 */
_set_tool_calling_method(): ToolCallingMethod | null {
  const configured = this.settings.tool_calling_method;
  if (configured !== 'auto') {
    return configured as ToolCallingMethod;
  }
  const is_deepseek_reasoning_model =
    this.model_name?.includes('deepseek-reasoner') || this.model_name?.includes('deepseek-r1');
  if (is_deepseek_reasoning_model) {
    return 'raw';
  }
  switch (this.chat_model_library) {
    case 'ChatOpenAI':
    case 'AzureChatOpenAI':
      return 'function_calling';
    default:
      // Includes 'ChatGoogleGenerativeAI', which is mapped to null as well.
      return null;
  }
}
/** Matches every complete <think>...</think> section (dot matches newlines). */
THINK_TAGS = /<think>.*?<\/think>/gs;
/** Matches everything from the start of the text up to a leftover </think>. */
STRAY_CLOSE_TAG = /^.*?<\/think>/s;
/**
 * Strips reasoning sections from model output.
 *
 * Complete <think>...</think> pairs are removed first; if an unmatched
 * closing tag remains, everything up to and including it is dropped too.
 *
 * @param text Raw model output.
 * @returns The remaining text, trimmed.
 */
_remove_think_tags(this: any, text: string): string {
  logger.debug(`Removing think tags from text (length: ${text.length})`);
  const without_pairs = text.replace(this.THINK_TAGS, '');
  const without_stray_close = without_pairs.replace(this.STRAY_CLOSE_TAG, '');
  return without_stray_close.trim();
}
/**
 * Adapts the input messages for models that need a special message format.
 *
 * Only 'deepseek-reasoner' and names containing 'deepseek-r1' are converted;
 * for every other model (including an unknown/undefined model name) the
 * messages are returned unchanged.
 *
 * @param input_messages Messages about to be sent to the model.
 * @returns The converted messages, or the original array.
 */
_convert_input_messages(this: any, input_messages: BaseMessage[]): BaseMessage[] {
  logger.debug(`Converting ${input_messages.length} input messages for model: ${this.model_name}`);
  // model_name is optional on the agent; calling .includes on undefined used to throw here.
  const model_name: string | undefined = typeof this.model_name === 'string' ? this.model_name : undefined;
  const needs_conversion =
    model_name !== undefined && (model_name === 'deepseek-reasoner' || model_name.includes('deepseek-r1'));
  if (!needs_conversion) {
    return input_messages;
  }
  logger.debug(`Using specialized conversion for ${this.model_name}`);
  return convertInputMessages(input_messages, model_name);
}
/**
 * Parses the JSON payload contained in raw model output.
 *
 * The content is first parsed as-is. If that fails and the content contains a
 * Markdown code fence (``` optionally followed by a language tag on the same
 * line), the body of the first fence is parsed instead, since models in raw
 * mode frequently wrap their JSON that way.
 *
 * @param content Raw text returned by the model.
 * @returns The parsed JSON value.
 * @throws Error when no candidate can be parsed; the message contains the
 *   original content.
 */
extract_json_from_model_output(this: Agent, content: string): any {
  logger.debug(`Attempting to extract JSON from model output (length: ${content.length})`);
  const candidates: string[] = [content];
  // Optional "<lang>\n" header, then the lazily-matched fence body.
  const fenced = content.match(/```(?:[\w-]*\r?\n)?([\s\S]*?)```/);
  if (fenced) {
    candidates.push(fenced[1].trim());
  }
  let first_failure = '';
  for (const candidate of candidates) {
    try {
      const result = JSON.parse(candidate);
      logger.debug(`Successfully parsed JSON from model output`);
      return result;
    } catch (e: unknown) {
      if (first_failure === '') {
        first_failure = e instanceof Error ? e.message : String(e);
      }
    }
  }
  logger.error(`Failed to parse JSON from model output: ${first_failure}`);
  throw new Error(`Failed to parse JSON from model output: ${content}`);
}
/**
 * Asks the LLM for the next action(s) and returns them as an AgentOutput.
 *
 * Three paths exist, selected by `tool_calling_method`:
 * - 'raw': plain invocation; think-tags are stripped and the text is parsed as JSON.
 * - null: structured output without an explicit method.
 * - anything else: structured output with that method; the action is rebuilt
 *   from the first tool call of the raw message.
 *
 * The returned action list is truncated to `settings.max_actions_per_step`.
 *
 * @param input_messages Conversation to send to the model.
 * @returns The parsed agent output.
 * @throws Error('Could not parse response.') when no output could be built.
 */
async get_next_action(this: Agent, input_messages: BaseMessage[]): Promise<AgentOutput> {
  const jsonSchema = this.AgentOutput!.toJson();
  logger.debug(`Getting next action with ${input_messages.length} input messages`);
  // _set_model_names is synchronous, so there is nothing to await.
  this._set_model_names();
  input_messages = this._convert_input_messages(input_messages);
  let parsed: AgentOutput | null = null;
  if (this.tool_calling_method === 'raw') {
    logger.debug(`Using raw tool calling method`);
    const output = await this.llm.invoke(input_messages);
    // TODO: currently invoke does not return reasoning_content, we should override invoke
    const content = this._remove_think_tags(String(output.content));
    try {
      logger.debug(`Parsing raw model output`);
      const parsed_json = this.extract_json_from_model_output(content);
      parsed = new AgentOutput({action:[parsed_json]});
    } catch (e:any) {
      logger.warn(`Failed to parse model output: ${e.message}`);
      throw new Error('Could not parse response.');
    }
  } else if (this.tool_calling_method === null) {
    logger.debug(`Using structured output without specific tool calling method`);
    const structured_llm = this.llm.withStructuredOutput(jsonSchema, { includeRaw: true });
    const response: { raw: AIMessage, parsed: any } = await structured_llm.invoke(input_messages);
    parsed = response['parsed'] as AgentOutput;
  } else {
    logger.debug(`Using structured output with ${this.tool_calling_method} tool calling method`);
    const structured_llm = this.llm.withStructuredOutput(jsonSchema, {
      includeRaw: true,
      method: this.tool_calling_method
    });
    const response: { raw: AIMessage, parsed: any } = await structured_llm.invoke(input_messages);
    // The output is rebuilt from the raw tool call. `response.parsed` is not consulted,
    // so a null `parsed` (parse failure with includeRaw) no longer raises a TypeError.
    const tool_calls = response.raw?.tool_calls ?? [];
    if (tool_calls.length > 0) {
      parsed = this._agent_output_from_tool_call(tool_calls[0]);
    } else {
      logger.error(`No tool calls found in response`);
    }
  }
  if (parsed === null) {
    logger.error(`Could not parse model response after all attempts`);
    throw new Error('Could not parse response.');
  }
  // cut the number of actions to max_actions_per_step if needed
  if (parsed.action && parsed.action.length > this.settings.max_actions_per_step) {
    logger.warn(`Limiting actions from ${parsed.action.length} to max ${this.settings.max_actions_per_step}`);
    parsed.action = parsed.action.slice(0, this.settings.max_actions_per_step);
  }
  logger.debug(`Successfully got next action with ${parsed.action?.length || 0} actions`);
  return parsed;
}
/**
 * Builds an AgentOutput from a single tool call, or returns null when the
 * call's `action` argument has an unsupported shape or names an action that
 * is not registered.
 *
 * Two shapes are understood:
 * - array form, e.g. `[ { search: { query: 'text' } } ]` (only the first entry is used),
 * - object form, either `{ name, args }` or `{ <actionName>: <params> }`.
 *
 * NOTE(review): only the first action of the array form is executed and the
 * model's own current_state is replaced by a synthetic one — confirm intent.
 */
private _agent_output_from_tool_call(this: Agent, tool_call: { args?: Record<string, any> }): AgentOutput | null {
  const actionData = tool_call.args?.action;
  let tool_call_name: string | undefined;
  let tool_call_args: any;
  if (Array.isArray(actionData) && actionData.length > 0 && actionData[0] !== null && typeof actionData[0] === 'object') {
    const actionObj = actionData[0];
    // The first key is the action name, its value holds the parameters.
    tool_call_name = Object.keys(actionObj)[0];
    tool_call_args = tool_call_name === undefined ? undefined : actionObj[tool_call_name];
    logger.debug(`Found array action: ${tool_call_name}`);
    logger.debug(`With args: ${JSON.stringify(tool_call_args)}`);
  } else if (!Array.isArray(actionData) && typeof actionData === 'object' && actionData !== null) {
    tool_call_name = actionData.name || Object.keys(actionData)[0];
    tool_call_args = actionData.args || (tool_call_name === undefined ? undefined : actionData[tool_call_name]);
    logger.debug(`Tool call name: ${tool_call_name}`);
    logger.debug(`Tool call args: ${JSON.stringify(tool_call_args)}`);
  } else {
    logger.error(`Unsupported action format: ${JSON.stringify(actionData)}`);
    return null;
  }
  // Guard the registry lookup: an unknown name used to crash on `.paramModel`.
  const action_schema = tool_call_name === undefined
    ? undefined
    : this.controller.registry.getActions()[tool_call_name];
  if (tool_call_name === undefined || !action_schema) {
    logger.error(`Unsupported action format: ${JSON.stringify(actionData)}`);
    return null;
  }
  logger.success(`🤖🤖${tool_call_name}===>${JSON.stringify(tool_call_args)}`)
  const current_state = new AgentBrain({
    evaluation_previous_goal: "Executing action",
    memory: "Using Tool call",
    next_goal: `Execute ${tool_call_name}`
  });
  const action = new ActionModel(
    {[tool_call_name]: action_schema.paramModel},
    tool_call_args
  );
  return new AgentOutput({
    current_state: current_state,
    action: [action]
  });
}
/**
 * Replays a saved history of actions with retry and error handling.
 *
 * Initial actions, if configured, are executed once before the replay.
 *
 * @param history The history to replay.
 * @param max_retries Maximum number of execution attempts per step.
 * @param skip_failures When true, a step that exhausts its attempts is skipped;
 *   when false, an error result is recorded and the error is thrown.
 * @param delay_between_actions Delay between actions, in seconds.
 * @returns The action results collected during the replay.
 */
async rerun_history(
  history: AgentHistoryList,
  max_retries: number = 1,
  skip_failures: boolean = true,
  delay_between_actions: number = 2.0,
): Promise<ActionResult[]> {
  if (this.initial_actions) {
    logger.debug(`Executing ${this.initial_actions.length} initial actions before replay`);
    this.state.last_result = await this.multi_act(this.initial_actions);
  }
  const replayed: ActionResult[] = [];
  logger.info(`Starting history replay with ${history.history.length} steps`);
  for (let index = 0; index < history.history.length; index++) {
    const entry = history.history[index];
    const step_number = index + 1;
    const output = entry.model_output;
    const goal = output ? output.current_state.next_goal : '';
    logger.info(`Replaying step ${step_number}/${history.history.length}: goal: ${goal}`);
    const replayable = output && output.action && output.action[0] !== null;
    if (!replayable) {
      logger.error(`Step ${step_number}: No action to replay, skipping`);
      replayed.push(new ActionResult({ error: 'No action to replay' }));
      continue;
    }
    let failed_attempts = 0;
    logger.debug(`Step ${step_number}: Will attempt execution up to ${max_retries} times if needed`);
    while (failed_attempts < max_retries) {
      try {
        logger.debug(`Step ${step_number}: Execution attempt ${failed_attempts + 1}`);
        const step_results = await this._execute_history_step(entry, delay_between_actions);
        logger.debug(`Step ${step_number}: Execution successful, got ${step_results.length} results`);
        replayed.push(...step_results);
        break;
      } catch (e:any) {
        failed_attempts += 1;
        logger.warn(`Step ${step_number}: Execution attempt ${failed_attempts} failed: ${e.message}`);
        if (failed_attempts !== max_retries) {
          // Attempts remain: wait, then let the loop try again.
          logger.error(`Step ${step_number} failed (attempt ${failed_attempts}/${max_retries}), retrying...`);
          await new Promise(resolve => setTimeout(resolve, delay_between_actions * 1000));
          continue;
        }
        const error_msg = `Step ${step_number} failed after ${max_retries} attempts: ${(e as Error).message}`;
        logger.error(error_msg);
        if (!skip_failures) {
          replayed.push(new ActionResult({ error: error_msg }));
          throw new Error(error_msg);
        }
      }
    }
  }
  return replayed;
}
/**
 * Executes a single step from a saved history against the current page.
 *
 * Each recorded action is passed through `_update_action_indices` together
 * with the element recorded at the same position; the updated actions are
 * then executed in one `multi_act` call, followed by the requested delay.
 *
 * @param history_item The recorded step to replay.
 * @param delay Pause after execution, in seconds.
 * @returns The results of the executed actions.
 * @throws Error when the browser state or model output is missing, or when an
 *   action's element cannot be matched on the current page.
 */
async _execute_history_step(history_item: AgentHistory, delay: number): Promise<ActionResult[]> {
  logger.debug(`Executing history step with delay: ${delay}s`);
  const state = await this.browser_context?.get_state();
  const recorded_output = history_item.model_output;
  if (!state || !recorded_output) {
    logger.error('Invalid state or model output in _execute_history_step');
    throw new Error('Invalid state or model output');
  }
  const recorded_actions = recorded_output.action;
  logger.debug(`Current page URL: ${state.url}, attempting to match ${recorded_actions.length} actions`);
  const updated_actions:ActionModel[] = [];
  for (const [i, action] of recorded_actions.entries()) {
    logger.debug(`Processing action ${i+1}/${recorded_actions.length}: ${action.constructor.name}`);
    const recorded_element = history_item?.state?.interacted_element?.[i] ?? null;
    const updated_action = await this._update_action_indices(recorded_element, action, state);
    if (updated_action === null) {
      logger.error(`Failed to find matching element for action ${i+1} in current page state`);
      throw new Error(`Could not find matching element ${i} in current page`);
    }
    logger.debug(`Successfully updated action ${i+1} indices`);
    updated_actions.push(updated_action!);
  }
  logger.debug(`Executing ${updated_actions.length} updated actions`);
  const result = await this.multi_act(updated_actions);
  logger.debug(`Action execution complete, got ${result.length} results`);
  logger.debug(`Waiting for ${delay}s before next step`);
  await new Promise(resolve => setTimeout(resolve, delay * 1000));
  return result;
}
/**
 * Re-targets a recorded action at the matching element of the current page.
 *
 * When there is nothing to match against (no historical element, or the
 * current state has no element tree) the action is returned untouched.
 * Otherwise the historical element is looked up in the current tree via
 * `HistoryTreeProcessor.findHistoryElementInTree`, and the action's index is
 * rewritten if the element's highlight index differs from the recorded one.
 *
 * Note: the passed-in `action` object is mutated in place (`setIndex`) and the
 * same instance is returned.
 *
 * @param historical_element - Element recorded for the action, or `null` if none.
 * @param action - Recorded action whose index may be rewritten.
 * @param current_state - Browser state providing the current `elementTree`.
 * @returns The (possibly updated) action, or `null` when no element with a
 *   usable highlight index could be found in the current page.
 */
async _update_action_indices(
  historical_element: DOMHistoryElement | null,
  action: ActionModel,
  current_state: BrowserState,
): Promise<ActionModel | null> {
  logger.debug(`Updating action indices for action type: ${action.constructor.name}`);
  if (!historical_element || !current_state.elementTree) {
    logger.debug(`No historical element or current element tree, returning original action`);
    return action;
  }

  logger.debug(`Searching for historical element in current page DOM structure`);
  const current_element = HistoryTreeProcessor.findHistoryElementInTree(historical_element, current_state.elementTree);

  // `== null` deliberately matches both null and undefined: an element without
  // a highlight index must be reported as "not found" rather than having
  // `undefined` written into the action's index.
  if (!current_element || current_element.highlightIndex == null) {
    logger.warn(`Could not find matching element in current page state`);
    return null;
  }

  const new_index = current_element.highlightIndex;
  logger.debug(`Found matching element with highlight index: ${new_index}`);

  const old_index = action.getIndex();
  if (old_index !== new_index) {
    action.setIndex(new_index);
    logger.info(`Element moved in DOM, updated index from ${old_index} to ${new_index}`);
  } else {
    logger.debug(`Element position unchanged, index remains: ${old_index}`);
  }
  return action;
}
/**
 * Persists the agent history by delegating to `this.state.history.saveToFile`.
 *
 * Failures raised by `saveToFile` are logged and not rethrown.
 *
 * @param file_path - Destination path; when omitted or empty,
 *   `'AgentHistory.json'` is used.
 */
save_history(file_path?: string): void {
  // `||` (not `??`) so that an empty string also falls back to the default.
  const destination = file_path || 'AgentHistory.json';
  logger.debug(`Saving agent history to file: ${destination}`);

  try {
    this.state.history.saveToFile(destination);
    logger.info(`✅ Successfully saved history to ${destination}`);
  } catch (failure: unknown) {
    logger.error(`Failed to save history to ${destination}: ${(failure as Error).message}`);
  }
}
/**
 * Marks the agent as paused by setting `state.paused` to `true`.
 *
 * Intended to hold the agent before its next step; the loop that honours the
 * flag lives outside this method.
 */
pause(): void {
  logger.info('🔄 pausing Agent ');
  const agentState = this.state;
  agentState.paused = true;
  logger.debug(`Agent state updated: paused=${agentState.paused}`);
}
/**
 * Clears the paused flag by setting `state.paused` back to `false`.
 */
resume(): void {
  logger.info('▶️ Agent resuming');
  const agentState = this.state;
  agentState.paused = false;
  logger.debug(`Agent state updated: paused=${agentState.paused}`);
}
/**
 * Marks the agent as stopped by setting `state.stopped` to `true`.
 *
 * Intended to prevent any further steps from running; the loop that honours
 * the flag lives outside this method.
 */
stop(): void {
  logger.info('🛑 Agent stopping');
  const agentState = this.state;
  agentState.stopped = true;
  logger.debug(`Agent state updated: stopped=${agentState.stopped}`);
}
/**
 * Converts dictionary-style actions into `ActionModel` instances.
 *
 * Each entry is expected to hold a single `{ actionName: params }` pair; only
 * the first key of an entry is used. The params are run through the
 * `paramModel.parse` of the matching action in the controller registry before
 * the `ActionModel` is built.
 *
 * @param actions - Actions to convert, one `{ actionName: params }` object each.
 * @returns One `ActionModel` per input entry, in the same order.
 * @throws Error when an entry has no key, or names an action that is not
 *   present in the controller registry. Errors raised by `paramModel.parse`
 *   for invalid params propagate unchanged.
 */
_convert_initial_actions(actions: Array<Record<string, Record<string, any>>>): ActionModel[] {
  const converted_actions: ActionModel[] = [];

  for (const action_dict of actions) {
    // Each action_dict should have a single key-value pair
    const action_name = Object.keys(action_dict)[0];
    if (action_name === undefined) {
      throw new Error('Initial action is empty: expected an object with a single action name key');
    }
    const params = action_dict[action_name];

    // Get the parameter model for this action from the registry. Fail with a
    // clear message instead of a TypeError on `undefined.paramModel`.
    const action_info: any = this.controller.registry.getActions()[action_name];
    if (!action_info) {
      throw new Error(`Unknown initial action '${action_name}': not found in the controller registry`);
    }

    // Create validated parameters using the appropriate param model
    const validated_params = action_info.paramModel.parse(params);

    // Create ActionModel instance with the validated parameters
    converted_actions.push(
      new ActionModel(
        { [action_name]: validated_params },
        validated_params
      )
    );
  }

  return converted_actions;
}
/**
 * Runs the optional planner model over the conversation and returns its plan.
 *
 * The planner receives a `PlannerPrompt` system message followed by the
 * message manager's history minus its first message. When vision is enabled
 * for the agent but not for the planner, the last message is replaced by a
 * text-only `HumanMessage` so no image is sent to the planner.
 *
 * @returns The planner's textual output (with think tags removed for
 *   deepseek-r1 / deepseek-reasoner models), or `null` when no planner model
 *   is configured.
 */
async _run_planner(): Promise<string | null> {
  const planner_llm = this.settings.planner_llm;
  // Skip planning if no planner_llm is set
  if (!planner_llm) {
    return null;
  }
  // Logged only after the guard so the message is not emitted when planning is skipped.
  logger.info('Running planner');

  // Flattens message content to plain text: strings pass through, content-part
  // arrays keep only their text parts (images and other parts are dropped).
  const textOf = (content: unknown): string => {
    if (typeof content === 'string') {
      return content;
    }
    if (!Array.isArray(content)) {
      return String(content);
    }
    return content
      .map((part: unknown) => {
        if (typeof part === 'string') {
          return part;
        }
        if (typeof part === 'object' && part !== null && (part as { type?: unknown }).type === 'text') {
          return String((part as { text?: unknown }).text ?? '');
        }
        return '';
      })
      .join('');
  };

  // Create planner message history using full message history
  const planner_messages = [
    new PlannerPrompt(this.controller.registry.getPromptDescription()).getSystemMessage(),
    ...this._message_manager.get_messages().slice(1), // Use full message history except the first
  ];

  if (!this.settings.use_vision_for_planner && this.settings.use_vision) {
    // remove image from last state message
    const last_index = planner_messages.length - 1;
    const text_only = textOf(planner_messages[last_index].content);
    planner_messages[last_index] = new HumanMessage({ content: text_only });
  }

  const processed_planner_messages = convertInputMessages(planner_messages, planner_llm.name);

  // Get planner output. Content may be a string or an array of content parts;
  // String() on the latter would produce "[object Object]", so flatten it.
  const response = await planner_llm.invoke(processed_planner_messages);
  let plan = textOf(response.content);

  // if deepseek-reasoner, remove think tags
  const model_name = planner_llm.name;
  if (model_name && (model_name.includes('deepseek-r1') || model_name.includes('deepseek-reasoner'))) {
    plan = this._remove_think_tags(plan);
  }

  // Pretty-print the plan when it is valid JSON; otherwise log it verbatim.
  try {
    const plan_json = JSON.parse(plan);
    logger.info(`Planning Analysis:\n${JSON.stringify(plan_json, null, 4)}`);
  } catch (e) {
    if (e instanceof SyntaxError) {
      logger.info(`Planning Analysis:\n${plan}`);
    } else {
      logger.debug(`Error parsing planning analysis: ${(e as Error).message}`);
      logger.info(`Plan: ${plan}`);
    }
  }

  return plan;
}
/** Read-only accessor that returns the agent's `_message_manager` instance. */
get message_manager(): MessageManager {
return this._message_manager;
}
/**
 * Closes the browser context and the browser held by this agent.
 *
 * A resource is skipped when its `injected_*` flag is set (presumably meaning
 * it was supplied by the caller, who remains responsible for it — confirm
 * against the constructor). Errors are logged and never rethrown.
 *
 * The two resources are closed independently: a failure while closing the
 * context must not prevent the browser from being closed, otherwise the
 * browser would be leaked.
 */
async close(): Promise<void> {
  // First close the browser context
  if (this.browser_context && !this.injected_browser_context) {
    try {
      await this.browser_context.close();
    } catch (e) {
      logger.error(`Error during cleanup: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  // Then close the browser itself, even if closing the context failed
  if (this.browser && !this.injected_browser) {
    try {
      await this.browser.close();
    } catch (e) {
      logger.error(`Error during cleanup: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
}
}