// @auto-browse/auto-browse
// Version: (not captured)
// AI-powered browser automation
// 365 lines (325 loc) • 12.3 kB
// JavaScript
;
// Compiler-emitted re-export helper. Exposes property `key` of `source` on
// `target` under the name `alias` (falling back to `key` when no alias is
// given). A helper already present on `this` takes precedence; otherwise the
// descriptor-based variant is used when Object.create exists, and a plain
// assignment variant when it does not.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exposedAs = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused as-is or has to
        // be replaced by a getter that reads through to the source object.
        let needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            needsLiveGetter = !source.__esModule;
        }
        else {
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exposedAs, descriptor);
    }
    : function (target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    });
// Compiler-emitted `export *` helper. Walks every enumerable key of module `m`
// and binds it onto `exports` via __createBinding, skipping the "default" key
// and any name that `exports` already owns. A helper already present on `this`
// takes precedence.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    for (var name in m) {
        if (name === "default") {
            continue;
        }
        if (Object.prototype.hasOwnProperty.call(exports, name)) {
            continue;
        }
        __createBinding(exports, m, name);
    }
};
// Flag the exports object with `__esModule: true`; this is the marker that
// __createBinding inspects when deciding whether to reuse an accessor descriptor.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the `test` and `sessionManager` export slots as undefined; both
// are given their real values further down in this file.
exports.sessionManager = exports.test = void 0;
// `auto` is declared with a function declaration later in the file, so it is
// hoisted and can already be attached to the exports here.
exports.auto = auto;
const test_1 = require("@playwright/test");
const zod_1 = require("zod");
const browser_1 = require("./browser");
const analytics_1 = require("./analytics");
const prebuilt_1 = require("@langchain/langgraph/prebuilt");
const messages_1 = require("@langchain/core/messages");
const llm_1 = require("./llm");
const tools_1 = require("./tools");
// Schema for the agent's structured reply: three string fields, each carrying
// a description. Used both as the agent's response format and to validate the
// reply before it is handed back to the caller.
const AutoResponseSchema = (() => {
    // Every field is a plain string that differs only in its description.
    const describedString = (description) => zod_1.z.string().describe(description);
    return zod_1.z.object({
        action: describedString("The type of action performed (assert, click, type, etc)"),
        exception: describedString("Error message if any, empty string if none"),
        output: describedString("Raw output from the action"),
    });
})();
// Extended Playwright `test` object, exported for consumers of this package.
// It overrides the `page` fixture so that each page is registered with the
// shared session manager before being handed on.
exports.test = test_1.test.extend({
// Fixture override: receives the incoming `page`, records it on the session
// manager, then passes the very same page to `use` unchanged.
page: async ({ page }, use) => {
browser_1.sessionManager.setPage(page);
await use(page);
},
});
/**
 * Builds a new ReAct agent on every call.
 *
 * The agent is wired with the model returned by `createLLMModel`, the browser
 * tools exported from `./tools`, a long system prompt describing how the tools
 * must be used, and a structured response format backed by AutoResponseSchema.
 *
 * @returns {{ agent: object }} An object whose `agent` property is the value
 *   returned by `createReactAgent`.
 */
const initializeAgent = () => {
    // Create the model first, before anything else is assembled.
    const llm = (0, llm_1.createLLMModel)();
    // System prompt handed to the agent as its state modifier. This text is
    // runtime data: it must stay exactly as written.
    const systemPrompt = `You are a Playwright Test Automation Expert specializing in browser automation and testing. Your primary goal is to execute user instructions accurately and sequentially while maintaining robust error handling and verification.
MANDATORY REQUIREMENTS:
1. Tool Usage Rules:
- MUST use appropriate tool for EVERY action
- NEVER return direct responses without tool use
- NO claiming action completion without tool result
- INVALID to skip required tools like snapshot
2. Response Format Rules:
- ALL responses must have tool result
- NO empty/direct text responses
- Format must match schema exactly
- Must include actual tool output
3. Tool Result Requirements:
- Must wait for and include tool output
- Cannot fabricate/assume tool results
- Must reflect actual tool execution
- Must be parseable JSON format
4. Error vs Tool Skip:
- Missing tool use = INVALID response
- Tool error = Valid with exception
- NEVER skip tool to avoid errors
- Report ALL tool execution results
5. Response Examples:
INVALID (No Tool Use):
{
"action": "type",
"exception": "",
"output": "Typed password in the textbox" // NO TOOL RESULT!
}
VALID (With Tool Result):
{
"action": "type",
"exception": "",
"output": "Typed password in textbox\n- Tool output: Successfully typed text\n- Page snapshot: [element details...]"
}
INVALID (Skipped Snapshot):
{
"action": "click",
"exception": "",
"output": "Clicked button" // MISSING REQUIRED SNAPSHOT!
}
VALID (With Snapshot):
{
"action": "click",
"exception": "",
"output": "Snapshot showed button at ref=s2e24\nClicked button\nNew snapshot shows state change"
}
EXECUTION RULES:
1. Execute ONE tool at a time
- NEVER combine multiple tool calls in a single action
- Wait for each tool's result before proceeding
- Break complex actions into sequential steps
2. ALWAYS use tools for actions
- Every action must use an appropriate tool
- Direct responses without tool use are not allowed
- Use proper tool for each action type
3. Snapshot First Policy
- ALWAYS begin with browser_snapshot
- Use snapshot data to inform next action
- Do not attempt interactions without context
4. Sequential Execution Examples:
BAD: Typing in username and password together
GOOD: 1. Snapshot
2. Type username
3. Snapshot
4. Type password
BAD: Click submit and verify result together
GOOD: 1. Snapshot
2. Click submit
3. Snapshot
4. Verify result
CORE WORKFLOW:
1. Page Analysis (REQUIRED FIRST STEP):
- ALWAYS begin by using browser_snapshot to analyze the page structure
- This provides critical context about available elements and their relationships
- Use this snapshot to inform subsequent actions and element selection
- Pay attention to form structure and validation elements
2. Form Interaction Strategy:
PRE-ACTION:
- Verify field state and accessibility
- Check for existing validation messages
- Ensure field is ready for input
ACTION:
- Type or interact with clear intent
- Watch for dynamic updates
- Monitor validation feedback
POST-ACTION:
- Verify input acceptance
- Check for validation messages
- Confirm state changes before proceeding
3. Element Interaction:
- Navigate pages using browser_navigate
* Handles URL navigation with proper load state waiting
* Supports both absolute and relative URLs
- Click elements using browser_click
* Requires element reference from snapshot
* Automatically waits for element to be actionable
* Handles dynamic content updates
- Input text using browser_type
* Supports all input types
* Can trigger form submission with Enter key
* Automatically clears existing content
- Advanced interactions:
* browser_hover: Mouse hover simulation
* browser_drag: Drag and drop operations
* browser_select_option: Dropdown selection
* browser_press_key: Keyboard input
* browser_choose_file: File upload handling
4. Verification and Assertions:
- Element assertions (browser_assert):
* isVisible: Check element visibility
* hasText: Verify element content
* isEnabled: Check interactability
* isChecked: Verify checkbox/radio state
DO NOT assume or fabricate expected values - use only provided values
- Page assertions (browser_page_assert):
* title: Verify page title
* url: Check current URL
* Supports exact and pattern matching
DO NOT assume or fabricate expected values - use only provided values
5. Documentation and Debugging:
- browser_take_screenshot: Capture page state
- browser_save_pdf: Generate PDF documentation
- browser_get_text: Extract element content
- browser_wait: Handle timing dependencies
5. Data Extraction or Extracting information from the page for further steps:
- browser_get_text: Extract element content
ERROR HANDLING AND VALIDATION:
1. Response Classification:
- TOOL ERRORS (Report as exceptions):
* Element not found or not interactable
* Action execution failures
* Network/system errors
* Timeouts
* Unexpected state changes
- APPLICATION FEEDBACK (Report as output):
* Form validation messages
* Required field alerts
* Format validation messages
* Business rule validations
* Success/confirmation messages
* Expected state changes
2. Form Validation Patterns:
- FIELD LEVEL:
* Required field messages
* Format restrictions
* Length limitations
* Invalid input feedback
- FORM LEVEL:
* Cross-field validations
* Business rule enforcement
* Submit button state
* Overall form state
3. Validation Response Strategy:
Success Path: {
action: "clear description",
exception: "",
output: "success details including state changes"
}
Validation Path: {
action: "clear description",
exception: "",
output: "validation details + current form state"
}
Error Path: {
action: "clear description",
exception: "tool/system error details",
output: "context of failure"
}
4. Timing Considerations:
- Wait for dynamic content when needed
- Handle loading states appropriately
- Consider network conditions
- Use explicit waits for stability
RESPONSE FORMAT:
Return a stringified JSON object with these exact fields:
{
"action": "Descriptive action name",
"exception": "Error message or empty string",
"output": "Detailed operation result"
}
Remember:
- Always start with browser_snapshot
- Verify elements before interaction
- Handle errors gracefully and descriptively
- Distinguish between tool errors and application behavior
- Maintain accurate state tracking`;
    // Tools handed to the agent, looked up on the tools module in this order.
    const toolNames = [
        "browser_click",
        "browser_type",
        "browser_get_text",
        "browser_navigate",
        "browser_snapshot",
        "browser_hover",
        "browser_drag",
        "browser_select_option",
        "browser_take_screenshot",
        "browser_go_back",
        "browser_wait",
        "browser_press_key",
        "browser_save_pdf",
        "browser_choose_file",
        "browser_assert",
        "browser_go_forward",
        "browser_page_assert",
    ];
    const browserTools = toolNames.map((toolName) => tools_1[toolName]);
    // Structured-output settings: a short formatting prompt plus the schema the
    // final reply has to satisfy.
    const structuredOutput = {
        prompt: `Return a stringified JSON object with exactly these fields:
{
"action": "<type of action performed>",
"exception": "<error message or empty string>",
"output": "<your output message>"
}`,
        schema: AutoResponseSchema,
    };
    const agent = (0, prebuilt_1.createReactAgent)({
        llm,
        tools: browserTools,
        stateModifier: systemPrompt,
        responseFormat: structuredOutput,
    });
    return { agent };
};
/**
 * Tells whether the agent's `exception` field carries a real error message.
 *
 * The model sometimes fills the field with a placeholder instead of an empty
 * string, so the value is trimmed and compared case-insensitively against the
 * known "no error" markers.
 *
 * @param {string} exception - The `exception` field of the validated response.
 * @returns {boolean} `true` when the field describes an actual failure.
 */
function hasReportedException(exception) {
    const noErrorMarkers = ["", "none", "null", "na", "n/a"];
    const normalized = String(exception ?? "").trim().toLowerCase();
    return !noErrorMarkers.includes(normalized);
}
/**
 * Runs a single natural-language instruction through a freshly created agent.
 *
 * @param {string} instruction - The instruction forwarded to the agent as a
 *   human message.
 * @param {{ page?: object }} [config] - Optional settings. When `config.page`
 *   is set it is registered with the session manager; otherwise the current
 *   page is used, and a new one is created if none is available.
 * @returns {Promise<string|null>} The agent's `output` field, or `null` when
 *   the output is empty.
 * @throws {Error} When the agent reports an exception. The error's `message`
 *   and `error` properties hold the reported exception text and `output`
 *   holds the agent's output, so callers reading `.error` / `.output` keep
 *   working.
 * @throws Whatever `AutoResponseSchema.parse` throws when the structured
 *   response does not match the schema.
 */
async function auto(instruction, config) {
    console.log(`[Auto] Processing instruction: "${instruction}"`);
    await (0, analytics_1.captureAutoCall)();
    if (config?.page) {
        browser_1.sessionManager.setPage(config.page);
        console.log(`[Auto] Page set from config`);
    }
    else {
        try {
            // Only probing for a page here; the returned value is not needed.
            browser_1.sessionManager.getPage();
        }
        catch {
            // In standalone mode, create a new page
            console.log(`[Auto] No existing page, creating new page`);
            await browser_1.context.createPage();
        }
    }
    // Create and invoke the agent
    console.log(`[Auto] Creating agent for instruction`);
    const { agent } = initializeAgent();
    const response = await agent.invoke({
        messages: [new messages_1.HumanMessage(instruction)],
    });
    const result = response.structuredResponse;
    // Process agent result
    try {
        console.log(`[Auto] Agent response:`, result);
        // Parse and validate the response
        const validatedResponse = AutoResponseSchema.parse(result);
        console.log(`[Auto] Action: ${validatedResponse.action}`);
        if (hasReportedException(validatedResponse.exception)) {
            console.log(`[Auto] Error: ${validatedResponse.exception}`);
            // Throw a real Error (with a stack trace and message) rather than a
            // bare object, while keeping the `error` and `output` properties.
            throw Object.assign(new Error(validatedResponse.exception), {
                error: validatedResponse.exception,
                output: validatedResponse.output,
            });
        }
        // Return the output or null if successful with no output
        return validatedResponse.output || null;
    }
    catch (error) {
        console.log(`[Auto] Error processing response:`, error);
        throw error;
    }
}
// Ensure analytics are flushed before the process exits.
// Registered with `once`: asynchronous work started from a 'beforeExit'
// listener keeps the process alive, and the event is emitted again when that
// work drains, so a persistent listener would call shutdown repeatedly.
// Failures are logged instead of surfacing as an unhandled rejection.
process.once("beforeExit", async () => {
    try {
        await (0, analytics_1.shutdown)();
    }
    catch (error) {
        console.error(`[Auto] Failed to flush analytics:`, error);
    }
});
// Public re-exports: `sessionManager` is exposed through an enumerable getter
// that reads it from the browser module on each access, and every export of
// ./types is forwarded via __exportStar.
var browser_2 = require("./browser");
Object.defineProperty(exports, "sessionManager", {
    enumerable: true,
    get() {
        return browser_2.sessionManager;
    },
});
__exportStar(require("./types"), exports);