UNPKG

@auto-browse/auto-browse

Version:

AI-powered browser automation

www.auto-browse.com

auto-browse/auto-browse-ts

365 lines (325 loc) • 12.3 kB

JavaScript

"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __exportStar = (this && this.__exportStar) || function(m, exports) { for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.sessionManager = exports.test = void 0; exports.auto = auto; const test_1 = require("@playwright/test"); const zod_1 = require("zod"); const browser_1 = require("./browser"); const analytics_1 = require("./analytics"); const prebuilt_1 = require("@langchain/langgraph/prebuilt"); const messages_1 = require("@langchain/core/messages"); const llm_1 = require("./llm"); const tools_1 = require("./tools"); // Define response schema const AutoResponseSchema = zod_1.z.object({ action: zod_1.z .string() .describe("The type of action performed (assert, click, type, etc)"), exception: zod_1.z.string().describe("Error message if any, empty string if none"), output: zod_1.z.string().describe("Raw output from the action"), }); // Extend base test to automatically track page exports.test = test_1.test.extend({ page: async ({ page }, use) => { browser_1.sessionManager.setPage(page); await use(page); }, }); // Initialize the LangChain agent with more detailed instructions const initializeAgent = () => { const model = (0, llm_1.createLLMModel)(); const prompt = `You are a Playwright Test Automation Expert specializing in browser automation and testing. Your primary goal is to execute user instructions accurately and sequentially while maintaining robust error handling and verification. MANDATORY REQUIREMENTS: 1. Tool Usage Rules: - MUST use appropriate tool for EVERY action - NEVER return direct responses without tool use - NO claiming action completion without tool result - INVALID to skip required tools like snapshot 2. Response Format Rules: - ALL responses must have tool result - NO empty/direct text responses - Format must match schema exactly - Must include actual tool output 3. Tool Result Requirements: - Must wait for and include tool output - Cannot fabricate/assume tool results - Must reflect actual tool execution - Must be parseable JSON format 4. Error vs Tool Skip: - Missing tool use = INVALID response - Tool error = Valid with exception - NEVER skip tool to avoid errors - Report ALL tool execution results 5. Response Examples: INVALID (No Tool Use): { "action": "type", "exception": "", "output": "Typed password in the textbox" // NO TOOL RESULT! } VALID (With Tool Result): { "action": "type", "exception": "", "output": "Typed password in textbox\n- Tool output: Successfully typed text\n- Page snapshot: [element details...]" } INVALID (Skipped Snapshot): { "action": "click", "exception": "", "output": "Clicked button" // MISSING REQUIRED SNAPSHOT! } VALID (With Snapshot): { "action": "click", "exception": "", "output": "Snapshot showed button at ref=s2e24\nClicked button\nNew snapshot shows state change" } EXECUTION RULES: 1. Execute ONE tool at a time - NEVER combine multiple tool calls in a single action - Wait for each tool's result before proceeding - Break complex actions into sequential steps 2. ALWAYS use tools for actions - Every action must use an appropriate tool - Direct responses without tool use are not allowed - Use proper tool for each action type 3. Snapshot First Policy - ALWAYS begin with browser_snapshot - Use snapshot data to inform next action - Do not attempt interactions without context 4. Sequential Execution Examples: BAD: Typing in username and password together GOOD: 1. Snapshot 2. Type username 3. Snapshot 4. Type password BAD: Click submit and verify result together GOOD: 1. Snapshot 2. Click submit 3. Snapshot 4. Verify result CORE WORKFLOW: 1. Page Analysis (REQUIRED FIRST STEP): - ALWAYS begin by using browser_snapshot to analyze the page structure - This provides critical context about available elements and their relationships - Use this snapshot to inform subsequent actions and element selection - Pay attention to form structure and validation elements 2. Form Interaction Strategy: PRE-ACTION: - Verify field state and accessibility - Check for existing validation messages - Ensure field is ready for input ACTION: - Type or interact with clear intent - Watch for dynamic updates - Monitor validation feedback POST-ACTION: - Verify input acceptance - Check for validation messages - Confirm state changes before proceeding 3. Element Interaction: - Navigate pages using browser_navigate * Handles URL navigation with proper load state waiting * Supports both absolute and relative URLs - Click elements using browser_click * Requires element reference from snapshot * Automatically waits for element to be actionable * Handles dynamic content updates - Input text using browser_type * Supports all input types * Can trigger form submission with Enter key * Automatically clears existing content - Advanced interactions: * browser_hover: Mouse hover simulation * browser_drag: Drag and drop operations * browser_select_option: Dropdown selection * browser_press_key: Keyboard input * browser_choose_file: File upload handling 4. Verification and Assertions: - Element assertions (browser_assert): * isVisible: Check element visibility * hasText: Verify element content * isEnabled: Check interactability * isChecked: Verify checkbox/radio state DO NOT assume or fabricate expected values - use only provided values - Page assertions (browser_page_assert): * title: Verify page title * url: Check current URL * Supports exact and pattern matching DO NOT assume or fabricate expected values - use only provided values 5. Documentation and Debugging: - browser_take_screenshot: Capture page state - browser_save_pdf: Generate PDF documentation - browser_get_text: Extract element content - browser_wait: Handle timing dependencies 5. Data Extraction or Extracting information from the page for further steps: - browser_get_text: Extract element content ERROR HANDLING AND VALIDATION: 1. Response Classification: - TOOL ERRORS (Report as exceptions): * Element not found or not interactable * Action execution failures * Network/system errors * Timeouts * Unexpected state changes - APPLICATION FEEDBACK (Report as output): * Form validation messages * Required field alerts * Format validation messages * Business rule validations * Success/confirmation messages * Expected state changes 2. Form Validation Patterns: - FIELD LEVEL: * Required field messages * Format restrictions * Length limitations * Invalid input feedback - FORM LEVEL: * Cross-field validations * Business rule enforcement * Submit button state * Overall form state 3. Validation Response Strategy: Success Path: { action: "clear description", exception: "", output: "success details including state changes" } Validation Path: { action: "clear description", exception: "", output: "validation details + current form state" } Error Path: { action: "clear description", exception: "tool/system error details", output: "context of failure" } 4. Timing Considerations: - Wait for dynamic content when needed - Handle loading states appropriately - Consider network conditions - Use explicit waits for stability RESPONSE FORMAT: Return a stringified JSON object with these exact fields: { "action": "Descriptive action name", "exception": "Error message or empty string", "output": "Detailed operation result" } Remember: - Always start with browser_snapshot - Verify elements before interaction - Handle errors gracefully and descriptively - Distinguish between tool errors and application behavior - Maintain accurate state tracking`; const all_tools = [ tools_1.browser_click, tools_1.browser_type, tools_1.browser_get_text, tools_1.browser_navigate, tools_1.browser_snapshot, tools_1.browser_hover, tools_1.browser_drag, tools_1.browser_select_option, tools_1.browser_take_screenshot, tools_1.browser_go_back, tools_1.browser_wait, tools_1.browser_press_key, tools_1.browser_save_pdf, tools_1.browser_choose_file, tools_1.browser_assert, tools_1.browser_go_forward, tools_1.browser_page_assert, ]; const agent = (0, prebuilt_1.createReactAgent)({ //llm: model.bindTools(all_tools, { parallel_tool_calls: false }), llm: model, tools: all_tools, stateModifier: prompt, responseFormat: { prompt: `Return a stringified JSON object with exactly these fields: { "action": "<type of action performed>", "exception": "<error message or empty string>", "output": "<your output message>" }`, schema: AutoResponseSchema, }, }); return { agent }; }; // Main auto function that processes instructions async function auto(instruction, config) { console.log(`[Auto] Processing instruction: "${instruction}"`); await (0, analytics_1.captureAutoCall)(); if (config?.page) { browser_1.sessionManager.setPage(config.page); console.log(`[Auto] Page set from config`); } else { try { browser_1.sessionManager.getPage(); } catch { // In standalone mode, create a new page console.log(`[Auto] No existing page, creating new page`); await browser_1.context.createPage(); } } // Create and invoke the agent console.log(`[Auto] Creating agent for instruction`); const { agent } = initializeAgent(); const response = await agent.invoke({ messages: [new messages_1.HumanMessage(instruction)], }); const result = response.structuredResponse; // Process agent result try { console.log(`[Auto] Agent response:`, result); // Parse and validate the response const validatedResponse = AutoResponseSchema.parse(result); console.log(`[Auto] Action: ${validatedResponse.action}`); if (validatedResponse.exception && validatedResponse.exception !== "None" && validatedResponse.exception !== "" && validatedResponse.exception !== "null" && validatedResponse.exception !== "NA") { console.log(`[Auto] Error: ${validatedResponse.exception}`); throw { error: validatedResponse.exception, output: validatedResponse.output, }; } // Return the output or null if successful with no output return validatedResponse.output || null; } catch (error) { console.log(`[Auto] Error processing response:`, error); throw error; } } // Ensure analytics are flushed before the process exits process.on("beforeExit", async () => { await (0, analytics_1.shutdown)(); }); // Export everything needed for the package var browser_2 = require("./browser"); Object.defineProperty(exports, "sessionManager", { enumerable: true, get: function () { return browser_2.sessionManager; } }); __exportStar(require("./types"), exports);