UNPKG

browser-x-mcp

Version:

AI-Powered Browser Automation with Advanced Form Testing - A Model Context Protocol (MCP) server that enables intelligent browser automation with form testing, element extraction, and comprehensive logging

580 lines (495 loc) 19.1 kB
/**
 * Atomic Navigation System for Browser[X]MCP
 * Two-step approach: 1) List available elements, 2) Provide atomic navigation
 *
 * @author Browser[X]MCP Team
 * @version 1.0.0
 */

/**
 * @typedef {Object} NavigationElement
 * @property {string} id - Unique element identifier
 * @property {string} type - Element type (button, input, link, etc.)
 * @property {string} action - Available action (click, input, select)
 * @property {string} description - Human-readable description
 * @property {number[]} coordinates - [x, y, width, height]
 * @property {boolean} primary - Whether this is a primary action
 * @property {string} context - Context/purpose of the element
 */

/**
 * @typedef {Object} AtomicAction
 * @property {string} element_id - Target element ID
 * @property {string} action_type - Type of action to perform
 * @property {string} action_description - What this action will do
 * @property {number[]} exact_coordinates - Precise click coordinates
 * @property {Object} parameters - Action-specific parameters
 */

/**
 * Lower-case a possibly missing text value.
 *
 * Canvas elements do not always carry `content` / `action`; treating a
 * missing value as the empty string lets callers run substring checks
 * without guarding every access.
 *
 * @param {*} value - Text value, or null/undefined
 * @returns {string} Lower-cased string ('' when value is null/undefined)
 */
function toLowerText(value) {
  return String(value ?? '').toLowerCase();
}

class AtomicNavigationSystem {
  constructor() {
    // Canvas data from the most recent listNavigationElements() call.
    this.lastCanvasData = null;
    // One entry per action produced by generateAtomicAction().
    this.navigationHistory = [];
  }

  /**
   * Step 1: List all available navigation elements
   *
   * Stores `canvasData` on the instance so generateAtomicAction() can use it.
   *
   * @param {Object} canvasData - Virtual canvas data (reads `visible_elements` and `context`)
   * @returns {Object} Structured list of actionable elements
   */
  listNavigationElements(canvasData) {
    this.lastCanvasData = canvasData;

    const elements = canvasData.visible_elements
      .filter((el) => el.interactive)
      .map((el) => this.createNavigationElement(el, canvasData.context));

    // Group by purpose/context
    const grouped = this.groupElementsByPurpose(elements);
    const primaryElements = elements.filter((el) => el.primary);

    return {
      page_context: {
        type: canvasData.context.page_type,
        main_flow: canvasData.context.flow,
        has_forms: canvasData.context.forms_count > 0,
        has_navigation: canvasData.context.has_navigation,
      },
      available_actions: {
        primary: primaryElements,
        forms: grouped.forms || [],
        navigation: grouped.navigation || [],
        content: grouped.content || [],
        other: grouped.other || [],
      },
      summary: {
        total_interactive: elements.length,
        primary_actions: primaryElements.length,
        form_fields: elements.filter((el) => el.type.includes('input')).length,
        clickable_buttons: elements.filter((el) => el.type === 'button').length,
      },
    };
  }

  /**
   * Step 2: Generate atomic navigation action
   *
   * @param {string} intent - User's navigation intent
   * @param {string} elementId - Optional specific element ID
   * @returns {AtomicAction} Precise navigation action
   * @throws {Error} When no canvas data has been stored yet, or no element matches
   */
  generateAtomicAction(intent, elementId = null) {
    if (!this.lastCanvasData) {
      throw new Error('No canvas data available. Call listNavigationElements first.');
    }

    // Direct element selection when an ID is given, intent-based otherwise.
    const targetElement = elementId
      ? this.lastCanvasData.visible_elements.find((el) => el.id === elementId)
      : this.findElementByIntent(intent);

    if (!targetElement) {
      throw new Error(`No suitable element found for intent: "${intent}"`);
    }

    const action = this.createAtomicAction(targetElement, intent);

    this.navigationHistory.push({
      timestamp: Date.now(),
      intent,
      element_id: targetElement.id,
      action: action.action_type,
      coordinates: action.exact_coordinates,
    });

    return action;
  }

  /**
   * Create navigation element from canvas data
   *
   * @param {Object} element - Raw canvas element
   * @param {Object} pageContext - Page context taken from the canvas data
   * @returns {Object} Navigation element with description, context and accessibility hints
   */
  createNavigationElement(element, pageContext) {
    const description = this.generateElementDescription(element, pageContext);
    const context = this.determineElementContext(element, pageContext);
    // Elements without content get an empty preview instead of throwing.
    const content = String(element.content ?? '');

    return {
      id: element.id,
      type: element.type,
      action: element.action,
      description,
      coordinates: element.rect,
      primary: element.primary,
      context,
      content_preview: content.substring(0, 50),
      accessibility: {
        has_label: content.length > 0,
        clickable: element.interactive,
        keyboard_accessible: element.type !== 'div',
      },
    };
  }

  /**
   * Generate human-readable description for element
   *
   * @param {Object} element - Raw canvas element (reads `rect`, `type`, `content`, `primary`, `action`)
   * @param {Object} pageContext - Page context; `page_type === 'login_page'` adds login hints
   * @returns {string} Description text
   */
  generateElementDescription(element, pageContext) {
    const [x, y, width, height] = element.rect;
    const position = this.describePosition(x, y, width, height);

    let description = `${element.type}`;

    if (element.content) {
      description += ` "${element.content}"`;
    }

    if (element.primary) {
      description += ' (primary action)';
    }

    description += ` located ${position}`;

    // Add context-specific descriptions
    if (pageContext.page_type === 'login_page') {
      if (element.action === 'login') {
        description += ' - will submit login form';
      } else if (element.type.includes('input')) {
        description += toLowerText(element.content).includes('password')
          ? ' - password field'
          : ' - username/email field';
      }
    }

    return description;
  }

  /**
   * Describe element position in human terms
   *
   * The classification uses the element's top-left corner (x, y); width and
   * height are accepted but not used.
   *
   * @param {number} x - Left edge
   * @param {number} y - Top edge
   * @param {number} width - Element width (unused)
   * @param {number} height - Element height (unused)
   * @param {{width?: number, height?: number}} [viewport] - Viewport size; defaults to 1280x720
   * @returns {string} `${vertical}-${horizontal}`, e.g. "top-left"
   */
  describePosition(x, y, width, height, viewport = {}) {
    const { width: viewportWidth = 1280, height: viewportHeight = 720 } = viewport;

    let horizontal = 'center';
    if (x < viewportWidth * 0.3) horizontal = 'left';
    else if (x > viewportWidth * 0.7) horizontal = 'right';

    let vertical = 'middle';
    if (y < viewportHeight * 0.3) vertical = 'top';
    else if (y > viewportHeight * 0.7) vertical = 'bottom';

    return `${vertical}-${horizontal}`;
  }

  /**
   * Determine element context/purpose
   *
   * Checks run in order; the first one that matches wins.
   *
   * @param {Object} element - Raw canvas element
   * @param {Object} pageContext - Page context (currently unused)
   * @returns {string} Context label
   */
  determineElementContext(element, pageContext) {
    const content = toLowerText(element.content);

    if (element.primary) return 'primary_action';
    if (element.type.includes('input')) return 'form_input';
    if (element.type === 'link' && content.includes('nav')) return 'navigation';
    if (element.action === 'search') return 'search';
    if (content.includes('login')) return 'authentication';
    if (content.includes('buy') || content.includes('cart')) return 'commerce';

    return 'general_interaction';
  }

  /**
   * Group elements by their purpose
   *
   * @param {Object[]} elements - Navigation elements carrying a `context` label
   * @returns {{forms: Object[], navigation: Object[], content: Object[], other: Object[]}}
   */
  groupElementsByPurpose(elements) {
    const groups = {
      forms: [],
      navigation: [],
      content: [],
      other: [],
    };

    for (const el of elements) {
      switch (el.context) {
        case 'form_input':
        case 'primary_action':
        case 'authentication':
          groups.forms.push(el);
          break;
        case 'navigation':
          groups.navigation.push(el);
          break;
        case 'search':
        case 'commerce':
          groups.content.push(el);
          break;
        default:
          groups.other.push(el);
      }
    }

    return groups;
  }

  /**
   * Find element by user intent
   *
   * Collects at most one candidate per matching rule and returns the one with
   * the highest confidence.
   *
   * @param {string} intent - User's navigation intent
   * @returns {Object|null} Matching canvas element, or null when nothing matches
   */
  findElementByIntent(intent) {
    const elements = this.lastCanvasData.visible_elements.filter((el) => el.interactive);
    const lowerIntent = intent.toLowerCase();

    const matches = [];

    // 1. Content match (substring)
    const contentMatch = elements.find((el) => toLowerText(el.content).includes(lowerIntent));
    if (contentMatch) matches.push({ element: contentMatch, confidence: 0.9 });

    // 2. Action type match
    const actionMatch = elements.find((el) => toLowerText(el.action).includes(lowerIntent));
    if (actionMatch) matches.push({ element: actionMatch, confidence: 0.8 });

    // 3. Primary action for common intents
    if (['submit', 'login', 'enter', 'go'].includes(lowerIntent)) {
      const primaryAction = elements.find((el) => el.primary);
      if (primaryAction) matches.push({ element: primaryAction, confidence: 0.85 });
    }

    // 4. Type-based matching
    if (lowerIntent.includes('input') || lowerIntent.includes('field')) {
      const inputField = elements.find((el) => el.type.includes('input'));
      if (inputField) matches.push({ element: inputField, confidence: 0.7 });
    }

    if (matches.length === 0) {
      return null;
    }

    // Return highest confidence match
    matches.sort((a, b) => b.confidence - a.confidence);
    return matches[0].element;
  }

  /**
   * Create atomic action for target element
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent
   * @returns {AtomicAction} Action with verification and fallback data attached
   */
  createAtomicAction(element, intent) {
    const [x, y, width, height] = element.rect;

    // Calculate optimal click coordinates (center of element)
    const clickX = x + Math.round(width / 2);
    const clickY = y + Math.round(height / 2);

    return {
      element_id: element.id,
      action_type: this.determineActionType(element, intent),
      action_description: this.generateActionDescription(element, intent),
      exact_coordinates: [clickX, clickY],
      parameters: this.generateActionParameters(element, intent),
      verification: {
        expected_element_type: element.type,
        expected_content: element.content,
        element_bounds: element.rect,
      },
      fallback: {
        alternative_coordinates: [
          [x + 5, y + 5], // Top-left fallback
          [x + width - 5, y + height - 5], // Bottom-right fallback
        ],
      },
    };
  }

  /**
   * Determine specific action type
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent (currently unused)
   * @returns {string} 'input_text', 'select_option', 'click' or 'interact'
   */
  determineActionType(element, intent) {
    if (element.type.includes('input')) return 'input_text';
    if (element.type === 'select') return 'select_option';
    if (element.type === 'button' || element.type === 'link') return 'click';
    return 'interact';
  }

  /**
   * Generate action description
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent
   * @returns {string} Sentence describing the action
   */
  generateActionDescription(element, intent) {
    const actionType = this.determineActionType(element, intent);

    switch (actionType) {
      case 'click':
        return `Click on ${element.type} "${element.content}" to ${intent}`;
      case 'input_text':
        return `Input text into ${element.content || 'field'} for ${intent}`;
      case 'select_option':
        return `Select option from ${element.content || 'dropdown'} for ${intent}`;
      default:
        return `Interact with ${element.type} to ${intent}`;
    }
  }

  /**
   * Generate action-specific parameters
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent (currently unused)
   * @returns {Object} Parameter object for the action
   */
  generateActionParameters(element, intent) {
    const params = {
      wait_after_action: 500, // ms
      verify_action: true,
      retry_on_failure: true,
      max_retries: 3,
    };

    if (element.type.includes('input')) {
      params.clear_first = true;
      params.input_method = 'type'; // vs 'fill'
      params.trigger_events = ['input', 'change'];
    }

    if (element.primary) {
      params.wait_after_action = 2000; // Longer wait for primary actions
      params.expect_navigation = true;
    }

    return params;
  }

  /**
   * Get navigation history
   *
   * @returns {Object[]} The instance's history array (not a copy)
   */
  getNavigationHistory() {
    return this.navigationHistory;
  }

  /**
   * Reset system state
   */
  reset() {
    this.lastCanvasData = null;
    this.navigationHistory = [];
  }
}

/**
 * Improved element matching algorithm.
 *
 * Tries, in order: full-intent substring match on content, per-word match on
 * content, then aria-label/title match. Falls back to the unfiltered list
 * when nothing matches.
 *
 * @param {string} intent - User's navigation intent
 * @param {Object[]} elements - Candidate elements
 * @returns {Object[]} Matching elements, or `elements` itself when none match
 */
function improveElementMatching(intent, elements) {
  const intentLower = intent.toLowerCase();
  // Split on whitespace runs and drop empty words: an empty string is a
  // substring of every string and would make every element "match".
  const intentWords = intentLower.split(/\s+/).filter((word) => word.length > 0);

  // 1. Exact text matches (highest priority)
  const exactMatches = elements.filter(
    (el) => el.content && el.content.toLowerCase().includes(intentLower),
  );
  if (exactMatches.length > 0) {
    return exactMatches;
  }

  // 2. Word-based matches
  const wordMatches = elements.filter((el) => {
    if (!el.content) return false;
    const contentLower = el.content.toLowerCase();
    return intentWords.some((word) => contentLower.includes(word));
  });
  if (wordMatches.length > 0) {
    return wordMatches;
  }

  // 3. Aria-label and title matches
  const attributeMatches = elements.filter((el) => {
    const ariaLabel = el.ariaLabel?.toLowerCase() || '';
    const title = el.title?.toLowerCase() || '';

    return (
      ariaLabel.includes(intentLower) ||
      title.includes(intentLower) ||
      intentWords.some((word) => ariaLabel.includes(word) || title.includes(word))
    );
  });

  return attributeMatches.length > 0 ? attributeMatches : elements;
}

/**
 * Analyze page context for better element selection.
 *
 * @param {Object} virtualCanvasData - Virtual canvas data (reads `visible_elements`)
 * @returns {Object} Context with `pageType`, `mainSections`, `navigationElements`,
 *   `primaryActions` and `isMenuOpen`
 */
function analyzePageContext(virtualCanvasData) {
  const visible = virtualCanvasData.visible_elements;
  const navLabels = ['Home', 'Trending', 'Subscriptions', 'Shorts'];

  return {
    pageType: 'general',
    mainSections: [],
    // Interactive elements whose content contains one of the known nav labels,
    // plus anything typed 'navigation'.
    navigationElements: visible.filter(
      (el) =>
        el.type === 'navigation' ||
        (el.interactive && navLabels.some((nav) => el.content && el.content.includes(nav))),
    ),
    primaryActions: visible.filter((el) => el.primary),
    // The menu is considered open when any element mentions these labels.
    isMenuOpen: visible.some(
      (el) => el.content && (el.content.includes('Trending') || el.content.includes('Explore')),
    ),
  };
}

/**
 * Find element candidates with improved scoring.
 *
 * @param {string} intent - User's navigation intent
 * @param {Object} virtualCanvasData - Virtual canvas data (reads `visible_elements`)
 * @param {Object} pageContext - Result of analyzePageContext (reads `navigationElements`)
 * @returns {Object[]} Copies of the matched elements with `score` and `centerCoordinates` added
 */
function findElementCandidates(intent, virtualCanvasData, pageContext) {
  const allElements = virtualCanvasData.visible_elements.filter((el) => el.interactive);
  const matchedElements = improveElementMatching(intent, allElements);

  const intentLower = intent.toLowerCase();
  const isNavigationIntent = ['trending', 'home', 'subscriptions', 'shorts'].includes(intentLower);

  // Score candidates based on multiple factors
  return matchedElements.map((element) => {
    const contentLower = (element.content || '').toLowerCase();
    let score = 0;

    // Exact match bonus
    if (contentLower === intentLower) score += 100;

    // Contains intent bonus
    if (contentLower.includes(intentLower)) score += 50;

    // Primary action bonus
    if (element.primary) score += 30;

    // Navigation element bonus for navigation intents
    if (isNavigationIntent && pageContext.navigationElements.includes(element)) {
      score += 40;
    }

    // Interactive element bonus
    if (element.interactive) score += 20;

    // Reduce score for elements with very long content (likely not what we want)
    if (element.content && element.content.length > 100) score -= 10;

    const [x, y, width, height] = element.rect;

    return {
      ...element,
      score,
      centerCoordinates: [Math.round(x + width / 2), Math.round(y + height / 2)],
    };
  });
}

/**
 * Rank element candidates by relevance.
 *
 * Neither the `candidates` array nor the candidate objects are modified; the
 * best match is returned as a copy with `confidence` and `recommendedAction`
 * attached.
 *
 * @param {Object[]} candidates - Scored candidates (each with a numeric `score`)
 * @param {string} intent - User's navigation intent (used in the error message)
 * @param {Object} pageContext - Page context (currently unused)
 * @returns {Object} Best candidate with `confidence` in [0, 1] and `recommendedAction`
 * @throws {Error} When `candidates` is empty
 */
function rankElementCandidates(candidates, intent, pageContext) {
  // Sort a copy (highest score first) so the caller's array keeps its order.
  const ranked = [...candidates].sort((a, b) => b.score - a.score);

  const bestMatch = ranked[0];
  if (!bestMatch) {
    throw new Error(`No suitable element found for intent: ${intent}`);
  }

  return {
    ...bestMatch,
    // Clamp to [0, 1]: scores can exceed 100 or drop below zero.
    confidence: Math.max(0, Math.min(bestMatch.score / 100, 1.0)),
    recommendedAction: getRecommendedAction(bestMatch),
  };
}

/**
 * Pick the action to recommend for an element.
 *
 * @param {Object} element - Element (reads `type` and `action`)
 * @returns {string} 'input', 'click', 'navigate', the element's own `action`, or 'click'
 */
function getRecommendedAction(element) {
  switch (element.type) {
    case 'input_text':
      return 'input';
    case 'button':
      return 'click';
    case 'link':
      return 'navigate';
    default:
      return element.action ? element.action : 'click';
  }
}

/**
 * Enhanced atomic action generation.
 *
 * @param {string} intent - User's navigation intent
 * @param {string} elementId - Optional element ID; when given, intent matching is skipped
 * @param {Object} virtualCanvasData - Virtual canvas data (reads `visible_elements`)
 * @returns {Object} Atomic action with coordinates, parameters, verification, fallback and confidence
 * @throws {Error} When `elementId` is given but not found, or no element matches the intent
 */
function generateSmartAction(intent, elementId, virtualCanvasData) {
  const pageContext = analyzePageContext(virtualCanvasData);

  let targetElement;

  if (elementId) {
    // Find specific element by ID
    targetElement = virtualCanvasData.visible_elements.find((el) => el.id === elementId);
    if (!targetElement) {
      throw new Error(`Element with ID '${elementId}' not found`);
    }
  } else {
    // Find best matching element by intent
    const candidates = findElementCandidates(intent, virtualCanvasData, pageContext);
    targetElement = rankElementCandidates(candidates, intent, pageContext);
  }

  const [x, y, width, height] = targetElement.rect;
  const centerCoords = targetElement.centerCoordinates || [
    Math.round(x + width / 2),
    Math.round(y + height / 2),
  ];

  // Elements selected by ID never went through ranking, so derive the action
  // here to keep both selection paths consistent.
  const recommendedAction =
    targetElement.recommendedAction || getRecommendedAction(targetElement);

  // Generate fallback coordinates
  const fallbackCoords = [
    [centerCoords[0] - 10, centerCoords[1] - 10],
    [centerCoords[0] + 10, centerCoords[1] + 10],
    [x + 5, y + 5],
  ];

  return {
    element_id: targetElement.id,
    action_type: recommendedAction,
    action_description: `${recommendedAction} on ${targetElement.type} "${targetElement.content || 'element'}" to ${intent}`,
    exact_coordinates: centerCoords,
    parameters: {
      wait_after_action: 500,
      verify_action: true,
      retry_on_failure: true,
      max_retries: 3,
    },
    verification: {
      expected_element_type: targetElement.type,
      expected_content: targetElement.content,
      element_bounds: targetElement.rect,
    },
    fallback: {
      alternative_coordinates: fallbackCoords,
    },
    // `??` keeps a legitimate confidence of 0 instead of replacing it with the default.
    confidence: targetElement.confidence ?? 0.8,
  };
}

// NOTE(review): AtomicNavigationSystem is not part of this export list — confirm that is intentional.
export {
  improveElementMatching,
  analyzePageContext,
  findElementCandidates,
  rankElementCandidates,
  generateSmartAction,
  getRecommendedAction,
};