/*
 * browser-x-mcp
 * Version:
 * AI-Powered Browser Automation with Advanced Form Testing - A Model Context Provider (MCP) server that enables intelligent browser automation with form testing, element extraction, and comprehensive logging
 * 580 lines (495 loc) • 19.1 kB
 * JavaScript
 */
/**
* Atomic Navigation System for Browser[X]MCP
* Two-step approach: 1) List available elements, 2) Provide atomic navigation
*
* @author Browser[X]MCP Team
* @version 1.0.0
*/
/**
* @typedef {Object} NavigationElement
* @property {string} id - Unique element identifier
* @property {string} type - Element type (button, input, link, etc.)
* @property {string} action - Available action (click, input, select)
* @property {string} description - Human-readable description
* @property {number[]} coordinates - [x, y, width, height]
* @property {boolean} primary - Whether this is a primary action
* @property {string} context - Context/purpose of the element
* @property {string} content_preview - First 50 characters of the element's content
* @property {Object} accessibility - Flags: has_label, clickable, keyboard_accessible
*/
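/**
* @typedef {Object} AtomicAction
* @property {string} element_id - Target element ID
* @property {string} action_type - Type of action to perform
* @property {string} action_description - What this action will do
* @property {number[]} exact_coordinates - Precise click coordinates as [x, y] (center of the element)
* @property {Object} parameters - Action-specific parameters
* @property {Object} verification - Expected element type, content and bounds to check before acting
* @property {Object} fallback - Alternative click coordinates to try if the primary ones fail
*/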
/**
 * Coerce an optional element field (content, type, action) to a string so the
 * text helpers below never throw on elements that omit the field.
 *
 * @param {*} value - Raw field value, possibly null/undefined
 * @returns {string} The value as a string, or '' when it is null/undefined
 */
function elementText(value) {
  return value == null ? '' : String(value);
}

/**
 * Check that a rect looks like `[x, y, width, height]` with finite numbers.
 *
 * @param {*} rect - Candidate rect
 * @returns {boolean} True when the first four entries are finite numbers
 */
function isUsableRect(rect) {
  return (
    Array.isArray(rect) &&
    rect.length >= 4 &&
    rect.slice(0, 4).every((n) => Number.isFinite(n))
  );
}

/**
 * Two-step navigation helper: first list the interactive elements of a
 * virtual canvas, then turn an intent (or an explicit element ID) into one
 * precise, self-describing action.
 *
 * NOTE(review): this class is not part of the export list at the end of the
 * file — confirm whether it is meant to be reachable by importers.
 */
class AtomicNavigationSystem {
  /**
   * @param {Object} [options]
   * @param {number} [options.viewportWidth=1280] - Viewport width used to describe positions
   * @param {number} [options.viewportHeight=720] - Viewport height used to describe positions
   */
  constructor({ viewportWidth = 1280, viewportHeight = 720 } = {}) {
    this.lastCanvasData = null;
    this.navigationHistory = [];
    this.viewportWidth = viewportWidth;
    this.viewportHeight = viewportHeight;
  }

  /**
   * Step 1: List all available navigation elements.
   *
   * @param {Object} canvasData - Virtual canvas data with a `visible_elements` array
   *   and an optional `context` object
   * @returns {Object} Structured list of actionable elements
   * @throws {TypeError} When `canvasData.visible_elements` is not an array
   */
  listNavigationElements(canvasData) {
    if (!canvasData || !Array.isArray(canvasData.visible_elements)) {
      throw new TypeError('canvasData.visible_elements must be an array');
    }
    // Only remember canvas data that passed validation.
    this.lastCanvasData = canvasData;

    const pageContext = canvasData.context ?? {};
    const elements = canvasData.visible_elements
      .filter((el) => el.interactive)
      .map((el) => this.createNavigationElement(el, pageContext));

    // Group by purpose/context
    const grouped = this.groupElementsByPurpose(elements);
    const primary = elements.filter((el) => el.primary);

    return {
      page_context: {
        type: pageContext.page_type,
        main_flow: pageContext.flow,
        has_forms: pageContext.forms_count > 0,
        has_navigation: pageContext.has_navigation,
      },
      available_actions: {
        primary,
        forms: grouped.forms,
        navigation: grouped.navigation,
        content: grouped.content,
        other: grouped.other,
      },
      summary: {
        total_interactive: elements.length,
        primary_actions: primary.length,
        form_fields: elements.filter((el) => elementText(el.type).includes('input')).length,
        clickable_buttons: elements.filter((el) => el.type === 'button').length,
      },
    };
  }

  /**
   * Step 2: Generate atomic navigation action.
   *
   * @param {string} intent - User's navigation intent
   * @param {string} elementId - Optional specific element ID
   * @returns {AtomicAction} Precise navigation action
   * @throws {Error} When no canvas data was listed yet, the ID is unknown,
   *   or no element matches the intent
   */
  generateAtomicAction(intent, elementId = null) {
    if (!this.lastCanvasData) {
      throw new Error('No canvas data available. Call listNavigationElements first.');
    }

    let targetElement;
    if (elementId) {
      // Direct element selection
      const visible = this.lastCanvasData.visible_elements ?? [];
      targetElement = visible.find((el) => el.id === elementId);
      if (!targetElement) {
        // Report the ID, not the intent: the intent was never used for lookup.
        throw new Error(`Element with ID '${elementId}' not found`);
      }
    } else {
      // Intent-based selection
      targetElement = this.findElementByIntent(intent);
      if (!targetElement) {
        throw new Error(`No suitable element found for intent: "${intent}"`);
      }
    }

    const action = this.createAtomicAction(targetElement, intent);

    this.navigationHistory.push({
      timestamp: Date.now(),
      intent,
      element_id: targetElement.id,
      action: action.action_type,
      coordinates: action.exact_coordinates,
    });
    return action;
  }

  /**
   * Create navigation element from canvas data.
   *
   * @param {Object} element - Raw canvas element
   * @param {Object} pageContext - Page context (may be empty)
   * @returns {NavigationElement} Listing entry for the element
   */
  createNavigationElement(element, pageContext) {
    const content = elementText(element.content);
    return {
      id: element.id,
      type: element.type,
      action: element.action,
      description: this.generateElementDescription(element, pageContext),
      coordinates: element.rect,
      primary: element.primary,
      context: this.determineElementContext(element, pageContext),
      content_preview: content.substring(0, 50),
      accessibility: {
        has_label: content.length > 0,
        clickable: element.interactive,
        keyboard_accessible: element.type !== 'div',
      },
    };
  }

  /**
   * Generate human-readable description for element.
   *
   * @param {Object} element - Raw canvas element
   * @param {Object} pageContext - Page context (may be empty)
   * @returns {string} Description such as `button "Login" (primary action) located middle-left`
   */
  generateElementDescription(element, pageContext) {
    const type = elementText(element.type);
    const content = elementText(element.content);

    let description = type;
    if (content) {
      description += ` "${content}"`;
    }
    if (element.primary) {
      description += ' (primary action)';
    }
    // Skip the position rather than failing the whole listing on a bad rect.
    if (isUsableRect(element.rect)) {
      const [x, y, width, height] = element.rect;
      description += ` located ${this.describePosition(x, y, width, height)}`;
    }

    // Add context-specific descriptions
    if (pageContext?.page_type === 'login_page') {
      if (element.action === 'login') {
        description += ' - will submit login form';
      } else if (type.includes('input')) {
        description += content.toLowerCase().includes('password')
          ? ' - password field'
          : ' - username/email field';
      }
    }
    return description;
  }

  /**
   * Describe element position in human terms, based on the element's center
   * relative to the configured viewport.
   *
   * @param {number} x - Left edge
   * @param {number} y - Top edge
   * @param {number} [width=0] - Element width
   * @param {number} [height=0] - Element height
   * @returns {string} `${vertical}-${horizontal}`, e.g. `top-left`
   */
  describePosition(x, y, width = 0, height = 0) {
    const centerX = x + width / 2;
    const centerY = y + height / 2;

    let horizontal = 'center';
    if (centerX < this.viewportWidth * 0.3) horizontal = 'left';
    else if (centerX > this.viewportWidth * 0.7) horizontal = 'right';

    let vertical = 'middle';
    if (centerY < this.viewportHeight * 0.3) vertical = 'top';
    else if (centerY > this.viewportHeight * 0.7) vertical = 'bottom';

    return `${vertical}-${horizontal}`;
  }

  /**
   * Determine element context/purpose.
   *
   * @param {Object} element - Raw canvas element
   * @param {Object} pageContext - Page context (currently unused)
   * @returns {string} One of the context labels consumed by groupElementsByPurpose
   */
  determineElementContext(element, pageContext) {
    const type = elementText(element.type);
    const content = elementText(element.content).toLowerCase();

    if (element.primary) return 'primary_action';
    if (type.includes('input')) return 'form_input';
    if (type === 'link' && content.includes('nav')) return 'navigation';
    if (element.action === 'search') return 'search';
    if (content.includes('login')) return 'authentication';
    if (content.includes('buy') || content.includes('cart')) return 'commerce';
    return 'general_interaction';
  }

  /**
   * Group elements by their purpose.
   *
   * @param {NavigationElement[]} elements - Listing entries with a `context` label
   * @returns {{forms: Array, navigation: Array, content: Array, other: Array}}
   */
  groupElementsByPurpose(elements) {
    const groups = {
      forms: [],
      navigation: [],
      content: [],
      other: [],
    };
    elements.forEach((el) => {
      switch (el.context) {
        case 'form_input':
        case 'primary_action':
        case 'authentication':
          groups.forms.push(el);
          break;
        case 'navigation':
          groups.navigation.push(el);
          break;
        case 'search':
        case 'commerce':
          groups.content.push(el);
          break;
        default:
          groups.other.push(el);
      }
    });
    return groups;
  }

  /**
   * Find element by user intent.
   *
   * Strategies are tried from most to least confident and the first hit wins:
   * exact content, content substring, primary action (for common submit-like
   * intents), action name, then input type.
   *
   * @param {string} intent - User's navigation intent
   * @returns {Object|null} Matching canvas element, or null when nothing matches
   */
  findElementByIntent(intent) {
    const lowerIntent = elementText(intent).trim().toLowerCase();
    // A blank intent would "match" every element via includes(''); refuse it.
    if (lowerIntent === '') return null;

    const visible = this.lastCanvasData?.visible_elements ?? [];
    const elements = visible.filter((el) => el.interactive);
    const contentOf = (el) => elementText(el.content).toLowerCase();

    const strategies = [
      // 1. True exact content match, so "Login" beats "Login with Google"
      () => elements.find((el) => contentOf(el) === lowerIntent),
      // 2. Content contains the intent
      () => elements.find((el) => contentOf(el).includes(lowerIntent)),
      // 3. Primary action for common intents
      () =>
        ['submit', 'login', 'enter', 'go'].includes(lowerIntent)
          ? elements.find((el) => el.primary)
          : undefined,
      // 4. Action type match
      () => elements.find((el) => elementText(el.action).toLowerCase().includes(lowerIntent)),
      // 5. Type-based matching
      () =>
        lowerIntent.includes('input') || lowerIntent.includes('field')
          ? elements.find((el) => elementText(el.type).includes('input'))
          : undefined,
    ];

    for (const pick of strategies) {
      const found = pick();
      if (found) return found;
    }
    return null;
  }

  /**
   * Create atomic action for target element.
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent
   * @returns {AtomicAction} Action with center coordinates, verification data and fallbacks
   * @throws {TypeError} When the element has no usable `[x, y, width, height]` rect
   */
  createAtomicAction(element, intent) {
    if (!isUsableRect(element.rect)) {
      throw new TypeError(`Element '${element.id}' has no usable [x, y, width, height] rect`);
    }
    const [x, y, width, height] = element.rect;

    // Calculate optimal click coordinates (center of element)
    const clickX = x + Math.round(width / 2);
    const clickY = y + Math.round(height / 2);

    // Inset fallbacks by 5px, but never past the center of a small element.
    const insetX = Math.max(0, Math.min(5, Math.floor(width / 2)));
    const insetY = Math.max(0, Math.min(5, Math.floor(height / 2)));

    return {
      element_id: element.id,
      action_type: this.determineActionType(element, intent),
      action_description: this.generateActionDescription(element, intent),
      exact_coordinates: [clickX, clickY],
      parameters: this.generateActionParameters(element, intent),
      verification: {
        expected_element_type: element.type,
        expected_content: element.content,
        element_bounds: element.rect,
      },
      fallback: {
        alternative_coordinates: [
          [x + insetX, y + insetY], // Top-left fallback
          [x + width - insetX, y + height - insetY], // Bottom-right fallback
        ],
      },
    };
  }

  /**
   * Determine specific action type.
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent (currently unused)
   * @returns {string} 'input_text', 'select_option', 'click' or 'interact'
   */
  determineActionType(element, intent) {
    const type = elementText(element.type);
    if (type.includes('input')) return 'input_text';
    if (type === 'select') return 'select_option';
    if (type === 'button' || type === 'link') return 'click';
    return 'interact';
  }

  /**
   * Generate action description.
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent
   * @returns {string} Sentence describing what the action does
   */
  generateActionDescription(element, intent) {
    const type = elementText(element.type);
    const content = elementText(element.content);
    switch (this.determineActionType(element, intent)) {
      case 'click':
        return `Click on ${type} "${content}" to ${intent}`;
      case 'input_text':
        return `Input text into ${content || 'field'} for ${intent}`;
      case 'select_option':
        return `Select option from ${content || 'dropdown'} for ${intent}`;
      default:
        return `Interact with ${type} to ${intent}`;
    }
  }

  /**
   * Generate action-specific parameters.
   *
   * @param {Object} element - Target canvas element
   * @param {string} intent - User's navigation intent (currently unused)
   * @returns {Object} Wait/retry settings plus input- and primary-specific extras
   */
  generateActionParameters(element, intent) {
    const params = {
      wait_after_action: 500, // ms
      verify_action: true,
      retry_on_failure: true,
      max_retries: 3,
    };
    if (elementText(element.type).includes('input')) {
      params.clear_first = true;
      params.input_method = 'type'; // vs 'fill'
      params.trigger_events = ['input', 'change'];
    }
    if (element.primary) {
      params.wait_after_action = 2000; // Longer wait for primary actions
      params.expect_navigation = true;
    }
    return params;
  }

  /**
   * Get navigation history.
   *
   * @returns {Object[]} The live history array (one entry per generated action)
   */
  getNavigationHistory() {
    return this.navigationHistory;
  }

  /**
   * Reset system state. Viewport settings from the constructor are kept.
   */
  reset() {
    this.lastCanvasData = null;
    this.navigationHistory = [];
  }
}
// Improved element matching algorithm
/**
 * Narrow a list of elements to those that best match an intent.
 *
 * Tiers are tried in order and the first non-empty tier is returned:
 * 1. content contains the whole intent phrase,
 * 2. content contains any intent word,
 * 3. `ariaLabel` or `title` contains the phrase or any word.
 * When nothing matches, or the intent is blank, the original list is
 * returned unchanged.
 *
 * @param {string} intent - User's navigation intent
 * @param {Object[]} elements - Candidate elements
 * @returns {Object[]} Matching elements, or `elements` itself as a fallback
 */
function improveElementMatching(intent, elements) {
  const lower = (value) => (value == null ? '' : String(value).toLowerCase());

  // Split on any whitespace and drop empty tokens: an empty word would match
  // every element, because ''.includes('') is always true.
  const intentWords = lower(intent).split(/\s+/).filter(Boolean);
  if (intentWords.length === 0) {
    return elements;
  }
  const intentPhrase = intentWords.join(' ');

  // 1. Exact text matches (highest priority)
  const exactMatches = elements.filter(
    (el) => el.content && lower(el.content).includes(intentPhrase)
  );
  if (exactMatches.length > 0) {
    return exactMatches;
  }

  // 2. Word-based matches
  const wordMatches = elements.filter((el) => {
    if (!el.content) return false;
    const contentLower = lower(el.content);
    return intentWords.some((word) => contentLower.includes(word));
  });
  if (wordMatches.length > 0) {
    return wordMatches;
  }

  // 3. Aria-label and title matches
  const attributeMatches = elements.filter((el) => {
    const ariaLabel = lower(el.ariaLabel);
    const title = lower(el.title);
    return (
      ariaLabel.includes(intentPhrase) ||
      title.includes(intentPhrase) ||
      intentWords.some((word) => ariaLabel.includes(word) || title.includes(word))
    );
  });
  return attributeMatches.length > 0 ? attributeMatches : elements;
}
// Analyze page context for better element selection
/**
 * Summarise the visible elements of a virtual canvas.
 *
 * `pageType` is always 'general' and `mainSections` is always empty; only the
 * menu flag, navigation elements and primary actions are derived from the data.
 *
 * @param {Object} virtualCanvasData - Canvas data with a `visible_elements` array
 * @returns {{pageType: string, mainSections: Array, navigationElements: Object[],
 *   primaryActions: Object[], isMenuOpen: boolean}} Page context
 */
function analyzePageContext(virtualCanvasData) {
  const menuMarkers = ['Trending', 'Explore'];
  const navigationLabels = ['Home', 'Trending', 'Subscriptions', 'Shorts'];

  // True when the text is non-empty and mentions at least one of the labels.
  const mentionsAny = (text, labels) =>
    Boolean(text) && labels.some((label) => text.includes(label));

  const visible = virtualCanvasData.visible_elements;

  // The menu counts as open when any element shows a menu-only label.
  const menuOpen = visible.some((item) => mentionsAny(item.content, menuMarkers));

  // Navigation: explicit navigation type, or interactive with a known label.
  const navigation = visible.filter((item) => {
    if (item.type === 'navigation') return true;
    return Boolean(item.interactive) && mentionsAny(item.content, navigationLabels);
  });

  const primary = visible.filter((item) => item.primary);

  return {
    pageType: 'general',
    mainSections: [],
    navigationElements: navigation,
    primaryActions: primary,
    isMenuOpen: menuOpen,
  };
}
// Find element candidates with improved scoring
/**
 * Score the interactive elements that match an intent.
 *
 * Interactive elements are shortlisted with `improveElementMatching`; each
 * shortlisted element is then copied with a `score` and its
 * `centerCoordinates` added. The source elements are not modified.
 *
 * Scoring: +100 content equals intent, +50 content contains intent,
 * +30 primary, +40 navigation element for a navigation intent,
 * +20 interactive, -10 content longer than 100 characters.
 *
 * @param {string} intent - User's navigation intent
 * @param {Object} virtualCanvasData - Canvas data with a `visible_elements` array
 * @param {Object} pageContext - Page context; `navigationElements` is only
 *   read for navigation intents
 * @returns {Object[]} Scored copies of the shortlisted elements
 */
function findElementCandidates(intent, virtualCanvasData, pageContext) {
  const navigationIntents = ['trending', 'home', 'subscriptions', 'shorts'];

  const interactiveOnly = virtualCanvasData.visible_elements.filter((item) => item.interactive);
  const shortlist = improveElementMatching(intent, interactiveOnly);

  // Sum the individual bonuses/penalties for one element.
  const scoreOf = (item) => {
    const text = (item.content || '').toLowerCase();
    const wanted = intent.toLowerCase();
    const navigationHit =
      navigationIntents.includes(wanted) && pageContext.navigationElements.includes(item);
    const overlong = Boolean(item.content) && item.content.length > 100;

    return (
      (text === wanted ? 100 : 0) +
      (text.includes(wanted) ? 50 : 0) +
      (item.primary ? 30 : 0) +
      (navigationHit ? 40 : 0) +
      (item.interactive ? 20 : 0) -
      (overlong ? 10 : 0)
    );
  };

  return shortlist.map((item) => ({
    ...item,
    score: scoreOf(item),
    centerCoordinates: [
      Math.round(item.rect[0] + item.rect[2] / 2),
      Math.round(item.rect[1] + item.rect[3] / 2),
    ],
  }));
}
// Rank element candidates by relevance
/**
 * Pick the highest-scoring candidate and annotate it.
 *
 * The winner is the first candidate with the maximum score, so ties keep
 * input order. The caller's array is left in its original order. The winning
 * object itself is returned with `confidence` (score / 100, clamped to [0, 1])
 * and `recommendedAction` added to it.
 *
 * @param {Object[]} candidates - Scored candidates (each with a numeric `score`)
 * @param {string} intent - User's navigation intent, used in the error message
 * @param {Object} pageContext - Page context (currently unused)
 * @returns {Object} Best candidate with `confidence` and `recommendedAction`
 * @throws {Error} When there are no candidates
 */
function rankElementCandidates(candidates, intent, pageContext) {
  // Treat a missing or non-numeric score as 0 so it cannot poison the ranking.
  const scoreOf = (candidate) => (Number.isFinite(candidate.score) ? candidate.score : 0);

  // Linear scan instead of sort(): sort() would reorder the caller's array.
  let bestMatch;
  for (const candidate of candidates ?? []) {
    if (bestMatch === undefined || scoreOf(candidate) > scoreOf(bestMatch)) {
      bestMatch = candidate;
    }
  }

  if (!bestMatch) {
    throw new Error(`No suitable element found for intent: ${intent}`);
  }

  bestMatch.confidence = Math.min(Math.max(scoreOf(bestMatch) / 100, 0), 1.0);
  bestMatch.recommendedAction = getRecommendedAction(bestMatch);
  return bestMatch;
}
/**
 * Map an element to the action that should be performed on it.
 *
 * The element type decides first ('input_text', 'button', 'link'); any other
 * type falls back to the element's own `action`, and finally to 'click'.
 *
 * @param {Object} element - Element with a `type` and optional `action`
 * @returns {string} 'input', 'click', 'navigate', or the element's own action
 */
function getRecommendedAction(element) {
  switch (element.type) {
    case 'input_text':
      return 'input';
    case 'button':
      return 'click';
    case 'link':
      return 'navigate';
    default:
      // An empty or missing action falls through to the generic click.
      return element.action || 'click';
  }
}
// Enhanced atomic action generation
/**
 * Build a complete action for an element chosen by ID or by intent.
 *
 * With `elementId`, the element is looked up directly among the visible
 * elements. Otherwise the page context is analysed, candidates are scored and
 * the best one is used. In both cases the action type comes from
 * `getRecommendedAction`, so an input or link selected by ID is not reported
 * as a plain click.
 *
 * @param {string} intent - User's navigation intent
 * @param {string} elementId - Optional element ID; when set, `intent` is only
 *   used in the description
 * @param {Object} virtualCanvasData - Canvas data with a `visible_elements` array
 * @returns {Object} Action with coordinates, parameters, verification data,
 *   fallback coordinates and a confidence
 * @throws {Error} When the ID is unknown or no element matches the intent
 * @throws {TypeError} When the chosen element has no usable rect
 */
function generateSmartAction(intent, elementId, virtualCanvasData) {
  let targetElement;
  if (elementId) {
    // Find specific element by ID
    targetElement = virtualCanvasData.visible_elements.find((el) => el.id === elementId);
    if (!targetElement) {
      throw new Error(`Element with ID '${elementId}' not found`);
    }
  } else {
    // Page context is only needed for intent-based scoring.
    const pageContext = analyzePageContext(virtualCanvasData);
    const candidates = findElementCandidates(intent, virtualCanvasData, pageContext);
    targetElement = rankElementCandidates(candidates, intent, pageContext);
  }

  const { rect } = targetElement;
  const rectIsUsable =
    Array.isArray(rect) && rect.length >= 4 && rect.slice(0, 4).every((n) => Number.isFinite(n));
  if (!rectIsUsable) {
    throw new TypeError(`Element '${targetElement.id}' has no usable [x, y, width, height] rect`);
  }
  const [left, top, width, height] = rect;

  const centerCoords = targetElement.centerCoordinates ?? [
    Math.round(left + width / 2),
    Math.round(top + height / 2),
  ];

  // Keep fallback points inside small elements: never move further than half
  // the element's size away from the center or the corner.
  const nudgeX = Math.max(0, Math.min(10, Math.floor(width / 2)));
  const nudgeY = Math.max(0, Math.min(10, Math.floor(height / 2)));
  const insetX = Math.min(5, nudgeX);
  const insetY = Math.min(5, nudgeY);
  const fallbackCoords = [
    [centerCoords[0] - nudgeX, centerCoords[1] - nudgeY],
    [centerCoords[0] + nudgeX, centerCoords[1] + nudgeY],
    [left + insetX, top + insetY],
  ];

  // Elements selected by ID never went through ranking, so derive the action here.
  const actionType = targetElement.recommendedAction || getRecommendedAction(targetElement);

  return {
    element_id: targetElement.id,
    action_type: actionType,
    action_description: `${actionType} on ${targetElement.type} "${targetElement.content || 'element'}" to ${intent}`,
    exact_coordinates: centerCoords,
    parameters: {
      wait_after_action: 500,
      verify_action: true,
      retry_on_failure: true,
      max_retries: 3,
    },
    verification: {
      expected_element_type: targetElement.type,
      expected_content: targetElement.content,
      element_bounds: targetElement.rect,
    },
    fallback: {
      alternative_coordinates: fallbackCoords,
    },
    // ?? keeps a genuine confidence of 0; only a missing value defaults to 0.8.
    confidence: targetElement.confidence ?? 0.8,
  };
}
export {
improveElementMatching,
analyzePageContext,
findElementCandidates,
rankElementCandidates,
generateSmartAction,
getRecommendedAction
};