mcp-appium-visual
Version:
MCP Server for Appium mobile automation with visual recovery
449 lines • 15.2 kB
TypeScript
import { Browser, Element, ElementArray } from "webdriverio";
import { AppiumCapabilities } from "./appiumTypes.js";
/**
* Helper class for Appium operations.
*
* Type declaration only: this file describes the public surface of the helper
* (session lifecycle, element lookup and interaction, gestures, device and app
* control, context switching, file transfer, recording and inspection helpers).
* Method bodies are not part of this file, so defaults for optional parameters
* and exact failure behaviour must be checked against the implementation.
*
* Most methods are asynchronous and return a Promise; `getDriver()` is the only
* synchronous accessor.
*/
export declare class AppiumHelper {
/**
* Private members are listed by name only; their types are not visible in
* this declaration.
* Presumably the active WebdriverIO session handed out by getDriver() — confirm.
*/
private driver;
/** Presumably the directory given to the constructor and used by takeScreenshot() — confirm. */
private screenshotDir;
/** Presumably the retry limit behind the "retry mechanism" methods below — confirm. */
private readonly maxRetries;
/** Presumably the pause between retries; units are not visible here — confirm. */
private readonly retryDelay;
/** Presumably the capabilities from the last initializeDriver() call, kept for session recovery — confirm. */
private lastCapabilities;
/** Presumably the server URL from the last initializeDriver() call, kept for session recovery — confirm. */
private lastAppiumUrl;
/**
* Create a new AppiumHelper instance
*
* @param screenshotDir Directory to save screenshots to (optional; the default
* used when omitted is not visible in this declaration)
*/
constructor(screenshotDir?: string);
/**
* Initialize the Appium driver with provided capabilities
*
* @param capabilities Appium capabilities
* @param appiumUrl Appium server URL (optional; the default is not visible in
* this declaration)
* @returns Reference to the initialized driver
*/
initializeDriver(capabilities: AppiumCapabilities, appiumUrl?: string): Promise<Browser>;
/**
* Check if the session is still valid and attempt to recover if not
*
* @returns true if session is valid or was successfully recovered
*/
validateSession(): Promise<boolean>;
/**
* Safely execute an Appium command with session validation
*
* @param operation Function that performs the Appium operation
* @param errorMessage Error message to throw if operation fails
* @returns Result of the operation
*/
safeExecute<T>(operation: () => Promise<T>, errorMessage: string): Promise<T>;
/**
* Get the current driver instance (synchronous)
*
* @returns The driver instance or throws if not initialized
*/
getDriver(): Browser;
/**
* Close the Appium session
*
* @returns Promise that resolves once the close operation has finished
*/
closeDriver(): Promise<void>;
/**
* Take a screenshot and save it to the specified directory
*
* @param name Screenshot name
* @returns Path to the saved screenshot
*/
takeScreenshot(name: string): Promise<string>;
/**
* Check if an element exists
*
* @param selector Element selector
* @param strategy Selection strategy (optional; accepted values and the default
* are not visible in this declaration)
* @returns true if the element exists
*/
elementExists(selector: string, strategy?: string): Promise<boolean>;
/**
* Find an element by its selector with retry mechanism
*
* @param selector Element selector
* @param strategy Selection strategy
* @param timeoutMs Timeout in milliseconds
* @returns WebdriverIO element if found
*/
findElement(selector: string, strategy?: string, timeoutMs?: number): Promise<Element>;
/**
* Find multiple elements by selector
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns Array of WebdriverIO elements
*/
findElements(selector: string, strategy?: string): Promise<ElementArray>;
/**
* Tap on an element with retry mechanism
* Uses W3C Actions API with fallback to TouchAction API for compatibility
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns true if successful
* @throws AppiumError if the operation fails after retries
*/
tapElement(selector: string, strategy?: string): Promise<boolean>;
/**
* Click on an element - alias for tapElement for better Selenium compatibility
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns true if successful
* @throws AppiumError if the operation fails after retries
*/
click(selector: string, strategy?: string): Promise<boolean>;
/**
* Send keys to an element with retry mechanism
*
* Note the argument order: the text comes before the optional strategy.
*
* @param selector Element selector
* @param text Text to send
* @param strategy Selection strategy
* @returns true if successful
* @throws AppiumError if the operation fails after retries
*/
sendKeys(selector: string, text: string, strategy?: string): Promise<boolean>;
/**
* Get the page source (XML representation of the current UI)
*
* @param refreshFirst Whether to try refreshing the UI before getting page source
* @param suppressErrors Whether to suppress specific iOS errors and return empty source
* @returns XML string of the current UI
*/
getPageSource(refreshFirst?: boolean, suppressErrors?: boolean): Promise<string>;
/**
* Perform a swipe gesture
*
* @param startX Starting X coordinate
* @param startY Starting Y coordinate
* @param endX Ending X coordinate
* @param endY Ending Y coordinate
* @param duration Swipe duration in milliseconds
* @returns true if successful
*/
swipe(startX: number, startY: number, endX: number, endY: number, duration?: number): Promise<boolean>;
/**
* Wait for an element to be present
*
* @param selector Element selector
* @param strategy Selection strategy
* @param timeoutMs Timeout in milliseconds
* @returns true if the element is found within the timeout
*/
waitForElement(selector: string, strategy?: string, timeoutMs?: number): Promise<boolean>;
/**
* Long press on an element
*
* Note the argument order: unlike most element methods in this class, the
* optional duration comes before the optional strategy.
*
* @param selector Element selector
* @param duration Press duration (optional; presumably milliseconds like
* swipe() — TODO confirm against the implementation)
* @param strategy Selection strategy
* @returns Promise of a boolean (presumably true if successful, matching the
* other gesture methods — confirm)
*/
longPress(selector: string, duration?: number, strategy?: string): Promise<boolean>;
/**
* Scroll to an element
*
* @param selector Element selector to scroll to
* @param direction Direction to scroll ('up', 'down', 'left', 'right')
* @param strategy Selection strategy
* @param maxScrolls Maximum number of scroll attempts
* @returns true if element was found and scrolled to
*/
scrollToElement(selector: string, direction?: "up" | "down" | "left" | "right", strategy?: string, maxScrolls?: number): Promise<boolean>;
/**
* Get device orientation
*
* @returns Either "PORTRAIT" or "LANDSCAPE"
*/
getOrientation(): Promise<"PORTRAIT" | "LANDSCAPE">;
/**
* Set device orientation
*
* @param orientation Desired orientation ('PORTRAIT' or 'LANDSCAPE')
*/
setOrientation(orientation: "PORTRAIT" | "LANDSCAPE"): Promise<void>;
/**
* Hide the keyboard if visible
*/
hideKeyboard(): Promise<void>;
/**
* Get the current app package (Android) or bundle ID (iOS)
*
* NOTE(review): the original comment said "current activity (Android)", which
* duplicates getCurrentActivity(); the method name suggests the package is
* meant — confirm against the implementation.
*
* @returns Package name or bundle ID as a string
*/
getCurrentPackage(): Promise<string>;
/**
* Get the current activity (Android only)
*
* @returns Activity name as a string
*/
getCurrentActivity(): Promise<string>;
/**
* Launch the app
*/
launchApp(): Promise<void>;
/**
* Close the app
*/
closeApp(): Promise<void>;
/**
* Reset the app (clear app data)
*/
resetApp(): Promise<void>;
/**
* Get device time
*
* @returns Device time string (the format is not visible in this declaration)
*/
getDeviceTime(): Promise<string>;
/**
* Get battery info (if supported by the device)
* Note: This is a custom implementation as WebdriverIO doesn't directly support this
*
* @returns Object with numeric `level` and `state` fields; the scale of `level`
* and the meaning of the `state` codes are not visible here — confirm against
* the implementation.
*/
getBatteryInfo(): Promise<{
level: number;
state: number;
}>;
/**
* Lock the device
*
* @param duration Duration in seconds to lock the device
*/
lockDevice(duration?: number): Promise<void>;
/**
* Check if device is locked
*
* @returns Promise of a boolean (presumably true when the device is locked — confirm)
*/
isDeviceLocked(): Promise<boolean>;
/**
* Unlock the device
*/
unlockDevice(): Promise<void>;
/**
* Press a key on the device (Android only)
*
* @param keycode Android keycode
*/
pressKeyCode(keycode: number): Promise<void>;
/**
* Open notifications (Android only)
*/
openNotifications(): Promise<void>;
/**
* Get all contexts (NATIVE_APP, WEBVIEW, etc.)
*
* @returns Array of context names
*/
getContexts(): Promise<string[]>;
/**
* Switch context (between NATIVE_APP and WEBVIEW)
*
* @param context Context name to switch to
*/
switchContext(context: string): Promise<void>;
/**
* Get current context
*
* @returns Name of the current context
*/
getCurrentContext(): Promise<string>;
/**
* Pull file from device
*
* @param path Path to file on device
* @returns Base64 encoded file content
*/
pullFile(path: string): Promise<string>;
/**
* Push file to device
*
* @param path Path on device to write to
* @param data Base64 encoded file content
*/
pushFile(path: string, data: string): Promise<void>;
/**
* Find an iOS predicate string element (iOS only)
*
* @param predicateString iOS predicate string
* @param timeoutMs Timeout in milliseconds
* @returns WebdriverIO element if found
*/
findByIosPredicate(predicateString: string, timeoutMs?: number): Promise<Element>;
/**
* Find an iOS class chain element (iOS only)
*
* @param classChain iOS class chain
* @param timeoutMs Timeout in milliseconds
* @returns WebdriverIO element if found
*/
findByIosClassChain(classChain: string, timeoutMs?: number): Promise<Element>;
/**
* Get list of available iOS simulators
* Note: This method isn't tied to an Appium session, so it doesn't require an initialized driver
* This uses the executeScript capability of WebdriverIO to run a mobile command
*
* NOTE(review): the two statements above look inconsistent — running a mobile
* command through WebdriverIO's executeScript would normally need a driver.
* Confirm against the implementation whether a session is actually required.
*
* @returns Array of simulator objects (untyped: elements are `any`)
*/
getIosSimulators(): Promise<any[]>;
/**
* Perform iOS-specific touch ID (fingerprint) simulation
*
* @param match Whether the fingerprint should match (true) or not match (false)
* @returns true if successful
*/
performTouchId(match: boolean): Promise<boolean>;
/**
* Simulate iOS shake gesture
*
* @returns true if successful
*/
shakeDevice(): Promise<boolean>;
/**
* Start recording the screen on iOS or Android device
*
* @param options Recording options; every field is optional. Accepted values,
* units and defaults for `videoType`, `timeLimit`, `videoQuality` and
* `videoFps` are not visible in this declaration — confirm against the
* implementation before relying on them.
* @returns true if recording started successfully
*/
startRecording(options?: {
videoType?: string;
timeLimit?: number;
videoQuality?: string;
videoFps?: number;
}): Promise<boolean>;
/**
* Stop recording the screen and get the recording content as base64
*
* @returns Base64-encoded recording data
*/
stopRecording(): Promise<string>;
/**
* Execute a custom mobile command
*
* Both the arguments and the result are untyped (`any`), so callers must
* validate the result themselves.
*
* @param command Mobile command to execute
* @param args Arguments for the command
* @returns Command result
*/
executeMobileCommand(command: string, args?: any[]): Promise<any>;
/**
* Get text from an element
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns Text content of the element
* @throws AppiumError if element is not found or has no text
*/
getText(selector: string, strategy?: string): Promise<string>;
/**
* Send keys directly to the device (without focusing on an element)
*
* @param text Text to send
* @returns true if successful
*/
sendKeysToDevice(text: string): Promise<boolean>;
/**
* Send key events to the device (e.g. HOME button, BACK button)
*
* @param keyEvent Key event name or code
* @returns true if successful
*/
sendKeyEvent(keyEvent: string | number): Promise<boolean>;
/**
* Clear text from an input element
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns true if successful
*/
clearElement(selector: string, strategy?: string): Promise<boolean>;
/**
* Scroll using predefined directions - scrollDown, scrollUp, scrollLeft, scrollRight
* Implemented using W3C Actions API for better compatibility with modern Appium versions
*
* @param direction Direction to scroll: "down", "up", "left", "right"
* @param distance Optional percentage of screen to scroll (0.0-1.0), defaults to 0.5
* @returns true if successful
*/
scrollScreen(direction: "down" | "up" | "left" | "right", distance?: number): Promise<boolean>;
/**
* Get element attributes - useful for debugging and inspecting
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns Object with element attributes (keys are not fixed by this declaration)
*/
getElementAttributes(selector: string, strategy?: string): Promise<Record<string, any>>;
/**
* Get detailed element analysis with all available information
* (useful for inspector functionality)
*
* @param selector Element selector
* @param strategy Selection strategy
* @returns Comprehensive element info (keys are not fixed by this declaration)
*/
inspectElement(selector: string, strategy?: string): Promise<Record<string, any>>;
/**
* Get a visual tree of elements under a parent element or from the root
* Helps create a hierarchical view of the UI elements (inspector functionality)
*
* @param parentSelector Optional parent element selector, if not provided will use root
* @param parentStrategy Selection strategy for parent
* @param maxDepth Maximum depth to traverse (the default is not visible in this declaration)
* @returns Hierarchical object representing the element tree
*/
getElementTree(parentSelector?: string, parentStrategy?: string, maxDepth?: number): Promise<Record<string, any>>;
/**
* Verify if text is present in the page source
*
* @param text Text to search for
* @returns true if text is found
*/
hasTextInSource(text: string): Promise<boolean>;
/**
* Find all elements containing specific text
*
* @param text Text to search for
* @returns Array of WebdriverIO elements that contain the text
*/
findElementsByText(text: string): Promise<ElementArray>;
/**
* Open a deep link URL directly in an app
*
* @param url The URL/URI to open (e.g. "myapp://details/1234" or a http/https URL)
* @returns true if successful
* @throws AppiumError if the operation fails
*/
openDeepLink(url: string): Promise<boolean>;
/**
* Open a deep link using Android Intent
* This is a more specific Android-only method that allows setting additional intent parameters
*
* @param url The URL/URI to open
* @param extras Optional extras to add to the intent (string keys mapped to string values)
* @returns true if successful
*/
openAndroidDeepLink(url: string, extras?: Record<string, string>): Promise<boolean>;
/**
* Get the window size (screen dimensions)
*
* @returns Object containing width and height of the screen
*/
getWindowSize(): Promise<{
width: number;
height: number;
}>;
/**
* Tap at specific coordinates on the screen
*
* @param x X-coordinate
* @param y Y-coordinate
* @returns true if successful
*/
tapByCoordinates(x: number, y: number): Promise<boolean>;
/**
* Perform advanced touch actions using W3C Actions API
* Allows for complex gestures like multi-touch, long press, etc.
*
* @param actions Array of W3C Action objects (untyped here: elements are `any`,
* so malformed actions are not caught at compile time)
* @returns true if successful
*/
performActions(actions: any[]): Promise<boolean>;
/**
* Send text to the currently active/focused element
* Useful when you've already focused on an input field
*
* @param text Text to send
* @returns true if successful
*/
sendTextToActiveElement(text: string): Promise<boolean>;
}
//# sourceMappingURL=appiumHelper.d.ts.map