UNPKG

mcp-appium-visual

Version:

MCP Server for Appium mobile automation with visual recovery

449 lines 15.2 kB
import { Browser, Element, ElementArray } from "webdriverio";
import { AppiumCapabilities } from "./appiumTypes.js";
/**
 * Helper class for Appium operations.
 *
 * This is a type declaration: only signatures are declared here, the behaviour
 * lives in the implementation module. Driver-facing methods are typed with the
 * WebdriverIO `Browser`, `Element` and `ElementArray` types. Every method
 * except the constructor and `getDriver` returns a Promise.
 *
 * Several methods take an optional `strategy` string describing how `selector`
 * is interpreted; the accepted values and the default are not visible in this
 * declaration.
 */
export declare class AppiumHelper {
    /**
     * Driver session handle.
     * NOTE(review): presumably the WebdriverIO `Browser` handed out by
     * `initializeDriver` / `getDriver`; the type is erased here — confirm.
     */
    private driver;
    /**
     * Screenshot output directory.
     * NOTE(review): presumably set from the constructor argument — confirm.
     */
    private screenshotDir;
    /**
     * Retry limit.
     * NOTE(review): presumably used by the methods documented as having a
     * "retry mechanism"; the value is not visible here.
     */
    private readonly maxRetries;
    /**
     * Delay between retries.
     * NOTE(review): unit is not visible in this declaration — confirm.
     */
    private readonly retryDelay;
    /**
     * NOTE(review): presumably the capabilities passed to the most recent
     * `initializeDriver` call, kept for session recovery — confirm.
     */
    private lastCapabilities;
    /**
     * NOTE(review): presumably the Appium URL passed to the most recent
     * `initializeDriver` call, kept for session recovery — confirm.
     */
    private lastAppiumUrl;
    /**
     * Create a new AppiumHelper instance.
     *
     * @param screenshotDir Directory to save screenshots to (optional; the
     *   fallback used when omitted is not visible in this declaration)
     */
    constructor(screenshotDir?: string);
    /**
     * Initialize the Appium driver with the provided capabilities.
     *
     * @param capabilities Appium capabilities
     * @param appiumUrl Appium server URL (optional; the default is not visible
     *   in this declaration)
     * @returns Reference to the initialized driver
     */
    initializeDriver(capabilities: AppiumCapabilities, appiumUrl?: string): Promise<Browser>;
    /**
     * Check if the session is still valid and attempt to recover if not.
     *
     * @returns true if the session is valid or was successfully recovered
     */
    validateSession(): Promise<boolean>;
    /**
     * Safely execute an Appium command with session validation.
     *
     * @param operation Function that performs the Appium operation
     * @param errorMessage Error message to throw if the operation fails
     * @returns Result of the operation
     */
    safeExecute<T>(operation: () => Promise<T>, errorMessage: string): Promise<T>;
    /**
     * Get the current driver instance. This is the only synchronous accessor
     * on the class.
     *
     * @returns The driver instance
     * @throws If the driver has not been initialized
     */
    getDriver(): Browser;
    /**
     * Close the Appium session.
     */
    closeDriver(): Promise<void>;
    /**
     * Take a screenshot and save it to the screenshot directory.
     *
     * @param name Screenshot name
     * @returns Path to the saved screenshot
     */
    takeScreenshot(name: string): Promise<string>;
    /**
     * Check if an element exists.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns true if the element exists
     */
    elementExists(selector: string, strategy?: string): Promise<boolean>;
    /**
     * Find an element by its selector, with a retry mechanism.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @param timeoutMs Timeout in milliseconds
     * @returns WebdriverIO element if found
     */
    findElement(selector: string, strategy?: string, timeoutMs?: number): Promise<Element>;
    /**
     * Find multiple elements by selector.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns Array of WebdriverIO elements
     */
    findElements(selector: string, strategy?: string): Promise<ElementArray>;
    /**
     * Tap on an element, with a retry mechanism.
     * Uses the W3C Actions API with a fallback to the TouchAction API for
     * compatibility.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns true if successful
     * @throws AppiumError if the operation fails after retries
     */
    tapElement(selector: string, strategy?: string): Promise<boolean>;
    /**
     * Click on an element — alias for `tapElement`, provided for better
     * Selenium compatibility.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns true if successful
     * @throws AppiumError if the operation fails after retries
     */
    click(selector: string, strategy?: string): Promise<boolean>;
    /**
     * Send keys to an element, with a retry mechanism.
     *
     * @param selector Element selector
     * @param text Text to send
     * @param strategy Selection strategy
     * @returns true if successful
     * @throws AppiumError if the operation fails after retries
     */
    sendKeys(selector: string, text: string, strategy?: string): Promise<boolean>;
    /**
     * Get the page source (XML representation of the current UI).
     *
     * @param refreshFirst Whether to try refreshing the UI before getting the
     *   page source
     * @param suppressErrors Whether to suppress specific iOS errors and return
     *   an empty source instead
     * @returns XML string of the current UI
     */
    getPageSource(refreshFirst?: boolean, suppressErrors?: boolean): Promise<string>;
    /**
     * Perform a swipe gesture between two screen points.
     *
     * @param startX Starting X coordinate
     * @param startY Starting Y coordinate
     * @param endX Ending X coordinate
     * @param endY Ending Y coordinate
     * @param duration Swipe duration in milliseconds
     * @returns true if successful
     */
    swipe(startX: number, startY: number, endX: number, endY: number, duration?: number): Promise<boolean>;
    /**
     * Wait for an element to be present.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @param timeoutMs Timeout in milliseconds
     * @returns true if the element is found within the timeout
     */
    waitForElement(selector: string, strategy?: string, timeoutMs?: number): Promise<boolean>;
    /**
     * Long press on an element.
     *
     * Note the parameter order: `duration` comes before `strategy`, unlike the
     * other selector-based methods where `strategy` directly follows
     * `selector`.
     *
     * @param selector Element selector
     * @param duration Press duration (NOTE(review): presumably milliseconds,
     *   as for `swipe` — unit is not stated in this declaration; confirm)
     * @param strategy Selection strategy
     * @returns NOTE(review): presumably true if successful, like the other
     *   gesture methods — confirm
     */
    longPress(selector: string, duration?: number, strategy?: string): Promise<boolean>;
    /**
     * Scroll to an element.
     *
     * @param selector Element selector to scroll to
     * @param direction Direction to scroll ('up', 'down', 'left', 'right')
     * @param strategy Selection strategy
     * @param maxScrolls Maximum number of scroll attempts
     * @returns true if the element was found and scrolled to
     */
    scrollToElement(selector: string, direction?: "up" | "down" | "left" | "right", strategy?: string, maxScrolls?: number): Promise<boolean>;
    /**
     * Get device orientation.
     *
     * @returns 'PORTRAIT' or 'LANDSCAPE'
     */
    getOrientation(): Promise<"PORTRAIT" | "LANDSCAPE">;
    /**
     * Set device orientation.
     *
     * @param orientation Desired orientation ('PORTRAIT' or 'LANDSCAPE')
     */
    setOrientation(orientation: "PORTRAIT" | "LANDSCAPE"): Promise<void>;
    /**
     * Hide the keyboard if visible.
     */
    hideKeyboard(): Promise<void>;
    /**
     * Get the identifier of the current app.
     *
     * NOTE(review): presumably the package name on Android and the bundle ID
     * on iOS, as the method name suggests — confirm against the
     * implementation. (The activity is exposed separately by
     * `getCurrentActivity`.)
     *
     * @returns The app identifier as a string
     */
    getCurrentPackage(): Promise<string>;
    /**
     * Get the current activity (Android only).
     *
     * @returns The activity name as a string
     */
    getCurrentActivity(): Promise<string>;
    /**
     * Launch the app.
     */
    launchApp(): Promise<void>;
    /**
     * Close the app.
     */
    closeApp(): Promise<void>;
    /**
     * Reset the app (clear app data).
     */
    resetApp(): Promise<void>;
    /**
     * Get device time.
     *
     * @returns Device time string (format is not specified in this declaration)
     */
    getDeviceTime(): Promise<string>;
    /**
     * Get battery info (if supported by the device).
     * Note: This is a custom implementation as WebdriverIO doesn't directly
     * support this.
     *
     * @returns Object with numeric `level` and `state` fields
     *   (NOTE(review): the scale of `level` and the meaning of the `state`
     *   codes are not visible here — confirm against the implementation)
     */
    getBatteryInfo(): Promise<{
        level: number;
        state: number;
    }>;
    /**
     * Lock the device.
     *
     * @param duration Duration in seconds to lock the device
     */
    lockDevice(duration?: number): Promise<void>;
    /**
     * Check if the device is locked.
     *
     * @returns true if the device is locked
     */
    isDeviceLocked(): Promise<boolean>;
    /**
     * Unlock the device.
     */
    unlockDevice(): Promise<void>;
    /**
     * Press a key on the device (Android only).
     *
     * @param keycode Android keycode
     */
    pressKeyCode(keycode: number): Promise<void>;
    /**
     * Open notifications (Android only).
     */
    openNotifications(): Promise<void>;
    /**
     * Get all contexts (NATIVE_APP, WEBVIEW, etc.).
     *
     * @returns Array of context names
     */
    getContexts(): Promise<string[]>;
    /**
     * Switch context (between NATIVE_APP and WEBVIEW).
     *
     * @param context Context name to switch to
     */
    switchContext(context: string): Promise<void>;
    /**
     * Get the current context.
     *
     * @returns Name of the current context
     */
    getCurrentContext(): Promise<string>;
    /**
     * Pull a file from the device.
     *
     * @param path Path to the file on the device
     * @returns Base64 encoded file content
     */
    pullFile(path: string): Promise<string>;
    /**
     * Push a file to the device.
     *
     * @param path Path on the device to write to
     * @param data Base64 encoded file content
     */
    pushFile(path: string, data: string): Promise<void>;
    /**
     * Find an element by iOS predicate string (iOS only).
     *
     * @param predicateString iOS predicate string
     * @param timeoutMs Timeout in milliseconds
     * @returns WebdriverIO element if found
     */
    findByIosPredicate(predicateString: string, timeoutMs?: number): Promise<Element>;
    /**
     * Find an element by iOS class chain (iOS only).
     *
     * @param classChain iOS class chain
     * @param timeoutMs Timeout in milliseconds
     * @returns WebdriverIO element if found
     */
    findByIosClassChain(classChain: string, timeoutMs?: number): Promise<Element>;
    /**
     * Get the list of available iOS simulators.
     * Note: This method isn't tied to an Appium session, so it doesn't require
     * an initialized driver.
     * This uses the executeScript capability of WebdriverIO to run a mobile
     * command.
     *
     * NOTE(review): the two statements above appear to be in tension (running
     * a command through WebdriverIO while not needing a driver) — confirm
     * against the implementation which one holds.
     *
     * @returns Array of simulator objects (element shape is untyped here)
     */
    getIosSimulators(): Promise<any[]>;
    /**
     * Perform iOS-specific touch ID (fingerprint) simulation.
     *
     * @param match Whether the fingerprint should match (true) or not match
     *   (false)
     * @returns true if successful
     */
    performTouchId(match: boolean): Promise<boolean>;
    /**
     * Simulate the iOS shake gesture.
     *
     * @returns true if successful
     */
    shakeDevice(): Promise<boolean>;
    /**
     * Start recording the screen on an iOS or Android device.
     *
     * @param options Recording options; every field is optional.
     *   NOTE(review): accepted values and units for `videoType`, `timeLimit`,
     *   `videoQuality` and `videoFps` are not visible in this declaration —
     *   confirm against the implementation / Appium driver docs.
     * @returns true if recording started successfully
     */
    startRecording(options?: {
        videoType?: string;
        timeLimit?: number;
        videoQuality?: string;
        videoFps?: number;
    }): Promise<boolean>;
    /**
     * Stop recording the screen and get the recording content as base64.
     *
     * @returns Base64-encoded recording data
     */
    stopRecording(): Promise<string>;
    /**
     * Execute a custom mobile command.
     *
     * @param command Mobile command to execute
     * @param args Arguments for the command
     * @returns Command result (untyped; callers must narrow it themselves)
     */
    executeMobileCommand(command: string, args?: any[]): Promise<any>;
    /**
     * Get text from an element.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns Text content of the element
     * @throws AppiumError if the element is not found or has no text
     */
    getText(selector: string, strategy?: string): Promise<string>;
    /**
     * Send keys directly to the device (without focusing on an element).
     *
     * @param text Text to send
     * @returns true if successful
     */
    sendKeysToDevice(text: string): Promise<boolean>;
    /**
     * Send key events to the device (e.g. HOME button, BACK button).
     *
     * @param keyEvent Key event name or code
     * @returns true if successful
     */
    sendKeyEvent(keyEvent: string | number): Promise<boolean>;
    /**
     * Clear text from an input element.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns true if successful
     */
    clearElement(selector: string, strategy?: string): Promise<boolean>;
    /**
     * Scroll the screen in a predefined direction.
     * Implemented using the W3C Actions API for better compatibility with
     * modern Appium versions.
     *
     * @param direction Direction to scroll: "down", "up", "left", "right"
     * @param distance Optional fraction of the screen to scroll (0.0-1.0),
     *   defaults to 0.5
     * @returns true if successful
     */
    scrollScreen(direction: "down" | "up" | "left" | "right", distance?: number): Promise<boolean>;
    /**
     * Get element attributes — useful for debugging and inspecting.
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns Object with element attributes (keys are not specified in this
     *   declaration)
     */
    getElementAttributes(selector: string, strategy?: string): Promise<Record<string, any>>;
    /**
     * Get a detailed element analysis with all available information
     * (useful for inspector functionality).
     *
     * @param selector Element selector
     * @param strategy Selection strategy
     * @returns Comprehensive element info (keys are not specified in this
     *   declaration)
     */
    inspectElement(selector: string, strategy?: string): Promise<Record<string, any>>;
    /**
     * Get a visual tree of elements under a parent element or from the root.
     * Helps create a hierarchical view of the UI elements (inspector
     * functionality).
     *
     * @param parentSelector Optional parent element selector; if not provided
     *   the root is used
     * @param parentStrategy Selection strategy for the parent
     * @param maxDepth Maximum depth to traverse
     * @returns Hierarchical object representing the element tree
     */
    getElementTree(parentSelector?: string, parentStrategy?: string, maxDepth?: number): Promise<Record<string, any>>;
    /**
     * Verify if text is present in the page source.
     *
     * @param text Text to search for
     * @returns true if the text is found
     */
    hasTextInSource(text: string): Promise<boolean>;
    /**
     * Find all elements containing specific text.
     *
     * @param text Text to search for
     * @returns Array of WebdriverIO elements that contain the text
     */
    findElementsByText(text: string): Promise<ElementArray>;
    /**
     * Open a deep link URL directly in an app.
     *
     * @param url The URL/URI to open (e.g. "myapp://details/1234" or a
     *   http/https URL)
     * @returns true if successful
     * @throws AppiumError if the operation fails
     */
    openDeepLink(url: string): Promise<boolean>;
    /**
     * Open a deep link using an Android Intent.
     * This is a more specific Android-only method that allows setting
     * additional intent parameters.
     *
     * @param url The URL/URI to open
     * @param extras Optional extras to add to the intent (string keys to
     *   string values)
     * @returns true if successful
     */
    openAndroidDeepLink(url: string, extras?: Record<string, string>): Promise<boolean>;
    /**
     * Get the window size (screen dimensions).
     *
     * @returns Object containing the width and height of the screen
     */
    getWindowSize(): Promise<{
        width: number;
        height: number;
    }>;
    /**
     * Tap at specific coordinates on the screen.
     *
     * @param x X-coordinate
     * @param y Y-coordinate
     * @returns true if successful
     */
    tapByCoordinates(x: number, y: number): Promise<boolean>;
    /**
     * Perform advanced touch actions using the W3C Actions API.
     * Allows for complex gestures like multi-touch, long press, etc.
     *
     * @param actions Array of W3C Action objects (untyped here; the shape is
     *   not validated by the compiler)
     * @returns true if successful
     */
    performActions(actions: any[]): Promise<boolean>;
    /**
     * Send text to the currently active/focused element.
     * Useful when an input field has already been focused.
     *
     * @param text Text to send
     * @returns true if successful
     */
    sendTextToActiveElement(text: string): Promise<boolean>;
}
//# sourceMappingURL=appiumHelper.d.ts.map