@mastra/core
Version:
Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.
677 lines • 28.1 kB
TypeScript
/**
* MastraBrowser Base Class
*
* Abstract base class for browser providers. Extends MastraBase for logger integration.
*
* ## Architecture
*
* Each browser provider defines its own tools via the `getTools()` method.
* This allows different providers to offer different capabilities:
*
* - **AgentBrowser**: 17 deterministic tools using refs ([ref=e1], [ref=e2])
* - **StagehandBrowser**: AI-powered tools (act, extract, observe)
*
* ## Two Paradigms
*
* Browser providers fall into two paradigms:
*
* 1. **Deterministic** (Playwright, agent-browser) - Uses refs and selectors
* 2. **AI-powered** (Stagehand) - Uses natural language instructions
*
* Both extend this base class and implement `getTools()` to return their tools.
*/
import { MastraBase } from '../base.js';
import type { InputProcessor, InputProcessorOrWorkflow } from '../processors/index.js';
import type { Tool } from '../tools/tool.js';
import type { BrowserToolError, ErrorCode } from './errors.js';
import type { ScreencastOptions as ScreencastOptionsType } from './screencast/types.js';
import type { BrowserState, BrowserTabState, BrowserScope, ThreadManager } from './thread-manager.js';
export type { ScreencastOptions, ScreencastFrameData, ScreencastEvents } from './screencast/types.js';
/**
* Local alias for the screencast options type.
*
* An `export type { ... } from` re-export does not create a binding inside this
* module, so the declarations in this file (`BrowserConfigBase.screencast`,
* `MastraBrowser.startScreencast()`, ...) refer to the type through this alias
* of the imported `ScreencastOptionsType`.
*/
type ScreencastOptions = ScreencastOptionsType;
/**
* Clean up stale Chrome lock files from a profile directory.
*
* Chrome creates lock files (SingletonLock, SingletonSocket, etc.) to prevent
* multiple instances from using the same profile. If the browser crashes or
* doesn't shut down cleanly, these files can remain and block future launches.
*
* This function removes these lock files, allowing the profile to be reused.
* It's safe to call even if the files don't exist.
*
* The function is declared synchronous: it returns `void`, not a promise.
*
* @param profilePath - Path to the Chrome profile directory
* @param logger - Optional logger for debug output. Both `debug` and `warn` are
*   themselves optional, so a partial logger object is accepted.
*/
export declare function cleanupProfileLockFiles(profilePath: string, logger?: {
/** Optional callback that receives debug messages as a single string. */
debug?: (message: string) => void;
/** Optional callback that receives warning messages as a single string. */
warn?: (message: string) => void;
}): void;
/**
* Kill a browser process and its children by sending SIGKILL to the process group.
*
* When Chrome/Chromium is launched, it spawns child processes (GPU, renderer,
* network, storage, crashpad handlers). If the main process exits uncleanly,
* these children can become orphaned. Killing the process group ensures all
* related processes are cleaned up.
*
* Note: Process group signaling (`-pid`) is POSIX-only. On Windows, this
* function is a no-op and orphaned child processes must be cleaned up by
* other means (e.g., taskkill).
*
* The function is declared synchronous: it returns `void`, not a promise.
*
* @param pid - The PID of the main browser process. If undefined, this is a no-op.
* @param logger - Optional logger for debug output. Both `debug` and `warn` are
*   themselves optional, so a partial logger object is accepted.
*/
export declare function killProcessGroup(pid: number | undefined, logger?: {
/** Optional callback that receives debug messages as a single string. */
debug?: (message: string) => void;
/** Optional callback that receives warning messages as a single string. */
warn?: (message: string) => void;
}): void;
/**
* Browser provider status.
*
* This is the type of `MastraBrowser.status`. Two values are tied to other
* members declared in this file:
* - `'ready'`: the `onLaunch` hook is called after this status is reached.
* - `'error'`: `MastraBrowser.error` holds the error message.
*
* NOTE(review): the remaining values presumably track launch and shutdown
* progress ('pending' -> 'launching' -> 'ready' -> 'closing' -> 'closed');
* the exact transitions are not visible in this declaration file - confirm
* against the implementation.
*/
export type BrowserStatus = 'pending' | 'launching' | 'ready' | 'error' | 'closing' | 'closed';
/**
* Lifecycle hook that fires during browser state transitions.
*
* Used as the type of the `onLaunch` and `onClose` options of
* `BrowserConfigBase`. The hook receives a single object argument and may be
* synchronous or return a promise.
*
* NOTE(review): whether a returned promise is awaited before the transition
* continues is not visible in this declaration file - confirm against the
* implementation.
*/
export type BrowserLifecycleHook = (args: {
/** The browser instance whose lifecycle event triggered the hook. */
browser: MastraBrowser;
}) => void | Promise<void>;
/**
* CDP URL provider - can be a static string or an async function.
* Useful for cloud providers where the CDP URL may change per session.
*
* Accepted forms:
* - a plain URL string,
* - a zero-argument function returning the URL string directly, or
* - a zero-argument function returning a promise of the URL string.
*
* `MastraBrowser.resolveCdpUrl()` turns any of these into a `Promise<string>`.
*/
export type CdpUrlProvider = string | (() => string | Promise<string>);
/**
* Base configuration properties shared by all browser providers.
* This interface contains fields common to all browser configurations.
* Every field is optional.
*
* **For extending**: Use this interface when creating provider-specific configs
* (e.g., `interface MyProviderConfig extends BrowserConfigBase`).
*
* **For consuming**: Use {@link BrowserConfig} which adds compile-time validation
* that `cdpUrl` and `scope: 'thread'` cannot be used together.
*/
export interface BrowserConfigBase {
/**
* Whether to run the browser in headless mode (no visible UI).
* @default true
*/
headless?: boolean;
/**
* Browser viewport dimensions.
* Controls the size of the browser window and how websites render.
* When the object is provided, both `width` and `height` are required.
* NOTE(review): units are presumably CSS pixels - confirm.
*/
viewport?: {
width: number;
height: number;
};
/**
* Default timeout in milliseconds for browser operations.
* @default 10000 (10 seconds)
*/
timeout?: number;
/**
* CDP WebSocket URL or async provider function.
* When provided, connects to an existing browser instead of launching a new one.
* Useful for cloud providers (Browserbase, Browserless, Kernel, etc.).
*
* **Important:** When using `cdpUrl`, you must use `scope: 'shared'` (or omit `scope`
* to let it default to 'shared' behavior). Using `cdpUrl` with `scope: 'thread'`
* will throw an error because thread isolation requires spawning separate browser
* instances, which isn't possible when connecting to an existing browser via CDP.
*
* This interface alone does not reject the invalid combination at compile
* time; the {@link BrowserConfig} union does.
*
* @example
* ```ts
* // Connect to a local Chrome with remote debugging enabled
* { cdpUrl: 'ws://localhost:9222' }
*
* // Connect to Browserless cloud provider
* { cdpUrl: 'wss://chrome.browserless.io?token=YOUR_TOKEN', scope: 'shared' }
*
* // Use an async provider function for dynamic URLs
* { cdpUrl: async () => await fetchBrowserlessUrl() }
* ```
*/
cdpUrl?: CdpUrlProvider;
/**
* Browser instance scope across threads.
*
* - `'thread'` (default): Each thread gets its own isolated browser instance.
* Best for parallel agents that need separate browser states.
*
* - `'shared'`: All threads share a single browser instance.
* Required when using `cdpUrl` to connect to an existing browser.
*
* **Important:** `scope: 'thread'` cannot be used with `cdpUrl` because thread
* isolation requires spawning new browser instances, which isn't possible when
* connecting to an existing browser via CDP. This configuration will throw an error.
*
* NOTE(review): the documentation of `MastraBrowser.getScope()` says the scope
* "defaults to 'shared'", which disagrees with the `'thread'` default stated
* here (outside of the `cdpUrl` case). Confirm which default the
* implementation applies and align the two comments.
*
* @default 'thread'
*
* @example
* ```ts
* // Isolated browsers per thread (default)
* { scope: 'thread' }
*
* // Shared browser for all threads
* { scope: 'shared' }
*
* // When using cdpUrl, scope must be 'shared'
* { cdpUrl: 'ws://localhost:9222', scope: 'shared' }
* ```
*/
scope?: BrowserScope;
/**
* Called after the browser reaches 'ready' status.
* Receives `{ browser }` and may return a promise (see `BrowserLifecycleHook`).
*/
onLaunch?: BrowserLifecycleHook;
/**
* Called before the browser is closed.
* Receives `{ browser }` and may return a promise (see `BrowserLifecycleHook`).
*/
onClose?: BrowserLifecycleHook;
/**
* Screencast options for streaming browser frames.
* Controls image format, quality, and dimensions.
* Used as defaults by `MastraBrowser.startScreencastIfBrowserActive()` when
* that method is called without options.
*/
screencast?: ScreencastOptions;
/**
* Path to a Chrome/Chromium user data directory (profile).
* When provided, the browser will use this profile's cookies, localStorage,
* extensions, and other session data.
*
* **Important:** Chrome only allows one process to access a profile at a time.
* If Chrome is already running with this profile, the browser will fail to launch.
* Either close Chrome first, or use a copy of the profile.
*
* If a previous run crashed and left lock files behind, see
* `cleanupProfileLockFiles()` in this file.
*
* @example
* ```ts
* // macOS Chrome default profile
* { profile: '/Users/you/Library/Application Support/Google/Chrome' }
*
* // Custom profile directory
* { profile: '/path/to/my-automation-profile' }
* ```
*/
profile?: string;
/**
* Path to the browser executable to use.
* By default, Playwright/Stagehand use their bundled Chromium.
* Use this to launch a specific browser installation instead.
*
* @example
* ```ts
* // macOS Chrome
* { executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' }
*
* // Linux Chrome
* { executablePath: '/usr/bin/google-chrome' }
*
* // Windows Chrome
* { executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe' }
* ```
*/
executablePath?: string;
}
/**
* Browser configuration with compile-time enforcement of cdpUrl/scope compatibility.
*
* This type enforces that `cdpUrl` and `scope: 'thread'` cannot be used together:
* - When `cdpUrl` is provided, `scope` must be `'shared'` or omitted
* - When `scope: 'thread'` is used, `cdpUrl` must not be provided
*
* The type is a union of two variants of {@link BrowserConfigBase}; each variant
* narrows the `cdpUrl` and `scope` fields so the forbidden pairing matches
* neither branch.
*
* @example
* ```ts
* // Valid configurations:
* { headless: true } // Local browser, thread scope (default)
* { scope: 'thread' } // Explicit thread isolation
* { scope: 'shared' } // Shared browser
* { cdpUrl: 'ws://localhost:9222' } // CDP connection, defaults to shared
* { cdpUrl: 'ws://localhost:9222', scope: 'shared' } // CDP with explicit shared
*
* // Invalid configuration (TypeScript error):
* { cdpUrl: 'ws://localhost:9222', scope: 'thread' } // Error: cannot combine cdpUrl with thread scope
* ```
*/
export type BrowserConfig = (BrowserConfigBase & {
// Variant 1: no CDP connection. `cdpUrl` may only be absent/undefined,
// and any scope value is allowed.
cdpUrl?: undefined;
scope?: BrowserScope;
}) | (BrowserConfigBase & {
// Variant 2: CDP connection. `cdpUrl` is required here, and `scope` is
// limited to 'shared' (or omitted).
cdpUrl: CdpUrlProvider;
scope?: 'shared';
});
/**
* A screencast stream that emits frames.
* Uses EventEmitter pattern for frame delivery.
*
* Four events can be subscribed to through the `on()` overloads below:
* 'frame', 'stop', 'error' and 'url'. Every overload returns `this`, so
* registrations can be chained.
*/
export interface ScreencastStream {
/** Stop the screencast */
stop(): Promise<void>;
/** Check if screencast is active */
isActive(): boolean;
/** Reconnect the screencast (e.g., after tab change) */
reconnect(): Promise<void>;
/**
* Register event handlers.
*
* 'frame': the handler receives the frame payload (`data`) together with the
* viewport dimensions that accompany it.
* NOTE(review): `data` is presumably an encoded image (e.g. base64) - the
* encoding is not visible in this file; confirm against the screencast types.
*/
on(event: 'frame', handler: (frame: {
data: string;
viewport: {
width: number;
height: number;
};
}) => void): this;
/** 'stop': the handler receives a string describing why the stream stopped. */
on(event: 'stop', handler: (reason: string) => void): this;
/** 'error': the handler receives the `Error` that occurred. */
on(event: 'error', handler: (error: Error) => void): this;
/** 'url': the handler receives a URL string (see `emitUrl`). */
on(event: 'url', handler: (url: string) => void): this;
/**
* Emit a URL update (called by browser providers on navigation).
* NOTE(review): presumably this is what triggers the 'url' handlers - confirm.
*/
emitUrl(url: string): void;
}
/**
* Mouse event parameters for CDP injection.
*
* Accepted by `MastraBrowser.injectMouseEvent()`. Only `type`, `x` and `y` are
* required; every other field is optional.
*
* NOTE(review): the field names and `type` values look like they mirror the
* Chrome DevTools Protocol `Input.dispatchMouseEvent` parameters. The
* per-field notes below that mention CDP semantics rest on that assumption -
* confirm against the provider implementation.
*/
export interface MouseEventParams {
/** Kind of mouse event to inject. */
type: 'mousePressed' | 'mouseReleased' | 'mouseMoved' | 'mouseWheel';
/** Horizontal position of the event (presumably viewport CSS pixels - TODO confirm). */
x: number;
/** Vertical position of the event (presumably viewport CSS pixels - TODO confirm). */
y: number;
/** Mouse button involved in the event, or 'none'. */
button?: 'left' | 'right' | 'middle' | 'none';
/** Click count (presumably 1 for a single click, 2 for a double click - TODO confirm). */
clickCount?: number;
/** Horizontal delta (presumably only meaningful for 'mouseWheel' - TODO confirm). */
deltaX?: number;
/** Vertical delta (presumably only meaningful for 'mouseWheel' - TODO confirm). */
deltaY?: number;
/** Modifier keys as a number (presumably the CDP bit field Alt=1, Ctrl=2, Meta=4, Shift=8 - TODO confirm). */
modifiers?: number;
}
/**
* Keyboard event parameters for CDP injection.
*
* Accepted by `MastraBrowser.injectKeyboardEvent()`. Only `type` is required;
* every other field is optional.
*
* NOTE(review): the field names and `type` values look like they mirror the
* Chrome DevTools Protocol `Input.dispatchKeyEvent` parameters. The per-field
* notes below that mention CDP semantics rest on that assumption - confirm
* against the provider implementation.
*/
export interface KeyboardEventParams {
/** Kind of keyboard event to inject. */
type: 'keyDown' | 'keyUp' | 'char';
/** Key value (presumably a DOM `KeyboardEvent.key` value such as 'Enter' - TODO confirm). */
key?: string;
/** Physical key code (presumably a DOM `KeyboardEvent.code` value such as 'KeyA' - TODO confirm). */
code?: string;
/** Text produced by the key press (presumably used for 'char' / printable keys - TODO confirm). */
text?: string;
/** Modifier keys as a number (presumably the CDP bit field Alt=1, Ctrl=2, Meta=4, Shift=8 - TODO confirm). */
modifiers?: number;
/** Windows virtual key code (required for non-printable keys like Enter, Tab, Arrow keys) */
windowsVirtualKeyCode?: number;
}
/**
* Abstract base class for browser providers.
*
* Providers extend this class and implement its abstract members:
* the `id`, `name` and `provider` properties, plus `doLaunch()`, `doClose()`,
* `getActivePage()`, `getBrowserStateForThread()` and `getTools()`.
*
* The provider's tools are not individual methods of this class; each provider
* returns its own tool set from `getTools()`. Most other public methods are
* documented as optional override points ("Override in subclass if supported").
*/
export declare abstract class MastraBrowser extends MastraBase {
/** Unique instance identifier */
abstract readonly id: string;
/** Human-readable name */
abstract readonly name: string;
/** Provider identifier (e.g., 'playwright', 'stagehand', 'browserbase') */
abstract readonly provider: string;
/**
* Provider type for runtime enforcement.
* - 'sdk': SDK providers (AgentBrowser, StagehandBrowser) — use with Agent.browser
* - 'cli': CLI providers (BrowserViewer) — use with Workspace.browser
* Defaults to 'sdk' for backward compatibility with existing providers.
*/
readonly providerType: 'sdk' | 'cli';
/** Current lifecycle status */
status: BrowserStatus;
/** Error message when status is 'error' */
error?: string;
/**
* Whether the browser is running in headless mode.
* Returns true by default if not explicitly configured.
*/
get headless(): boolean;
/** Last known browser state before browser was closed (for restore on relaunch) */
protected lastBrowserState?: BrowserState;
/**
* Shared manager instance for 'shared' scope mode.
* Type varies by provider (e.g., BrowserManager for agent-browser, Stagehand for stagehand).
* Providers should cast this to their specific type when accessing.
*/
protected sharedManager: unknown;
/** Configuration */
protected readonly config: BrowserConfig;
/**
* Thread manager for handling thread-scoped browser sessions.
* Set by subclasses that support thread isolation.
*/
protected threadManager?: ThreadManager;
/**
* Current thread ID for browser operations.
* Used by thread isolation to route operations to the correct session.
* Changed through `setCurrentThread()` and read through `getCurrentThread()`.
*/
protected currentThreadId: string;
/** Default key for shared scope screencast streams */
protected static readonly SHARED_STREAM_KEY = "__shared__";
/**
* Active screencast streams per thread (for triggering reconnects on tab changes).
* Keys are stream keys as returned by `getStreamKey()`.
*/
protected activeScreencastStreams: Map<string, ScreencastStream>;
/**
* PID of the shared browser process.
* Set by providers after launch so the base class can kill the process group
* (GPU, renderer, crashpad, etc.) when the browser disconnects or closes.
*/
protected sharedBrowserPid?: number;
/**
* PIDs of per-thread browser processes.
* Set by providers after creating a thread session.
* NOTE(review): keys are presumably thread IDs - confirm.
*/
protected threadBrowserPids: Map<string, number>;
/**
* Get the stream key for a thread (or shared key for shared scope).
* @param threadId - Optional thread ID
* @returns The stream key to use for the screencast streams map
*/
protected getStreamKey(threadId?: string): string;
/**
* Reconnect the active screencast for a specific thread.
* Called internally when tabs are switched or closed.
* @param threadId - Thread whose screencast should reconnect; may be undefined
*   (presumably meaning the shared stream - TODO confirm)
* @param reason - Description of why the reconnect was requested
*/
protected reconnectScreencastForThread(threadId: string | undefined, reason: string): Promise<void>;
/**
* Update the browser state in the thread session.
* Called on navigation, tab open/close to keep state fresh.
* @param threadId - Optional thread ID
*/
protected updateSessionBrowserState(threadId?: string): void;
// NOTE(review): presumably holds the in-flight launch so concurrent launch()
// calls can share it (launch() is documented as race-condition-safe) - confirm.
private _launchPromise?;
// NOTE(review): presumably the close() counterpart of _launchPromise - confirm.
private _closePromise?;
/**
* Create a browser provider instance.
* @param config - Optional browser configuration; see {@link BrowserConfig}
*/
constructor(config?: BrowserConfig);
/**
* Launch the browser. Override in subclass.
* Called by launch() wrapper which handles status and race conditions.
*/
protected abstract doLaunch(): Promise<void>;
/**
* Close the browser. Override in subclass.
* Called by close() wrapper which handles status and race conditions.
*/
protected abstract doClose(): Promise<void>;
/**
* Get the CDP WebSocket URL for connecting to this browser.
* CLI providers (BrowserViewer) implement this to expose the URL for CLI tools.
* SDK providers typically return null as they manage their own CDP connections.
*
* @param _threadId - Thread identifier (for thread-scoped browsers)
* @returns The CDP WebSocket URL (e.g., ws://127.0.0.1:9222/devtools/browser/...),
*   or null when no URL is exposed
*/
getCdpUrl(_threadId?: string): string | null;
/**
* Launch the browser.
* Race-condition-safe - handles concurrent calls, status management, and lifecycle hooks.
* @param threadId - Thread identifier (for thread-scoped browsers, launches a browser for that thread)
*/
launch(threadId?: string): Promise<void>;
/**
* Close the browser.
* Race-condition-safe - handles concurrent calls, status management, and lifecycle hooks.
*/
close(): Promise<void>;
/**
* Connect to an external browser via CDP URL for screencast.
*
* Use this when an agent is using their own external CDP (e.g., browser-use cloud).
* Connects Playwright to the external browser to enable screencast without launching
* our own browser.
*
* Override this in subclasses that support external CDP connections.
* The base implementation throws an error.
*
* @param _cdpUrl - The external CDP WebSocket URL (wss://... or ws://...)
* @param _threadId - Thread ID to associate the session with
*/
connectToExternalCdp(_cdpUrl: string, _threadId?: string): Promise<void>;
/**
* Ensure the browser is ready, launching if needed.
* If browser was previously closed, it will be re-launched.
*/
ensureReady(): Promise<void>;
/**
* Check if the browser is still alive.
* Override in subclass to detect externally closed browsers.
* @returns true if browser is alive, false if it was externally closed
*/
protected checkBrowserAlive(): Promise<boolean>;
/**
* Check if the browser is currently running.
* Synchronous check; see `checkBrowserAlive()` for the asynchronous liveness probe.
* @param _threadId - Thread identifier (for thread-scoped browsers)
* @returns true if the browser is running
*/
isBrowserRunning(_threadId?: string): boolean;
/**
* Resolve a CDP URL from a static string or async provider function.
* @param cdpUrl - Static string or async function returning the CDP URL
* @returns Resolved CDP URL string
*/
protected resolveCdpUrl(cdpUrl: CdpUrlProvider): Promise<string>;
/**
* Resolve an HTTP CDP endpoint to a WebSocket URL by fetching /json/version.
*
* Cloud browser providers (Browser-Use, Browserless, etc.) often expose HTTP
* endpoints that need to be resolved to WebSocket URLs for direct CDP connections.
*
* - If the URL starts with `ws://` or `wss://`, returns it as-is
* - If the URL starts with `http://` or `https://`, fetches /json/version to get webSocketDebuggerUrl
*
* @param url - CDP URL (HTTP or WebSocket)
* @returns WebSocket URL for CDP connection
*/
protected resolveWebSocketUrl(url: string): Promise<string>;
/**
* Error patterns that indicate browser disconnection.
* Used by isDisconnectionError() to detect external browser closure.
*/
protected static readonly DISCONNECTION_PATTERNS: string[];
/**
* Check if an error message indicates browser disconnection.
* @param message - Error message to check
* @returns true if the message indicates disconnection
*/
isDisconnectionError(message: string): boolean;
/**
* Handle browser disconnection by updating status and notifying listeners.
* Called when browser is detected as externally closed.
*
* For 'thread' scope: clears only the specific thread's session (other threads unaffected)
* For 'shared' scope: clears the shared manager and updates global status
*
* NOTE(review): the method takes no thread argument, so in 'thread' scope the
* affected thread is presumably the current thread (`getCurrentThread()`) -
* confirm against the implementation.
*/
handleBrowserDisconnected(): void;
/**
* Create a BrowserToolError from an exception.
* Handles common error patterns including disconnection detection.
* Subclasses can override to add provider-specific error handling.
*
* @param error - The caught error
* @param context - Description of what operation failed (e.g., "Click operation")
* @returns Structured BrowserToolError
*/
protected createErrorFromException(error: unknown, context: string): BrowserToolError;
/**
* Create a specific error type.
* Convenience method for providers to create typed errors.
*
* @param code - Error code identifying the kind of failure
* @param message - Error message
* @param hint - Optional hint to attach to the error
* @returns Structured BrowserToolError
*/
protected createError(code: ErrorCode, message: string, hint?: string): BrowserToolError;
// Callbacks registered via onBrowserReady() without a threadId (presumably - TODO confirm).
private _onReadyCallbacks;
// Callbacks registered via onBrowserClosed() without a threadId (presumably - TODO confirm).
private _onClosedCallbacks;
/** Thread-specific ready callbacks. Key is threadId. */
private _onThreadReadyCallbacks;
/** Thread-specific closed callbacks. Key is threadId. */
private _onThreadClosedCallbacks;
/**
* Register a callback to be invoked when the browser becomes ready.
* If browser is already running, callback is invoked immediately.
* The callback is ALWAYS registered (even if invoked immediately) so it will
* also fire on future "ready" events (e.g., session creation for thread isolation).
* @param callback - Function to call when browser is ready
* @param threadId - Optional thread ID to scope the callback to a specific thread
* @returns Cleanup function to unregister the callback
*/
onBrowserReady(callback: () => void, threadId?: string): () => void;
/**
* Register a callback to be invoked when the browser closes.
* Useful for screencast to broadcast browser_closed status.
* @param callback - Function to call when browser closes
* @param threadId - Optional thread ID to scope the callback to a specific thread
* @returns Cleanup function to unregister the callback
*/
onBrowserClosed(callback: () => void, threadId?: string): () => void;
/**
* Notify registered callbacks that browser is ready.
* @param threadId - If provided, only notify callbacks for that thread (for thread scope)
*/
protected notifyBrowserReady(threadId?: string): void;
/**
* Notify registered callbacks that browser has closed.
* @param threadId - If provided, only notify callbacks for that thread (for thread scope)
*/
protected notifyBrowserClosed(threadId?: string): void;
/**
* Get the current page URL without launching the browser.
* @param _threadId - Optional thread ID for thread-isolated browsers
* @returns The current URL string, or null if browser is not running or not supported
*/
getCurrentUrl(_threadId?: string): Promise<string | null>;
/**
* Get the current browser state (all tabs and active tab index).
* Override in subclass to provide actual tab state.
* @param _threadId - Optional thread ID for thread-isolated sessions
* @returns The browser state, or null if not available
*/
getBrowserState(_threadId?: string): Promise<BrowserState | null>;
/**
* Get the last known browser state before the browser was closed.
* Useful for restoring state on relaunch.
* Unlike `getBrowserState()`, this is synchronous and uses `undefined`
* (not `null`) for "not available".
* @param threadId - Optional thread ID for thread-isolated sessions
* @returns The last browser state, or undefined if not available
*/
getLastBrowserState(threadId?: string): BrowserState | undefined;
/**
* Get all open tabs with their URLs and titles.
* Override in subclass to provide actual tab info.
* @param _threadId - Optional thread ID for thread-isolated sessions
* @returns Array of tab states
*/
getTabState(_threadId?: string): Promise<BrowserTabState[]>;
/**
* Get the active tab index.
* Override in subclass to provide actual active tab index.
* @param _threadId - Optional thread ID for thread-isolated sessions
* @returns The active tab index (0-based), or 0 if not available
*/
getActiveTabIndex(_threadId?: string): Promise<number>;
/**
* Navigate to a URL (simple form). Override in subclass if supported.
* Used internally for restoring state on relaunch.
* Named `navigateTo` to avoid conflicts with tool methods that have richer signatures.
* @param _url - URL to navigate to
*/
navigateTo(_url: string): Promise<void>;
/**
* Set the current thread ID for subsequent browser operations.
* Called by tools before executing browser actions to ensure
* operations are routed to the correct thread session.
*
* @param threadId - The thread ID, or undefined to use the default thread
*/
setCurrentThread(threadId?: string): void;
/**
* Get the current thread ID.
* @returns The current thread ID being used for operations
*/
getCurrentThread(): string;
/**
* Get the browser scope mode.
* @returns The scope from threadManager or config, defaults to 'shared'
*
* NOTE(review): `BrowserConfigBase.scope` documents `@default 'thread'`, which
* disagrees with the 'shared' fallback described here. Confirm which default
* the implementation applies and align the two comments.
*/
getScope(): BrowserScope;
/**
* Start screencast streaming. Override in subclass if supported.
* @param _options - Optional screencast options
* @returns The screencast stream that emits frames
*/
startScreencast(_options?: ScreencastOptions): Promise<ScreencastStream>;
/**
* Check if a thread has an existing browser session.
* Used by startScreencastIfBrowserActive to prevent showing another thread's page.
*
* If threadManager is set, delegates to it. Otherwise returns true (no isolation).
* Subclasses can override for custom behavior.
*
* @param threadId - The thread ID to check
* @returns true if session exists or thread isolation is not used
*/
hasThreadSession(threadId: string): boolean;
/**
* Close a specific thread's browser session.
* Delegates to ThreadManager and notifies registered callbacks.
*
* For 'thread' scope, this closes only that thread's browser instance.
* For 'shared' scope, this is a no-op (use close() to close the shared browser).
*
* @param threadId - The thread ID whose session should be closed
*/
closeThreadSession(threadId: string): Promise<void>;
/**
* Handle browser disconnection for a specific thread.
* Called when a thread's browser is closed externally (e.g., user closes browser window).
* Clears the thread session and notifies registered callbacks.
*
* @param threadId - The thread ID whose session was disconnected
*/
protected handleThreadBrowserDisconnected(threadId: string): void;
/**
* Get a session identifier for a specific thread.
* In thread scope, returns a composite ID (browser:threadId).
* In shared scope or without thread manager, returns the browser instance ID.
*
* @param threadId - Optional thread ID
* @returns The session identifier string
*/
getSessionId(threadId?: string): string;
/**
* Start screencast only if browser is already running.
* Does NOT launch the browser.
* Uses config.screencast options as defaults if no options provided.
*
* For thread-isolated browsers ('browser' mode):
* - Returns null if the thread doesn't have an existing browser session
*
* NOTE(review): "'browser' mode" is not a scope value used anywhere else in
* this file; it presumably refers to `scope: 'thread'` - confirm and update
* the wording. The method takes no thread argument, so the thread checked is
* presumably the current thread - confirm.
*
* @param options - Optional screencast options
* @returns The screencast stream, or null when no stream was started
*/
startScreencastIfBrowserActive(options?: ScreencastOptions): Promise<ScreencastStream | null>;
/**
* Inject a mouse event. Override in subclass if supported.
* @param _event - Mouse event parameters
* @param _threadId - Optional thread ID for thread-isolated sessions
*/
injectMouseEvent(_event: MouseEventParams, _threadId?: string): Promise<void>;
/**
* Inject a keyboard event. Override in subclass if supported.
* @param _event - Keyboard event parameters
* @param _threadId - Optional thread ID for thread-isolated sessions
*/
injectKeyboardEvent(_event: KeyboardEventParams, _threadId?: string): Promise<void>;
/**
* Get the active page for a thread.
* Used by screencast reconnection to emit the current URL.
*
* The return type is structural: this base class only requires an object
* with a `url()` method returning a string, so providers are not tied to a
* specific page class by this signature.
*
* @param threadId - Optional thread ID (uses current thread if not provided)
* @returns The active Playwright Page, or null if not available
*/
protected abstract getActivePage(threadId?: string): Promise<{
url(): string;
} | null>;
/**
* Get the current browser state for a thread.
* Used to persist and restore browser state across sessions.
*
* Unlike `getBrowserState()`, this method is synchronous.
*
* @param threadId - Optional thread ID (uses current thread if not provided)
* @returns Browser state including URL, tabs, and active tab index, or null
*/
protected abstract getBrowserStateForThread(threadId?: string): BrowserState | null;
/**
* Returns browser input processors (e.g., BrowserContextProcessor for context injection).
* Skips if the user already added a processor with the same id.
*
* This method is similar to AgentChannels.getInputProcessors() and allows
* browser implementations to provide their own processors.
*
* @param configuredProcessors - Processors already configured by the user (for deduplication)
* @returns Array of input processors for this browser instance
*/
getInputProcessors(configuredProcessors?: InputProcessorOrWorkflow[]): InputProcessor[];
/**
* Get the browser tools for this provider.
*
* Each provider returns its own set of tools. For example:
* - AgentBrowser returns 17 deterministic tools using refs
* - StagehandBrowser might return AI-powered tools (act, extract, observe)
*
* @returns Record of tool name to tool definition
*/
abstract getTools(): Record<string, Tool<any, any>>;
}
//# sourceMappingURL=browser.d.ts.map