UNPKG

@playword/core

Version:
921 lines (849 loc) 27.9 kB
import { AnthropicInput } from '@langchain/anthropic';
import { EmbeddingsParams } from '@langchain/core/embeddings';
import { GoogleGenerativeAIChatInput } from '@langchain/google-genai';
import { ChatOpenAIFields, ClientOptions } from '@langchain/openai';
import { BrowserContext, Frame, Page } from 'playwright-core';
import * as _langchain_core_documents from '@langchain/core/documents';
import { Document } from '@langchain/core/documents';
import * as _langchain_core_messages from '@langchain/core/messages';
import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages';
import { DynamicStructuredTool } from '@langchain/core/tools';

/**
 * AI is a module that uses AI services to execute the PlayWord functions.
 * It provides the following functionalities:
 * - Use the chat model binding with custom tools to perform actions.
 * - Embed documents into a new vector store.
 * - Search for the most similar documents from the vector store.
 * - Determine if the assertion passes based on the user's input and the AI response.
 * - Get the best candidate from the embedded documents based on the user's input.
 * - Summarize the Observer action.
 *
 * @param opts The options for AI configuration. See {@link AIOptions} for details.
 */
declare class AI {
  /** The chat model to use for the general tasks. */
  private llm;
  /** The vector store to store the embedded documents. */
  private store;
  constructor(opts?: AIOptions);
  /**
   * Use custom tools to perform actions.
   *
   * @param tools The custom tools to use.
   * @param messages The messages to send to the AI.
   * @returns A promise resolving to the AI message chunk produced by the chat model.
   */
  useTools(tools: DynamicStructuredTool[], messages: (AIMessage | HumanMessage | ToolMessage)[]): Promise<_langchain_core_messages.AIMessageChunk>;
  /**
   * Embed texts into a new vector store.
   *
   * @param texts The texts to embed.
   */
  embedTexts(texts: string[]): Promise<void>;
  /**
   * Get the most similar documents from the vector store.
   *
   * @param query The query to search for the most similar documents.
   * @param topN The number of top results to return.
   * NOTE(review): the default used when `topN` is omitted is not visible in this declaration — confirm in the implementation.
   * @returns A promise resolving to the matching documents.
   */
  searchDocuments(query: string, topN?: number): Promise<_langchain_core_documents.DocumentInterface<Record<string, any>>[]>;
  /**
   * Determine if the assertion passes based on the messages.
   *
   * @param messages The messages storing the user's input and the AI response.
   * @returns A promise resolving to a boolean; per the summary above, it reflects whether the assertion passes.
   */
  parseResult(messages: (AIMessage | HumanMessage | ToolMessage)[]): Promise<boolean>;
  /**
   * Get the best candidate from the candidate documents.
   *
   * @param input The user's input to find the best candidate.
   * @param docs The candidate documents.
   * @returns A promise resolving to a number.
   * NOTE(review): presumably the index of the chosen document within `docs` — confirm against the implementation.
   */
  getBestCandidate(input: string, docs: Document[]): Promise<number>;
  /**
   * Summarize the Observer action.
   *
   * @param action The action to summarize.
   * @returns A promise resolving to the summary text.
   */
  summarizeAction(action: string): Promise<string>;
}

/**
 * The Recorder class manages the recordings created during interactions with
 * PlayWord and PlayWord Observer. It provides functionality to store, retrieve,
 * and manipulate recordings, allowing users to save test cases for replay or review.
 */
declare class Recorder {
  /**
   * The path of the recording file, as passed to the constructor.
   * NOTE(review): the default used when no path is given is not visible here — confirm in the implementation.
   */
  private recordPath;
  /**
   * The current position in the list of recordings.
   *
   * This indicates the index of the recording currently being operated on.
   */
  private position;
  /**
   * The list of recordings being managed.
   *
   * Each recording contains a set of actions and metadata. See {@link Recording} for the structure.
   */
  private recordings;
  constructor(recordPath?: string);
  /**
   * Checks if the recording file exists and verifies that it is a JSON file.
   *
   * @param path The path to the recording file.
   */
  private check;
  /**
   * Recursively removes a specified property from the recordings.
   *
   * @param recordings The target recordings.
   * @param prop The property to remove.
   */
  private removeProperty;
  /**
   * Appends an action to the current recording.
   *
   * @param action The action to append.
   */
  addAction(action: Action): void;
  /** Clears all recordings and resets the position. */
  clear(): void;
  /** Retrieves the count of recordings. */
  count(): number;
  /**
   * Deletes the recording at the specified position.
   *
   * If the position is out of bounds, this method does nothing.
   *
   * @param position The position in the recordings list to delete.
   */
  delete(position: number): void;
  /**
   * Initializes a new recording step at the specified position.
   *
   * @param position The position in the recordings list to initialize.
   * @param input The input of the recording step.
   */
  initStep(position: number, input: string): void;
  /** Lists all recordings. */
  list(): Recording[];
  /** Loads recordings from the file path. */
  load(): Promise<void>;
  /**
   * Saves the current recordings to the file path.
   *
   * @param excluded The properties to exclude from the recordings before saving.
   */
  save(excluded?: string[]): Promise<void>;
}

declare global {
  /** The custom window object to interact with the browser page. */
  interface Window {
    /**
     * When named functions are assigned in **page.evaluate** and the program is run with `tsx`,
     * the error `ReferenceError: __name is not defined` is thrown.
     *
     * **Workaround**
     *
     * Define the `__name` property on the window object to avoid the error.
     *
     * **Reference**
     *
     * https://stackoverflow.com/questions/78218772
     */
    __name: (fn: unknown) => unknown
    /** Accepts the current action and saves it to the recorder. */
    accept: () => void
    /** Cancels the current action. */
    cancel: () => void
    /** Clears all recorded actions in the recorder. */
    clearAll: () => void
    /** Deletes the specified step on the timeline. */
    deleteStep: (index: number) => void
    /** Starts the dry run process. */
    dryRun: () => void
    /** Emits an action to the observer for processing. */
    emit: (action: ObserverAction) => void
    /** Stops the dry run process. */
    stopDryRun: () => void
    /** Updates the step description recorded in the observer. */
    updateInput: (input: string) => void
  }
}

/**
 * Represents an executable action within the application.
 *
 * This interface defines the structure for actions that can be performed
 * using the functions provided in the `actions` module. Each action includes a
 * name, parameters, and an optional status indicating the success of its last execution.
 */
interface Action {
  /**
   * The action name to be executed.
   *
   * This must correspond to one of the functions defined in the actions module.
   */
  name: string
  /** Parameters to pass to the action during execution. */
  params: Partial<ActionParams>
  /**
   * Indicates whether the action succeeded during the most recent execution.
   *
   * This field is optional and defaults to `undefined` if the action has not been executed.
   */
  success?: boolean
}

/**
 * Parameters for the actions.
 *
 * This interface defines the inputs required to perform various actions
 * within the application. Each property corresponds to a specific type of interaction
 * or behavior.
 */
interface ActionParams {
  /**
   * Specifies the direction to scroll the page.
   *
   * Possible values:
   * - `up`: Scrolls the page upwards by the height of the window (`window.innerHeight`).
   * - `down`: Scrolls the page downwards by the height of the window (`window.innerHeight`).
   * - `top`: Scrolls the page to the topmost position.
   * - `bottom`: Scrolls the page to the bottommost position.
   */
  direction: 'up' | 'down' | 'top' | 'bottom'
  /** Specifies the duration to wait before performing the action, in milliseconds. */
  duration: number
  /**
   * The frame number to switch to, used for interacting with specific frames on a page.
   *
   * Frames are typically indexed starting from 0.
   */
  frameNumber: number
  /** The source of the frame in which to perform the action. */
  frameSrc: string
  /**
   * The keys to press during the action.
   *
   * This should be a string representation of the keys, such as `"Enter"`, `"Ctrl+C"`, or `"ArrowUp"`.
   */
  keys: string
  /** The option to select from a dropdown menu. */
  option: string
  /**
   * The page number to switch to, used when multiple pages are managed simultaneously.
   *
   * Pages are typically indexed starting from 0.
   */
  pageNumber: number
  /**
   * A regular expression pattern to verify if the current URL matches a specific format.
   *
   * @example `^https://example.com/.*$`
   */
  pattern: string
  /** The text to input into an element. */
  text: string
  /**
   * The URL to navigate to.
   *
   * This should be a valid and complete URL, including the protocol (e.g., `https://`).
   */
  url: string
  /** The XPath location of the element to interact with. */
  xpath: string
}

/**
 * Location information for an element on the page.
 *
 * This interface provides the necessary properties to identify and describe an element for interaction on the page.
 */
interface ElementLocation {
  /**
   * The source URL or identifier of the frame containing the element.
   *
   * This is an optional field used when the element resides inside an iframe or frame.
   */
  frameSrc?: string
  /** The full HTML content of the element, including tags, attributes, and text. */
  html: string
  /** The XPath location of the element to interact with. */
  xpath: string
}

/** Configuration options for the Observer class. */
interface ObserverOptions {
  /**
   * The delay in milliseconds to wait before executing each action during a dry run.
   *
   * This introduces a pause between consecutive actions to wait for loading and rendering.
   *
   * @default 250
   */
  delay?: number
  /**
   * The file path where the recordings will be saved.
   *
   * This specifies the location for storing recorded actions. The path should end with `.json`.
   *
   * Example: `/path/to/custom/recordings.json`
   *
   * @default '.playword/recordings.json'
   */
  recordPath?: string
}

/** Configuration options for the PlayWord class. */
interface PlayWordOptions {
  /**
   * Configuration options for the AI instance.
   *
   * These options allow customization of the API client, such as specifying
   * an API key or custom endpoint.
   *
   * @example
   * **Initialize with Google and change the default model**
   * ```ts
   * const playword = new PlayWord(context, {
   *   aiOptions: {
   *     googleApiKey: '<your-google-api-key>',
   *     model: 'gemini-2.0-flash'
   *   }
   * })
   * ```
   *
   * **Initialize with Anthropic and Voyage AI**
   * ```ts
   * const playword = new PlayWord(context, {
   *   aiOptions: {
   *     anthropicApiKey: 'sk-...',
   *     voyageAIApiKey: 'pa-...'
   *   }
   * })
   * ```
   *
   * **Use a custom OpenAI endpoint**
   * ```ts
   * const playword = new PlayWord(context, {
   *   aiOptions: {
   *     baseURL: 'https://api.my-openai-clone.com/v1',
   *     openAIApiKey: '<your-api-key>'
   *   }
   * })
   * ```
   *
   * @default {}
   */
  aiOptions?: AIOptions
  /**
   * Whether to enable debug mode.
   *
   * @default false
   */
  debug?: boolean
  /**
   * The delay in milliseconds to wait before executing each action during the playback.
   *
   * This introduces a pause between actions to wait for loading and rendering.
   *
   * @default 250
   */
  delay?: number
  /**
   * Whether to record actions performed and where to save the recordings.
   *
   * - `true`: Records actions and saves them to `.playword/recordings.json` by default.
   * - `string`: Specifies a custom file path for saving the recordings. The path must end with `.json`.
   * - `false`: Disables action recording.
   *
   * @example
   * **Record actions and save to the default path**
   * ```ts
   * const playword = new PlayWord(context, { record: true })
   * ```
   *
   * **Save recordings to a custom path**
   * ```ts
   * const playword = new PlayWord(context, {
   *   record: 'path/to/recordings.json'
   * })
   * ```
   *
   * **Disable recordings**
   * ```ts
   * const playword = new PlayWord(context, { record: false })
   * ```
   *
   * @default false
   */
  record?: boolean | string
}

/**
 * Interface for interacting with PlayWord, providing functions to control the browser,
 * perform actions, and interact with AI services using natural language.
 */
interface PlayWordInterface {
  /** AI instance used to call the configured AI services. See {@link AIOptions}. */
  ai: AI
  /** Playwright `BrowserContext` instance used to control the browser. */
  context: BrowserContext
  /**
   * The delay in milliseconds to wait before executing each action during the playback.
   *
   * This introduces a pause between actions to wait for loading and rendering.
   *
   * @default 250
   */
  delay: number
  /**
   * The frame within the page, if the current context is inside a frame.
   *
   * This property represents the current frame being operated on within the page. It
   * allows for frame-specific actions when the context is nested inside an iframe.
   *
   * - **PlayWord Observer**: The current frame will be recorded and saved to the record file.
   * - **PlayWord**: You can switch frames dynamically using the `say` method.
   *
   * If no frame is set, the value will be `undefined`.
   *
   * @example
   * **Switch to the specified frame**
   * ```ts
   * // Switch to the frame with the name "frame-name"
   * await playword.say('Switch to the frame "frame-name"')
   * // Switch to the frame with the source "https://www.example.com"
   * await playword.say('Switch to the frame "https://www.example.com"')
   * ```
   */
  frame?: Frame
  /**
   * The most recent input from the user.
   *
   * This stores the last natural language command provided to the `say` method.
   */
  input: string
  /**
   * The Playwright `Page` instance used to perform actions.
   *
   * This property represents the current page being operated on. When a new page is
   * opened, the context will automatically switch to the new page. Additionally,
   * manual page switching can be performed using the `say` method.
   *
   * @example
   * **Switch to the specified page**
   * ```ts
   * // Switch to the first page
   * await playword.say('Switch to the first page')
   * // Switch to the second page
   * await playword.say('Switch to the second page')
   * ```
   */
  page?: Page
  /**
   * The recorder instance used to save the actions performed.
   *
   * If recording is not enabled or initialized, the value will be `undefined`.
   */
  recorder?: Recorder
  /**
   * Step count to keep track of the actions performed.
   * This is used to locate the recording in the record file.
   */
  stepCount: number
  /**
   * Executes actions on the page using natural language input.
   *
   * Converts the provided input into corresponding actions and performs them
   * on the browser page.
   *
   * @param message Natural language input to specify the action.
   * @returns A promise resolving to the action result. See {@link ActionResult} for details.
   *
   * @example
   * **Navigate to a webpage**
   * ```ts
   * const playword = new PlayWord(context)
   * await playword.say('Navigate to https://www.google.com')
   * ```
   *
   * **Click a link**
   * ```ts
   * await playword.say('Click the "Gmail" link')
   * ```
   *
   * **Check for page content**
   * ```ts
   * const result = await playword.say('Check if the page contains "Sign in"')
   * // Output: true
   * ```
   */
  say(message: string): Promise<ActionResult>
}

/**
 * Represents a recording of actions performed during user interactions.
 *
 * This interface includes the input message and the actions performed in one step.
 */
interface Recording {
  /** Input message to map actions performed. */
  input: string
  /** Actions performed in one step. */
  actions: Action[]
}

/**
 * Represents the current state of the observer.
 *
 * The `ObserverState` interface tracks various states during the execution of the observer.
 */
interface ObserverState {
  /** Indicates whether the observer is currently performing a dry run. */
  dryRunning?: boolean
  /**
   * Indicates whether the Observer is waiting for AI to generate
   * a step description or adjust the current action.
   */
  waitingForAI?: boolean
  /**
   * Indicates whether the Observer is waiting for user input
   * to accept, modify, or drop the action.
   */
  waitingForUserAction?: boolean
}

/**
 * Interface that extends EmbeddingsParams and defines additional
 * parameters specific to the VoyageEmbeddings class.
 */
interface VoyageEmbeddingsParams extends EmbeddingsParams {
  /** The Voyage AI API key. */
  apiKey?: string
  /**
   * The maximum number of documents to embed in a single request.
   *
   * This is limited by the Voyage AI API to a maximum of 8.
   *
   * @default 8
   */
  batchSize?: number
  /**
   * The endpoint URL for the Voyage AI API.
   *
   * @default 'https://api.voyageai.com/v1/embeddings'
   */
  endpoint?: string
  /**
   * Input type for the embeddings request. Can be "query" or "document".
   *
   * @default undefined
   */
  inputType?: 'query' | 'document'
  /**
   * The embeddings model to use.
   *
   * @default 'voyage-3'
   */
  model?: string
  /**
   * The desired dimension of the output embeddings.
   *
   * @default undefined
   */
  outputDimension?: number
  /**
   * The data type of the output embeddings. Can be "float" or "int8".
   *
   * @default 'float'
   */
  outputDtype?: 'float' | 'int8'
  /**
   * Whether to truncate the input texts to the maximum length allowed by the model.
   *
   * @default true
   */
  truncation?: boolean
}

/**
 * The response from an action performed on the page.
 *
 * - For assertion actions, the result is a boolean value indicating success (`true`) or failure (`false`).
 * - For non-assertion actions, the result is a string message describing the outcome.
 */
type ActionResult = boolean | string

/** Configuration for the AI class: any one of the provider-specific option shapes below. */
type AIOptions = GoogleOptions | OpenAIOptions | AnthropicOptions | VoyageOptions

/** Anthropic configuration options. */
type AnthropicOptions = AnthropicInput & ClientOptions

/** Google configuration options. */
type GoogleOptions = GoogleGenerativeAIChatInput & {
  /** The API key for the Google API. */
  googleApiKey?: string
}

/** OpenAI configuration options. */
type OpenAIOptions = ChatOpenAIFields & ClientOptions

/** Voyage AI configuration options. */
type VoyageOptions = VoyageEmbeddingsParams & {
  /** The Voyage AI API key. */
  voyageAIApiKey?: string
}

/**
 * Represents an action observed during user interactions.
 *
 * This type is a union discriminated by `name`; each variant carries the
 * parameters associated with that action.
 */
type ObserverAction =
  | {
      /** The name of the action. */
      name: 'click'
      /**
       * The parameters for the `click` action.
       *
       * Includes the location of the element to be clicked.
       */
      params: ElementLocation
    }
  | {
      /** The name of the action. */
      name: 'hover'
      /**
       * The parameters for the `hover` action.
       *
       * Includes the location of the element and the duration of the hover action.
       */
      params: ElementLocation & { duration: number }
    }
  | {
      /** The name of the action. */
      name: 'input'
      /**
       * The parameters for the `input` action.
       *
       * Includes the location of the element and the text to input.
       */
      params: ElementLocation & { text: string }
    }
  | {
      /** The name of the action. */
      name: 'select'
      /**
       * The parameters for the `select` action.
       *
       * Includes the location of the dropdown element and the option to select.
       */
      params: ElementLocation & { option: string }
    }
  | {
      /** The name of the action. */
      name: 'goto'
      /**
       * The parameters for the `goto` action.
       *
       * Includes the URL to navigate to.
       */
      params: { url: string }
    }

/**
 * PlayWord Observer enables tracking and recording user interactions on a webpage.
 *
 * By leveraging AI, the Observer converts user behaviors into precise and reliable
 * test cases, making it an essential tool for automated testing of web applications.
 *
 * **Features:**
 * - Observes user interactions and translates them into executable test cases.
 * - Mounts a user interface on all opened pages using Playwright's `addInitScript` method,
 *   allowing seamless interaction with the Observer.
 * - Integrates with PlayWord for executing the recorded test cases.
 *
 * **Usage:**
 * This class works in conjunction with the PlayWord framework. The recorded test cases
 * can be directly executed via the provided PlayWord instance.
 *
 * @param playword The PlayWord instance used to control the browser and leverage AI.
 * @param observerOptions Optional configuration for the Observer. See {@link ObserverOptions} for details.
 *
 * @example
 * **Initialize and Use the Observer**
 * ```ts
 * // Initialize a PlayWord instance
 * const playword = new PlayWord(context)
 *
 * // Create an Observer instance and start observing
 * const observer = new Observer(playword, { delay: 500 })
 * await observer.observe()
 * ```
 */
declare class Observer {
  /** The PlayWord instance passed to the constructor. */
  private playword;
  /**
   * Represents the current page action being performed.
   * See {@link Action} for details.
   */
  private action;
  /**
   * The delay in milliseconds to wait before executing each action during a dry run.
   *
   * This introduces a pause between consecutive actions, allowing time for
   * the page to load and render properly.
   *
   * @default 250
   */
  private delay;
  /** The step description for the current action. */
  private input;
  /** The recorder instance used to save the actions performed. */
  private recorder;
  /**
   * The current state of the Observer.
   * See {@link ObserverState} for details.
   */
  state: ObserverState;
  constructor(playword: PlayWordInterface, { delay, recordPath }?: ObserverOptions);
  /** Retrieves the AI instance from the PlayWord instance. */
  private ai;
  /** Retrieves the current context from the PlayWord instance. */
  private context;
  /** Retrieves the current page from the PlayWord instance. */
  private page;
  /** Set up the Observer scripts and listeners on the page. */
  private setPageListeners;
  /** Starts observing the user interactions on the page. */
  observe(): Promise<void>;
}

/**
 * PlayWord enables users to automate browsers with AI.
 *
 * This class simplifies browser automation by removing the need to locate elements
 * manually using selectors. Instead, you can describe your desired actions in natural
 * language, and PlayWord will interpret and execute them.
 *
 * **Repository**: [GitHub - PlayWord](https://github.com/Foreverskyin0216/playword)
 *
 * @param context The Playwright `BrowserContext` instance used to control the browser.
 * @param playwordOptions Optional configuration for PlayWord. See {@link PlayWordOptions} for details.
 *
 * @example
 * **Create a new PlayWord instance**
 * ```ts
 * const context = await browser.newContext()
 * const playword = new PlayWord(context, {
 *   aiOptions: {
 *     openAIApiKey: '<your-api-key>'
 *   },
 *   debug: true,
 *   delay: 500,
 *   record: 'spec/test-login.json'
 * })
 * ```
 */
declare class PlayWord implements PlayWordInterface {
  /** Playwright `BrowserContext` instance used to control the browser. */
  context: BrowserContext;
  /** Use the thread ID to keep track of the conversation for LangGraph. */
  private threadId;
  /** AI instance used to call the configured AI services. See {@link AIOptions}. */
  ai: AI;
  /**
   * The delay in milliseconds to wait before executing each action during the playback.
   *
   * This introduces a pause between actions to wait for loading and rendering.
   *
   * @default 250
   */
  delay: number;
  /**
   * The frame within the page, if the current context is inside a frame.
   *
   * This property represents the current frame being operated on within the page. It
   * allows for frame-specific actions when the context is nested inside an iframe.
   *
   * - **PlayWord Observer**: The current frame will be recorded and saved to the record file.
   * - **PlayWord**: You can switch frames dynamically using the `say` method.
   *
   * If no frame is set, the value will be `undefined`.
   *
   * @example
   * **Switch to the specified frame**
   * ```ts
   * // Switch to the frame with the name "frame-name"
   * await playword.say('Switch to the frame "frame-name"')
   * // Switch to the frame with the source "https://www.example.com"
   * await playword.say('Switch to the frame "https://www.example.com"')
   * ```
   */
  frame?: Frame;
  /**
   * The most recent input from the user.
   *
   * This stores the last natural language command provided to the `say` method.
   */
  input: string;
  /**
   * The Playwright `Page` instance used to perform actions.
   *
   * This property represents the current page being operated on. When a new page is
   * opened, the context will automatically switch to the new page. Additionally,
   * manual page switching can be performed using the `say` method.
   *
   * @example
   * **Switch to the specified page**
   * ```ts
   * // Switch to the first page
   * await playword.say('Switch to the first page')
   * // Switch to the second page
   * await playword.say('Switch to the second page')
   * ```
   */
  page?: Page;
  /**
   * The recorder instance used to save the actions performed.
   *
   * If recording is not enabled or initialized, the value will be `undefined`.
   */
  recorder?: Recorder;
  /**
   * Step count to keep track of the actions performed.
   * This is used to locate the recording in the record file.
   */
  stepCount: number;
  constructor(context: BrowserContext, { debug, delay, aiOptions, record }?: PlayWordOptions);
  /**
   * The decorator to handle the test fixture, including the setup process and teardown process.
   *
   * **Setup:**
   * - If the page is not initialized, create a new page.
   * - If recording is enabled, load the recordings from the record file.
   * - If the input starts with the AI pattern, replace the AI pattern with an empty string for the input.
   *
   * **Teardown:**
   * - Increment the step count to locate the recording in the record file.
   */
  private static fixture;
  /** Invoke the say method graph to perform actions. */
  private useSayGraph;
  /**
   * Use recordings to perform actions. If the action fails, retry with AI.
   *
   * @param recording The recording to perform actions. See {@link Recording} for details.
   */
  private useRecording;
  /**
   * Executes actions on the page using natural language input.
   *
   * Converts the provided input into corresponding actions and performs them
   * on the browser page.
   *
   * @param message Natural language input to specify the action.
   * @returns A promise resolving to the action result. See {@link ActionResult} for details.
   *
   * @example
   * **Navigate to a webpage**
   * ```ts
   * const playword = new PlayWord(context)
   * await playword.say('Navigate to https://www.google.com')
   * ```
   *
   * **Click a link**
   * ```ts
   * await playword.say('Click the "Gmail" link')
   * ```
   *
   * **Check for page content**
   * ```ts
   * const result = await playword.say('Check if the page contains "Sign in"')
   * // Output: true
   * ```
   */
  say(message: string): Promise<ActionResult>;
}

export { Observer, PlayWord };