// @playword/core
// Version:
// Supercharge your web test automation experience with AI
// 921 lines (849 loc) • 27.9 kB
// TypeScript
import { AnthropicInput } from '@langchain/anthropic';
import { EmbeddingsParams } from '@langchain/core/embeddings';
import { GoogleGenerativeAIChatInput } from '@langchain/google-genai';
import { ChatOpenAIFields, ClientOptions } from '@langchain/openai';
import { BrowserContext, Frame, Page } from 'playwright-core';
import * as _langchain_core_documents from '@langchain/core/documents';
import { Document } from '@langchain/core/documents';
import * as _langchain_core_messages from '@langchain/core/messages';
import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages';
import { DynamicStructuredTool } from '@langchain/core/tools';
/**
* AI is a module that uses AI services to execute the PlayWord functions.
* It provides the following functionalities:
* - Use the chat model binding with custom tools to perform actions.
* - Embed documents into a new vector store.
* - Search for the most similar documents from the vector store.
* - Determine if the assertion passes based on the user's input and the AI response.
* - Get the best candidate from the embedded documents based on the user's input.
* - Summarize the Observer action.
*
* Every public method is asynchronous and returns a promise.
*
* @param opts The options for AI configuration. See {@link AIOptions} for details.
*/
declare class AI {
/** The chat model to use for the general tasks. */
private llm;
/** The vector store to store the embedded documents. */
private store;
/**
* Creates an AI instance.
*
* @param opts Optional AI configuration. See {@link AIOptions} for details.
*/
constructor(opts?: AIOptions);
/**
* Use custom tools to perform actions.
*
* @param tools The custom tools to use.
* @param messages The messages to send to the AI.
* @returns A promise that resolves to the resulting `AIMessageChunk`.
*/
useTools(tools: DynamicStructuredTool[], messages: (AIMessage | HumanMessage | ToolMessage)[]): Promise<_langchain_core_messages.AIMessageChunk>;
/**
* Embed texts into a new vector store.
*
* @param texts The texts to embed.
* @returns A promise that resolves with no value.
*/
embedTexts(texts: string[]): Promise<void>;
/**
* Get the most similar documents from the vector store.
*
* @param query The query to search for the most similar documents.
* @param topN The number of top results to return. Optional; the default value is not
* visible in this declaration.
* @returns A promise that resolves to the matching documents.
*/
searchDocuments(query: string, topN?: number): Promise<_langchain_core_documents.DocumentInterface<Record<string, any>>[]>;
/**
* Determine if the assertion passes based on the messages.
*
* @param messages The messages storing the user's input and the AI response.
* @returns A promise that resolves to a boolean indicating whether the assertion passes.
*/
parseResult(messages: (AIMessage | HumanMessage | ToolMessage)[]): Promise<boolean>;
/**
* Get the best candidate from the candidate documents.
*
* @param input The user's input to find the best candidate.
* @param docs The candidate documents.
* @returns A promise that resolves to a number identifying the best candidate.
* NOTE(review): presumably the index of the chosen document in `docs` — confirm against
* the implementation.
*/
getBestCandidate(input: string, docs: Document[]): Promise<number>;
/**
* Summarize the Observer action.
*
* @param action The action to summarize.
* @returns A promise that resolves to the summary text.
*/
summarizeAction(action: string): Promise<string>;
}
/**
* The Recorder class manages the recordings created during interactions with
* PlayWord and PlayWord Observer. It provides functionality to store, retrieve,
* and manipulate recordings, allowing users to save test cases for replay or review.
*
* Recordings are kept in memory as a list of {@link Recording} entries and are read from
* and written to a file via {@link Recorder.load} and {@link Recorder.save}.
*/
declare class Recorder {
/**
* The path of the recording file.
*
* NOTE(review): presumably initialized from the constructor's `recordPath` argument — confirm.
*/
private recordPath;
/**
* The current position in the list of recordings.
*
* This indicates the index of the recording currently being operated on.
*/
private position;
/**
* The list of recordings being managed.
*
* Each recording contains a set of actions and metadata. See {@link Recording} for the structure.
*/
private recordings;
/**
* Creates a Recorder.
*
* @param recordPath Optional path to the recording file. The default used when omitted
* is not visible in this declaration.
*/
constructor(recordPath?: string);
/**
* Checks if the recording file exists and verifies that it is a JSON file.
*
* @param path The path to the recording file.
*/
private check;
/**
* Recursively removes a specified property from the recordings.
*
* @param recordings The target recordings.
* @param prop The property to remove.
*/
private removeProperty;
/**
* Appends an action to the current recording.
*
* @param action The action to append. See {@link Action} for the structure.
*/
addAction(action: Action): void;
/** Clears all recordings and resets the position. */
clear(): void;
/**
* Retrieves the count of recordings.
*
* @returns The number of recordings.
*/
count(): number;
/**
* Deletes the recording at the specified position.
*
* If the position is out of bounds, this method does nothing.
*
* @param position The position in the recordings list to delete.
*/
delete(position: number): void;
/**
* Initializes a new recording step at the specified position.
*
* @param position The position in the recordings list to initialize.
* @param input The input of the recording step.
*/
initStep(position: number, input: string): void;
/**
* Lists all recordings.
*
* @returns The recordings as an array of {@link Recording}.
*/
list(): Recording[];
/**
* Loads recordings from the file path.
*
* @returns A promise that resolves with no value.
*/
load(): Promise<void>;
/**
* Saves the current recordings to the file path.
*
* @param excluded The properties to exclude from the recordings before saving.
* @returns A promise that resolves with no value.
*/
save(excluded?: string[]): Promise<void>;
}
declare global {
/**
* The custom window object to interact with the browser page.
*
* Augments the global `Window` interface with the functions declared below.
*/
interface Window {
/**
* When named functions are assigned inside **page.evaluate** and the program is run with `tsx`,
* the error `ReferenceError: __name is not defined` is thrown.
*
* **Workaround**
*
* Define the `__name` property on the window object to avoid the error.
*
* **Reference**
*
* https://stackoverflow.com/questions/78218772
*/
__name: (fn: unknown) => unknown
/** Accepts the current action and saves it to the recorder. */
accept: () => void
/** Cancels the current action. */
cancel: () => void
/** Clears all recorded actions in the recorder. */
clearAll: () => void
/**
* Deletes the specified step on the timeline.
*
* `index` identifies the step to delete.
*/
deleteStep: (index: number) => void
/** Starts the dry run process. */
dryRun: () => void
/** Emits an action to the observer for processing. See {@link ObserverAction}. */
emit: (action: ObserverAction) => void
/** Stops the dry run process. */
stopDryRun: () => void
/** Updates the step description recorded in the observer. */
updateInput: (input: string) => void
}
}
/**
* Represents an executable action within the application.
*
* This interface defines the structure for actions that can be performed
* using the functions provided in the `actions` module. Each action includes a
* name, parameters, and an optional status indicating the success of its last execution.
*/
interface Action {
/**
* The action name to be executed.
*
* This must correspond to one of the functions defined in the actions module.
*/
name: string
/**
* Parameters to pass to the action during execution.
*
* Typed as `Partial<ActionParams>`, so every parameter is optional at the type level;
* see {@link ActionParams} for the individual fields.
*/
params: Partial<ActionParams>
/**
* Indicates whether the action succeeded during the most recent execution.
*
* This field is optional and defaults to `undefined` if the action has not been executed.
*/
success?: boolean
}
/**
* Parameters for the actions.
*
* This interface defines the inputs required to perform various actions
* within the application. Each property corresponds to a specific type of interaction
* or behavior.
*
* All properties are declared as required here; {@link Action} consumes this type
* through `Partial<ActionParams>`.
*/
interface ActionParams {
/**
* Specifies the direction to scroll the page.
*
* Possible values:
* - `up`: Scrolls the page upwards by the height of the window (`window.innerHeight`).
* - `down`: Scrolls the page downwards by the height of the window (`window.innerHeight`).
* - `top`: Scrolls the page to the topmost position.
* - `bottom`: Scrolls the page to the bottommost position.
*/
direction: 'up' | 'down' | 'top' | 'bottom'
/**
* Specifies the duration to wait before performing the action, in milliseconds.
*
* NOTE(review): the `hover` variant of `ObserverAction` describes its `duration` as the
* duration of the hover action — confirm which meaning applies to this field.
*/
duration: number
/**
* The frame number to switch to, used for interacting with specific frames on a page.
*
* Frames are typically indexed starting from 0.
*/
frameNumber: number
/** The source of the frame in which to perform the action. */
frameSrc: string
/**
* The keys to press during the action.
*
* This should be a string representation of the keys, such as `"Enter"`, `"Ctrl+C"`, or `"ArrowUp"`.
*
* NOTE(review): if this string is forwarded to Playwright's keyboard API, the modifier key is
* spelled `Control` there (e.g. `"Control+C"`) — confirm whether `"Ctrl+C"` is translated first.
*/
keys: string
/** The option to select from a dropdown menu. */
option: string
/**
* The page number to switch to, used when multiple pages are managed simultaneously.
*
* Pages are typically indexed starting from 0.
*/
pageNumber: number
/**
* A regular expression pattern to verify if the current URL matches a specific format.
*
* The pattern is supplied as a string (the regular expression source), not as a `RegExp` object.
*
* @example `^https://example.com/.*$`
*/
pattern: string
/** The text to input into an element. */
text: string
/**
* The URL to navigate to.
*
* This should be a valid and complete URL, including the protocol (e.g., `https://`).
*/
url: string
/** The XPath location of the element to interact with. */
xpath: string
}
/**
* Location information for an element on the page.
*
* This interface provides the necessary properties to identify and describe an element for interaction on the page.
*
* It is the parameter type (or part of it) for the `click`, `hover`, `input`, and `select`
* variants of {@link ObserverAction}.
*/
interface ElementLocation {
/**
* The source URL or identifier of the frame containing the element.
*
* This is an optional field used when the element resides inside an iframe or frame.
*/
frameSrc?: string
/** The full HTML content of the element, including tags, attributes, and text. */
html: string
/** The XPath location of the element to interact with. */
xpath: string
}
/**
* Configuration options for the Observer class.
*
* Passed as the optional second argument of the `Observer` constructor; every field is optional.
*/
interface ObserverOptions {
/**
* The delay in milliseconds to wait before executing each action during a dry run.
*
* This introduces a pause between consecutive actions to wait for loading and rendering.
*
* @default 250
*/
delay?: number
/**
* The file path where the recordings will be saved.
*
* This specifies the location for storing recorded actions. The path should end with `.json`.
*
* Example: `/path/to/custom/recordings.json`
*
* @default '.playword/recordings.json'
*/
recordPath?: string
}
/**
* Configuration options for the PlayWord class.
*
* Passed as the optional second argument of the `PlayWord` constructor; every field is optional.
*/
interface PlayWordOptions {
/**
* Configuration options for the AI instance.
*
* These options allow customization of the API client, such as specifying
* an API key or custom endpoint. See {@link AIOptions}, which accepts the Google,
* OpenAI, Anthropic, or Voyage AI option shapes.
*
* @example
* **Initialize with Google and change the default model**
* ```ts
* const playword = new PlayWord(context, {
* aiOptions: {
* googleApiKey: '<your-google-api-key>',
* model: 'gemini-2.0-flash'
* }
* })
* ```
*
* **Initialize with Anthropic and Voyage AI**
* ```ts
* const playword = new PlayWord(context, {
* aiOptions: {
* anthropicApiKey: 'sk-...',
* voyageAIApiKey: 'pa-...'
* }
* })
* ```
*
* **Use a custom OpenAI endpoint**
* ```ts
* const playword = new PlayWord(context, {
* aiOptions: {
* baseURL: 'https://api.my-openai-clone.com/v1',
* openAIApiKey: '<your-api-key>'
* }
* })
* ```
*
* @default {}
*/
aiOptions?: AIOptions
/**
* Whether to enable debug mode.
*
* @default false
*/
debug?: boolean
/**
* The delay in milliseconds to wait before executing each action during the playback.
*
* This introduces a pause between actions to wait for loading and rendering.
*
* @default 250
*/
delay?: number
/**
* Whether to record actions performed and where to save the recordings.
*
* - `true`: Records actions and saves them to `.playword/recordings.json` by default.
* - `string`: Specifies a custom file path for saving the recordings. The path must end with `.json`.
* - `false`: Disables action recording.
*
* @example
* **Record actions and save to the default path**
* ```ts
* const playword = new PlayWord(context, { record: true })
* ```
*
* **Save recordings to a custom path**
* ```ts
* const playword = new PlayWord(context, {
* record: 'path/to/recordings.json'
* })
* ```
*
* **Disable recordings**
* ```ts
* const playword = new PlayWord(context, { record: false })
* ```
*
* @default false
*/
record?: boolean | string
}
/**
* Interface for interacting with PlayWord, providing functions to control the browser,
* perform actions, and interact with the configured AI service using natural language.
*
* The `PlayWord` class implements this interface, and the `Observer` constructor accepts
* any value of this type.
*/
interface PlayWordInterface {
/** AI instance used to interact with the configured AI service. See {@link AIOptions}. */
ai: AI
/** Playwright `Context` instance used to control the browser. */
context: BrowserContext
/**
* The delay in milliseconds to wait before executing each action during the playback.
*
* This introduces a pause between actions to wait for loading and rendering.
*
* @default 250
*/
delay: number
/**
* The frame within the page, if the current context is inside a frame.
*
* This property represents the current frame being operated on within the page. It
* allows for frame-specific actions when the context is nested inside an iframe.
*
* - **PlayWord Observer**: The current frame will be recorded and saved to the record file.
* - **PlayWord**: You can switch frames dynamically using the `say` method.
*
* If no frame is set, the value will be `undefined`.
*
* @example
* **Switch to the specified frame**
* ```ts
* // Switch to the frame with the name "frame-name"
* await playword.say('Switch to the frame "frame-name"')
* // Switch to the frame with the source "https://www.example.com"
* await playword.say('Switch to the frame "https://www.example.com"')
* ```
*/
frame?: Frame
/**
* The most recent input from the user.
*
* This stores the last natural language command provided to the `say` method.
*/
input: string
/**
* The Playwright `Page` instance used to perform actions.
*
* This property represents the current page being operated on. When a new page is
* opened, the context will automatically switch to the new page. Additionally,
* manual page switching can be performed using the `say` method.
*
* @example
* **Switch to the specified page**
* ```ts
* // Switch to the first page
* await playword.say('Switch to the first page')
* // Switch to the second page
* await playword.say('Switch to the second page')
* ```
*/
page?: Page
/**
* The recorder instance used to save the actions performed.
*
* If recording is not enabled or initialized, the value will be `undefined`.
*/
recorder?: Recorder
/**
* Step count to keep track of the actions performed.
* This is used to locate the recording in the record file.
*/
stepCount: number
/**
* Executes actions on the page using natural language input.
*
* Converts the provided input into corresponding actions and performs them
* on the browser page.
*
* @param message Natural language input to specify the action.
* @returns A promise that resolves to an {@link ActionResult}: a boolean for assertion
* actions, or a string message describing the outcome for other actions.
*
* @example
* **Navigate to a webpage**
* ```ts
* const playword = new PlayWord(context)
* await playword.say('Navigate to https://www.google.com')
* ```
*
* **Click a link**
* ```ts
* await playword.say('Click the "Gmail" link')
* ```
*
* **Check for page content**
* ```ts
* const result = await playword.say('Check if the page contains "Sign in"')
* // Output: true
* ```
*/
say(message: string): Promise<ActionResult>
}
/**
* Represents a recording of actions performed during user interactions.
*
* This interface includes the input message and the actions performed in one step.
*/
interface Recording {
/** Input message to map actions performed. */
input: string
/** Actions performed in one step. See {@link Action} for the structure of each entry. */
actions: Action[]
}
/**
* Represents the current state of the observer.
*
* The `ObserverState` interface tracks various states during the execution of the observer.
* Every flag is optional, so a flag that has not been set is `undefined`.
*/
interface ObserverState {
/** Indicates whether the observer is currently performing a dry run. */
dryRunning?: boolean
/**
* Indicates whether the Observer is waiting for AI to generate
* a step description or adjust the current action.
*/
waitingForAI?: boolean
/**
* Indicates whether the Observer is waiting for user input
* to accept, modify, or drop the action.
*/
waitingForUserAction?: boolean
}
/**
* Interface that extends EmbeddingsParams and defines additional
* parameters specific to the VoyageEmbeddings class.
*
* Every field declared here is optional.
*/
interface VoyageEmbeddingsParams extends EmbeddingsParams {
/** The Voyage AI API key. */
apiKey?: string
/**
* The maximum number of documents to embed in a single request.
*
* This is limited by the Voyage AI API to a maximum of 8.
*
* @default 8
*/
batchSize?: number
/**
* The endpoint URL for the Voyage AI API.
*
* @default 'https://api.voyageai.com/v1/embeddings'
*/
endpoint?: string
/**
* Input type for the embeddings request. Can be "query" or "document".
*
* @default undefined
*/
inputType?: 'query' | 'document'
/**
* The embeddings model to use.
*
* @default 'voyage-3'
*/
model?: string
/**
* The desired dimension of the output embeddings.
*
* @default undefined
*/
outputDimension?: number
/**
* The data type of the output embeddings. Can be "float" or "int8".
*
* @default 'float'
*/
outputDtype?: 'float' | 'int8'
/**
* Whether to truncate the input texts to the maximum length allowed by the model.
*
* @default true
*/
truncation?: boolean
}
/**
* The response from an action performed on the page.
*
* - For assertion actions, the result is a boolean value indicating success (`true`) or failure (`false`).
* - For non-assertion actions, the result is a string message describing the outcome.
*
* This is the value that the promise returned by the `say` method resolves to.
*/
type ActionResult = boolean | string
/**
* Configuration for the AI class.
*
* A union of the option types declared in this file: any of the Google, OpenAI,
* Anthropic, or Voyage AI option shapes is accepted.
*/
type AIOptions = GoogleOptions | OpenAIOptions | AnthropicOptions | VoyageOptions
/**
* Anthropic configuration options: `AnthropicInput` combined with `ClientOptions`.
*
* NOTE(review): `ClientOptions` is imported from `@langchain/openai` in this file — confirm
* that reusing the OpenAI client options for Anthropic is intentional.
*/
type AnthropicOptions = AnthropicInput & ClientOptions
/**
* Google configuration options: `GoogleGenerativeAIChatInput` extended with an
* optional `googleApiKey`.
*/
type GoogleOptions = GoogleGenerativeAIChatInput & {
/** The API key for the Google API. */
googleApiKey?: string
}
/**
* OpenAI configuration options: `ChatOpenAIFields` combined with `ClientOptions`,
* both imported from `@langchain/openai`.
*/
type OpenAIOptions = ChatOpenAIFields & ClientOptions
/**
* Voyage AI configuration options: `VoyageEmbeddingsParams` extended with an
* optional `voyageAIApiKey`.
*
* NOTE(review): `VoyageEmbeddingsParams` already declares an optional `apiKey`; how the two
* key fields interact is not visible in this declaration — confirm against the implementation.
*/
type VoyageOptions = VoyageEmbeddingsParams & {
/** The Voyage AI API key. */
voyageAIApiKey?: string
}
/**
* Represents an action observed during user interactions.
*
* This type includes various action types and their associated parameters.
* It is a union whose members are distinguished by the literal `name` field
* (`click`, `hover`, `input`, `select`, or `goto`); the shape of `params` depends on `name`.
*/
type ObserverAction =
| {
/** The name of the action. */
name: 'click'
/**
* The parameters for the `click` action.
*
* Includes the location of the element to be clicked.
*/
params: ElementLocation
}
| {
/** The name of the action. */
name: 'hover'
/**
* The parameters for the `hover` action.
*
* Includes the location of the element and the duration of the hover action.
*
* NOTE(review): the unit of `duration` is not stated here; `ActionParams.duration` is
* documented in milliseconds — confirm that the same unit applies.
*/
params: ElementLocation & { duration: number }
}
| {
/** The name of the action. */
name: 'input'
/**
* The parameters for the `input` action.
*
* Includes the location of the element and the text to input.
*/
params: ElementLocation & { text: string }
}
| {
/** The name of the action. */
name: 'select'
/**
* The parameters for the `select` action.
*
* Includes the location of the dropdown element and the option to select.
*/
params: ElementLocation & { option: string }
}
| {
/** The name of the action. */
name: 'goto'
/**
* The parameters for the `goto` action.
*
* Includes the URL to navigate to. Unlike the other variants, no element location is involved.
*/
params: { url: string }
}
/**
* PlayWord Observer enables tracking and recording user interactions on a webpage.
*
* By leveraging AI, the Observer converts user behaviors into precise and reliable
* test cases, making it an essential tool for automated testing of web applications.
*
* **Features:**
* - Observes user interactions and translates them into executable test cases.
* - Mounts a user interface on all opened pages using Playwright's `addInitScript` method,
* allowing seamless interaction with the Observer.
* - Integrates with PlayWord for executing the recorded test cases.
*
* **Usage:**
* This class works in conjunction with the PlayWord framework. The recorded test cases
* can be directly executed via the provided PlayWord instance.
*
* @param playword The PlayWord instance used to control the browser and leverage AI.
* @param observerOptions Optional configuration for the Observer. See {@link ObserverOptions} for details.
*
* @example
* **Initialize and Use the Observer**
* ```ts
* // Initialize a PlayWord instance
* const playword = new PlayWord(context)
*
* // Create an Observer instance and start observing
* const observer = new Observer(playword, { delay: 500 })
* await observer.observe()
* ```
*/
declare class Observer {
/** The PlayWord instance supplied to the constructor. */
private playword;
/**
* Represents the current page action being performed.
* See {@link Action} for details.
*/
private action;
/**
* The delay in milliseconds to wait before executing each action during a dry run.
*
* This introduces a pause between consecutive actions, allowing time for
* the page to load and render properly.
*
* @default 250
*/
private delay;
/** The step description for the current action. */
private input;
/** The recorder instance used to save the actions performed. */
private recorder;
/**
* The current state of the Observer.
* See {@link ObserverState} for details.
*
* This is the only non-private property declared on the class.
*/
state: ObserverState;
/**
* Creates an Observer bound to a PlayWord instance.
*
* @param playword Any object satisfying {@link PlayWordInterface}.
* @param observerOptions Optional options object, destructured into `delay` and `recordPath`.
* See {@link ObserverOptions} for details.
*/
constructor(playword: PlayWordInterface, { delay, recordPath }?: ObserverOptions);
/** Retrieves the AI instance from the PlayWord instance. */
private ai;
/** Retrieves the current context from the PlayWord instance. */
private context;
/** Retrieves the current page from the PlayWord instance. */
private page;
/** Set up the Observer scripts and listeners on the page. */
private setPageListeners;
/**
* Starts observing the user interactions on the page.
*
* @returns A promise that resolves with no value.
*/
observe(): Promise<void>;
}
/**
* PlayWord enables users to automate browsers with AI.
*
* This class simplifies browser automation by removing the need to locate elements
* manually using selectors. Instead, you can describe your desired actions in natural
* language, and PlayWord will interpret and execute them.
*
* **Repository**: [GitHub - PlayWord](https://github.com/Foreverskyin0216/playword)
*
* @param context The Playwright `Context` instance used to control the browser.
* @param playwordOptions Optional configuration for PlayWord. See {@link PlayWordOptions} for details.
*
* @example
* **Create a new PlayWord instance**
* ```ts
* const context = await browser.newContext()
* const playword = new PlayWord(context, {
* aiOptions: {
* openAIApiKey: '<your-api-key>'
* },
* debug: true,
* delay: 500,
* record: 'spec/test-login.json'
* })
* ```
*/
declare class PlayWord implements PlayWordInterface {
/** Playwright `BrowserContext` instance used to control the browser. */
context: BrowserContext;
/** Use the thread ID to keep track of the conversation for LangGraph. */
private threadId;
/** AI instance used to interact with the configured AI service. See {@link AIOptions}. */
ai: AI;
/**
* The delay in milliseconds to wait before executing each action during the playback.
*
* This introduces a pause between actions to wait for loading and rendering.
*
* @default 250
*/
delay: number;
/**
* The frame within the page, if the current context is inside a frame.
*
* This property represents the current frame being operated on within the page. It
* allows for frame-specific actions when the context is nested inside an iframe.
*
* - **PlayWord Observer**: The current frame will be recorded and saved to the record file.
* - **PlayWord**: You can switch frames dynamically using the `say` method.
*
* If no frame is set, the value will be `undefined`.
*
* @example
* **Switch to the specified frame**
* ```ts
* // Switch to the frame with the name "frame-name"
* await playword.say('Switch to the frame "frame-name"')
* // Switch to the frame with the source "https://www.example.com"
* await playword.say('Switch to the frame "https://www.example.com"')
* ```
*/
frame?: Frame;
/**
* The most recent input from the user.
*
* This stores the last natural language command provided to the `say` method.
*/
input: string;
/**
* The Playwright `Page` instance used to perform actions.
*
* This property represents the current page being operated on. When a new page is
* opened, the context will automatically switch to the new page. Additionally,
* manual page switching can be performed using the `say` method.
*
* @example
* **Switch to the specified page**
* ```ts
* // Switch to the first page
* await playword.say('Switch to the first page')
* // Switch to the second page
* await playword.say('Switch to the second page')
* ```
*/
page?: Page;
/**
* The recorder instance used to save the actions performed.
*
* If recording is not enabled or initialized, the value will be `undefined`.
*/
recorder?: Recorder;
/**
* Step count to keep track of the actions performed.
* This is used to locate the recording in the record file.
*/
stepCount: number;
/**
* Creates a PlayWord instance.
*
* @param context The Playwright `BrowserContext` used to control the browser.
* @param playwordOptions Optional options object, destructured into `debug`, `delay`,
* `aiOptions`, and `record`. See {@link PlayWordOptions} for details.
*/
constructor(context: BrowserContext, { debug, delay, aiOptions, record }?: PlayWordOptions);
/**
* The decorator to handle the test fixture, including the setup process and teardown process.
*
* **Setup:**
* - If the page is not initialized, create a new page.
* - If recording is enabled, load the recordings from the record file.
* - If the input starts with the AI pattern, replace the AI pattern with an empty string for the input.
*
* **Teardown:**
* - Increment the step count to locate the recording in the record file.
*
* NOTE(review): the exact "AI pattern" prefix is defined in the implementation and is
* not visible in this declaration.
*/
private static fixture;
/** Invoke the say method graph to perform actions. */
private useSayGraph;
/**
* Use recordings to perform actions. If the action fails, retry with AI.
*
* @param recording The recording to perform actions. See {@link Recording} for details.
*/
private useRecording;
/**
* Executes actions on the page using natural language input.
*
* Converts the provided input into corresponding actions and performs them
* on the browser page.
*
* @param message Natural language input to specify the action.
* @returns A promise that resolves to an {@link ActionResult}: a boolean for assertion
* actions, or a string message describing the outcome for other actions.
*
* @example
* **Navigate to a webpage**
* ```ts
* const playword = new PlayWord(context)
* await playword.say('Navigate to https://www.google.com')
* ```
*
* **Click a link**
* ```ts
* await playword.say('Click the "Gmail" link')
* ```
*
* **Check for page content**
* ```ts
* const result = await playword.say('Check if the page contains "Sign in"')
* // Output: true
* ```
*/
say(message: string): Promise<ActionResult>;
}
// Public entry points of the package. Only the two classes are exported by this statement;
// the interfaces and type aliases declared above are not exported here.
export { Observer, PlayWord };