donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
266 lines (253 loc) • 15.5 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.AssertTool = exports.AssertToolGptSchema = exports.AssertCoreSchema = void 0;
const v4_1 = require("zod/v4");
const assertCache_1 = require("../lib/ai/cache/assertCache");
const ToolSchema_1 = require("../models/ToolSchema");
const Logger_1 = require("../utils/Logger");
const MiscUtils_1 = require("../utils/MiscUtils");
const PlaywrightUtils_1 = require("../utils/PlaywrightUtils");
const TargetUtils_1 = require("../utils/TargetUtils");
const Tool_1 = require("./Tool");
// Number of extra attempts made after a failed first evaluation when the
// caller does not supply `retries`.
const DEFAULT_RETRIES = 1;
// Pause, in seconds, before each retry when the caller does not supply
// `retryWaitSeconds`.
const DEFAULT_RETRY_WAIT_SECONDS = 3;
exports.AssertCoreSchema = v4_1.z.object({
assertionToTestFor: v4_1.z.string()
.describe(`The assertion that will be tested for, using plain English.
The raw assertion text should avoid embedding values and current state about the webpage
since the assertion is being evaluated by the tool itself, and the tool
will have access to the webpage's latest content, title, URL, etc.
IMPORTANT: The assertion engine CANNOT refer to Donobu annotated elements by annotation numbers, so
DO NOT reference annot ation numbers in the assertion.
Examples of the good assertions. Write assertions like these:
- 'Assert that this webpage includes a product summary.'
- 'Assert there are more than 3 search results.'
- 'Assert that the webpage title is "Hello World".'
- 'Assert that the theme of the website is in dark mode.'
- 'Assert that the first search result is not "strawberry".'
- 'Assert that the sum of Workouts - bicyling, swimming, and running equals Total within a 4-minute tolerance.'
Examples of bad assertions. Do NOT write assertions like these:
- 'Assert that the 3 search results is greater than 2.'
- 'Assert that element with annotation '0' is visible and contains the text 'Email'.'
- 'Assert that the first search result ("banana") is not "strawberry".'
- 'Assert that the sum of Workouts - bicyling (20:39), swimming (17:59), and running (15:46) equals Total (59:00) within a 4-minute tolerance.'
`),
retries: v4_1.z
.number()
.nullable()
.optional()
.describe(`The number of times to retry the assertion if it fails. Defaults to ${DEFAULT_RETRIES}.`),
retryWaitSeconds: v4_1.z
.number()
.nullable()
.optional()
.describe(`The number of seconds to wait between retries. Defaults to ${DEFAULT_RETRY_WAIT_SECONDS} seconds.`),
});
// Arguments accepted when the tool is invoked by the model: the fields of
// BaseGptArgsSchema followed by the assert-specific fields. On a key
// collision the assert-specific field wins because it is copied last.
const assertGptShape = Object.assign({}, ToolSchema_1.BaseGptArgsSchema.shape, exports.AssertCoreSchema.shape);
exports.AssertToolGptSchema = v4_1.z.object(assertGptShape);
// Field guidance for the structured output requested from the model.
const OUTCOME_DETAILS_GUIDANCE = 'Details of the evaluation itself (i.e. why it passed or failed). Only comment on the evaluation itself, not extraneous details.';
const OUTCOME_VERDICT_GUIDANCE = 'Set to true IFF the assertion holds, otherwise, set to false if the assertion does not hold.';
// NOTE(review): the "Page 2 of" example emits a doubled backslash while the
// "/users/" example emits a single one — confirm which escaping is intended.
const PLAYWRIGHT_STEPS_GUIDANCE = `An array of structured assertion steps that can be compiled into standard Playwright expect() calls.
Each step describes one assertion to execute. Return null if the assertion is too complex or implausible (ex:
checking vibes, tone, relying on screenshots, etc) to express as a combination of these structured steps.
Examples of how assertions map to steps:
- "Organization Settings heading is visible" →
[{ locator: "role", role: "heading", value: "Organization Settings", valueIsRegex: false, assertion: "toBeVisible", attributeValue: null }]
- "Create Template button is visible" →
[{ locator: "role", role: "button", value: "Create Template", valueIsRegex: false, assertion: "toBeVisible", attributeValue: null }]
- "pagination shows Page 2 of N" →
[{ locator: "text", role: null, value: "Page 2 of \\\\d+", valueIsRegex: true, assertion: "toBeVisible", attributeValue: null }]
- "Programs and Templates tabs are visible" →
[{ locator: "role", role: "tab", value: "Programs", valueIsRegex: false, assertion: "toBeVisible", attributeValue: null },
{ locator: "role", role: "tab", value: "Templates", valueIsRegex: false, assertion: "toBeVisible", attributeValue: null }]
- "template X is NOT listed" →
[{ locator: "text", role: null, value: "X", valueIsRegex: false, assertion: "toBeHidden", attributeValue: null }]
- "Next Page is disabled" →
[{ locator: "role", role: "button", value: "Next Page", valueIsRegex: false, assertion: "toBeDisabled", attributeValue: null }]
- "The JSON Configuration radio button is selected" →
[{ locator: "role", role: "radio", value: "JSON Configuration", valueIsRegex: false, assertion: "toBeChecked", attributeValue: null }]
- "The 'Use installed app' toggle is on" →
[{ locator: "label", role: null, value: "Use installed app", valueIsRegex: false, assertion: "toBeChecked", attributeValue: null }]
- "The Device Name field contains 'My Device Name'" →
[{ locator: "label", role: null, value: "Device Name", valueIsRegex: false, assertion: "toHaveValue", attributeValue: "My Device Name" }]
- "The Assert Page tool is selected" →
[{ locator: "role", role: "tab", value: "Assert Page", valueIsRegex: false, assertion: "toHaveAttribute", attributeValue: "true" }]
- "The current URL path is the settings page" →
[{ locator: "text", role: null, value: ".+/settings/general/?$", valueIsRegex: true, assertion: "toHaveURL", attributeValue: null }]
- "The current URL path is for a user" →
[{ locator: "text", role: null, value: ".+/users/[\\d]+/?$", valueIsRegex: true, assertion: "toHaveURL", attributeValue: null }]`;
/**
 * Shape of the model's answer: a verdict, an explanation, and optionally a
 * list of structured Playwright assertion steps. If the steps fail to parse,
 * `.catch(null)` replaces them with null instead of failing the whole parse.
 */
const AssertionOutcomeSchema = v4_1.z.object({
    details: v4_1.z.string().describe(OUTCOME_DETAILS_GUIDANCE),
    assertionPassed: v4_1.z.boolean().describe(OUTCOME_VERDICT_GUIDANCE),
    playwrightAssertionSteps: v4_1.z
        .array(assertCache_1.PlaywrightAssertionStepSchema)
        .nullable()
        .catch(null)
        .describe(PLAYWRIGHT_STEPS_GUIDANCE),
});
/**
 * Coerces a caller-supplied numeric option into a finite, non-negative number.
 *
 * @param {*} value - Raw option value; null/undefined selects the fallback.
 * @param {number} fallback - Value used when `value` is missing or not finite.
 * @returns {number} `value` clamped to >= 0, or `fallback` when `value` is
 *   missing, NaN, or infinite.
 */
function toNonNegativeNumber(value, fallback) {
    const numeric = Number(value ?? fallback);
    return Number.isFinite(numeric) ? Math.max(0, numeric) : fallback;
}
/**
 * Extracts a printable message from anything that was thrown, including
 * non-Error values such as strings, null, or undefined.
 *
 * @param {*} error - The thrown value.
 * @returns {string} The value's `message` when present, else its string form.
 */
function describeError(error) {
    return error?.message ?? String(error);
}
/**
 * Builds the extra system-prompt section that tells the model to emit
 * `{{$.env.NAME}}` placeholders instead of literal env values.
 *
 * @param {string} rawAssertion - The assertion text before env interpolation.
 * @param {Object|null|undefined} envData - Env name/value mapping, if any.
 * @returns {string} The prompt section, or '' when the raw assertion does not
 *   reference any env var present in `envData`.
 */
function buildEnvPromptBlock(rawAssertion, envData) {
    // Only treat env vars as "in play" when the raw assertion actually
    // references one — keeps the prompt small for the common case.
    const referencedEnvEntries = Object.entries(envData ?? {}).filter(([name]) => rawAssertion.includes(`{{$.env.${name}}}`));
    if (referencedEnvEntries.length === 0) {
        return '';
    }
    const envMappingLines = referencedEnvEntries
        .map(([name, value]) => ` - {{$.env.${name}}} = ${JSON.stringify(value)}`)
        .join('\n');
    return `
The user's original assertion contains environment variable references using the
syntax \`{{$.env.NAME}}\`. To keep cached Playwright steps valid across runs with
different env values, you MUST emit those same placeholders in any
playwrightAssertionStep \`value\`/\`attributeValue\` field whose contents come from
an env var. Do NOT bake the literal current value into the step.
Original (uninterpolated) assertion: ${rawAssertion}
Current env mapping (use these to identify which substrings on the page came
from which env var, then emit the placeholder rather than the literal):
${envMappingLines}
Examples:
- Raw assertion "Welcome banner says hello {{$.env.USERNAME}}", USERNAME="alice", page shows "Welcome alice" →
[{ locator: "text", role: null, value: "{{$.env.USERNAME}}", valueIsRegex: false, assertion: "toBeVisible", attributeValue: null }]
- Raw assertion "The username field shows {{$.env.USERNAME}}", USERNAME="alice", page input value is "alice" →
[{ locator: "label", role: null, value: "Username", valueIsRegex: false, assertion: "toHaveValue", attributeValue: "{{$.env.USERNAME}}" }]
- For literal page text unrelated to env vars, keep the literal value as usual.`;
}
/**
 * Tool that asks the model whether a plain-English assertion holds for the
 * current web page, using a viewport screenshot plus the page's text, URL and
 * title, and retries on failure.
 */
class AssertTool extends Tool_1.Tool {
    constructor() {
        super(AssertTool.NAME, `This method will test that a given condition (i.e. the 'assertionToTestFor' field) holds true.
It will use a screenshot of the current viewport of the webpage, the webpage's textual content, URL, and title, to evaluate a given assertion.`, exports.AssertCoreSchema, exports.AssertToolGptSchema, true, undefined, ['web']);
    }
    /**
     * Evaluates the assertion, retrying after a pause while it keeps failing.
     *
     * At least one attempt is always made. `retries` is floored to a
     * non-negative integer and `retryWaitSeconds` is clamped to >= 0; missing,
     * NaN, or infinite values fall back to the module defaults. This prevents
     * a negative or NaN `retries` from skipping the loop and returning null.
     *
     * @param {object} context - Tool call context; this method reads
     *   `rawParameters`, `envData`, `gptClient` and `metadata` from it and
     *   resolves the page via `webPage(context)`.
     * @param {object} parameters - Arguments shaped like AssertCoreSchema.
     * @returns {Promise<object>} `{ isSuccessful, forLlm, metadata }` for the
     *   first passing attempt, or for the final attempt if none passed.
     */
    async call(context, parameters) {
        const page = (0, TargetUtils_1.webPage)(context);
        const retries = Math.floor(toNonNegativeNumber(parameters.retries, DEFAULT_RETRIES));
        const retryWaitSeconds = toNonNegativeNumber(parameters.retryWaitSeconds, DEFAULT_RETRY_WAIT_SECONDS);
        // Prefer the uninterpolated assertion so env placeholders are still
        // visible; it does not change between attempts, so build the env
        // section of the prompt once.
        const rawAssertion = typeof context.rawParameters?.assertionToTestFor === 'string'
            ? context.rawParameters.assertionToTestFor
            : parameters.assertionToTestFor;
        const envBlock = buildEnvPromptBlock(rawAssertion, context.envData);
        let lastResult = null;
        for (let attempt = 0; attempt <= retries; ++attempt) {
            // If this is a retry attempt, wait before proceeding
            if (attempt > 0) {
                Logger_1.appLogger.info(`Retry ${attempt} of ${retries} for ${this.name}`);
                await page.waitForTimeout(retryWaitSeconds * 1000);
            }
            const screenshot = await PlaywrightUtils_1.PlaywrightUtils.takeViewportScreenshot(page);
            const webpageTextTimeoutMs = 5000;
            // Text and title extraction are best-effort: on failure the
            // bracketed message is fed to the model in place of the content.
            const webpageRawText = await page
                .locator('body')
                .innerText({ timeout: webpageTextTimeoutMs })
                .catch((error) => {
                const msg = `[${MiscUtils_1.MiscUtils.errName(error)}: Failed to extract page text within ${webpageTextTimeoutMs}ms. The page may still be loading or the main thread may be blocked.]`;
                Logger_1.appLogger.warn(msg);
                return msg;
            });
            const webpageTitle = await page.title().catch((error) => {
                const msg = `[${MiscUtils_1.MiscUtils.errName(error)}: Failed to retrieve page title]`;
                Logger_1.appLogger.warn(msg);
                return msg;
            });
            const promptMessages = [
                {
                    type: 'system',
                    text: `You are a tool that will evaluate whether a given assertion holds true or not about a given webpage.
You will have access to a screenshot of the webpage, its textual content (not its HTML), URL, title, etc.
You should only use the given information to make your assertion.
Do not use any other information or context.
Additionally, produce structured assertion steps (playwrightAssertionSteps) that describe how to
verify the same condition using standard Playwright expect APIs. These steps will be compiled
deterministically and cached for fast reruns. If the assertion is too complex to express as
structured steps, set playwrightAssertionSteps to null.
CRITICAL RULES for generating structured steps — follow these precisely:
- Radio buttons, checkboxes, toggle switches: use 'toBeChecked', NOT 'toBeVisible'. Use locator='label' with the field's label text, or locator='role' with role='radio'/'checkbox'.
- Text input / textarea content: use 'toHaveValue' with locator='label' and set attributeValue to the expected text. Do NOT use 'toBeVisible' on the textbox.
- Selected tabs, pills, or items with aria-selected: use 'toHaveAttribute' with value='aria-selected' and attributeValue='true', NOT 'toBeVisible' on the text.
- Text content within an element: use 'toContainText' with attributeValue set to the substring, NOT 'toBeVisible'.
- Only use 'toBeVisible' when the assertion is genuinely about whether something is visible — not as a fallback for state or value checks.${envBlock}`,
                },
                {
                    type: 'user',
                    items: [
                        { type: 'jpeg', bytes: screenshot },
                        {
                            type: 'text',
                            text: `Make the following assertion about the current webpage: ${parameters.assertionToTestFor}
The current date and time in ISO 8601 format is ${new Date().toISOString()}.
The current webpage URL is: ${page.url()}
The current webpage title is: ${webpageTitle}
The raw text of the current webpage is:
\`\`\`
${webpageRawText}
\`\`\`
Note that since this is the textual content, the text may be a bit jumbled, have its styling lost,
careful positioning lost, etc. A screenshot of the webpage has also been provided for visual context.`,
                        },
                    ],
                },
            ];
            const assertionOutcome = await context.gptClient.getStructuredOutput(promptMessages, AssertionOutcomeSchema);
            MiscUtils_1.MiscUtils.updateTokenCounts(assertionOutcome, context.metadata);
            const assertPassed = Boolean(assertionOutcome.output.assertionPassed);
            const report = `Assertion ${assertPassed ? 'PASSED' : 'FAILED'} for: ${parameters.assertionToTestFor}\nDetails: ${assertionOutcome.output.details}`;
            // When the AI assertion passes and structured steps were returned,
            // verify the steps against the live page before considering them
            // cacheable. If the steps fail, discard them but still return the
            // passing AI result. The verification window is recorded so the HTML
            // reporter can label its `expect()` calls as cache-worthiness checks
            // rather than treating an internal locator mismatch as an assertion
            // failure.
            let verifiedSteps = assertionOutcome.output.playwrightAssertionSteps;
            let verification;
            if (assertPassed &&
                Array.isArray(verifiedSteps) &&
                verifiedSteps.length > 0) {
                const verifyStartedAt = Date.now();
                try {
                    const executor = (0, assertCache_1.buildAssertExecutor)(verifiedSteps);
                    await executor({ page: page, envData: context.envData });
                    verification = {
                        startedAt: verifyStartedAt,
                        endedAt: Date.now(),
                        failed: false,
                    };
                }
                catch (error) {
                    // A thrown value may not be an Error; never let reading
                    // its message turn a passing assertion into a crash.
                    const errorMessage = describeError(error);
                    Logger_1.appLogger.debug(`Structured assertion steps failed verification for: "${parameters.assertionToTestFor}" — discarding steps. Error: ${errorMessage}`);
                    verifiedSteps = null;
                    verification = {
                        startedAt: verifyStartedAt,
                        endedAt: Date.now(),
                        failed: true,
                        errorMessage,
                    };
                }
            }
            const result = {
                isSuccessful: assertPassed,
                forLlm: report,
                metadata: {
                    ...assertionOutcome.output,
                    playwrightAssertionSteps: verifiedSteps,
                    verification,
                    attempt: attempt + 1,
                },
            };
            // If assertion passed or we've exhausted all retries, return the result
            if (assertPassed || attempt === retries) {
                return result;
            }
            // Store the result for potential use if all retries fail
            lastResult = result;
        }
        // Unreachable now that `retries` is a non-negative integer; kept as a
        // defensive fallback.
        return lastResult;
    }
    /**
     * Entry point used when the model invokes the tool; delegates to `call`.
     *
     * @param {object} context - Tool call context, passed through unchanged.
     * @param {object} parameters - Tool arguments, passed through unchanged.
     * @returns {Promise<object>} Whatever `call` resolves to.
     */
    async callFromGpt(context, parameters) {
        return this.call(context, parameters);
    }
}
// Publish the tool class on the CommonJS exports object.
exports.AssertTool = AssertTool;
// Tool name that the constructor passes to the base class. It is assigned
// after the class body, so it must be set before any instance is constructed.
AssertTool.NAME = 'assert';
//# sourceMappingURL=AssertTool.js.map