donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
1,029 lines • 47.1 kB
JavaScript
;
// ---------------------------------------------------------------------------
// Compiler-emitted CommonJS interop helpers. Each helper reuses an existing
// definition found on `this` and only falls back to the local implementation
// when none is present.
// ---------------------------------------------------------------------------
// __createBinding(o, m, k, k2): exposes `m[k]` on `o` under the name `k2`
// (`k2` defaults to `k`). When `Object.create` is available the property is
// defined via a descriptor: the descriptor found on `m` is reused unless it is
// missing, is an accessor on a module without `__esModule`, or is a
// writable/configurable data property — in those cases an enumerable getter
// that reads `m[k]` on every access is installed instead. Without
// `Object.create` the value is copied with a plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// __exportStar(m, exports): binds every enumerable key of `m` onto `exports`
// via __createBinding, skipping "default" and any key `exports` already owns.
var __exportStar = (this && this.__exportStar) || function(m, exports) {
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
// __importDefault(mod): returns `mod` unchanged when it is truthy and flagged
// `__esModule`; otherwise wraps it as `{ default: mod }` so callers can always
// read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.test = void 0;
exports.emitPlaywrightStepLog = emitPlaywrightStepLog;
exports._resetSourceLineCacheForTests = _resetSourceLineCacheForTests;
exports.installPlaywrightStepLogger = installPlaywrightStepLogger;
const test_1 = require("@playwright/test");
const async_hooks_1 = require("async_hooks");
const crypto_1 = require("crypto");
const fs_1 = require("fs");
const os_1 = require("os");
const path_1 = __importDefault(require("path"));
const v4_1 = require("zod/v4");
const envVars_1 = require("../../envVars");
const DonobuFlowsManager_1 = require("../../managers/DonobuFlowsManager");
const fileUploadWorkerRegistry_1 = require("../../persistence/files/fileUploadWorkerRegistry");
const ansi_1 = require("../../utils/ansi");
const BrowserUtils_1 = require("../../utils/BrowserUtils");
const buildProvenance_1 = require("../../utils/buildProvenance");
const FlowLogBuffer_1 = require("../../utils/FlowLogBuffer");
const Logger_1 = require("../../utils/Logger");
const MiscUtils_1 = require("../../utils/MiscUtils");
const PageLogListeners_1 = require("../../utils/PageLogListeners");
const cacheLocator_1 = require("../ai/cache/cacheLocator");
const extendPage_1 = require("../page/extendPage");
const tbd_1 = require("../page/tbd");
const selfHealing_1 = require("./utils/selfHealing");
const triageTestFailure_1 = require("./utils/triageTestFailure");
__exportStar(require("@playwright/test"), exports);
// ---------------------------------------------------------------------------
// Playwright-recorded video → flow persistence
// ---------------------------------------------------------------------------
// Playwright records videos when `use.video` is configured (`'on'`,
// `'retain-on-failure'`, `'on-first-retry'`). Those videos live in the
// Playwright `testInfo.outputDir` and ship with the Playwright HTML report,
// but without explicit wiring they never make it into Donobu's flow
// persistence (so the Studio UI's flow detail view shows no video).
//
// The integration here:
// 1. Inside `finalizeTest`, decide whether to persist based on the
// effective `video` option AND `testInfo.status`. The decision matrix
// mirrors Playwright's own retain semantics so we never persist a
// video the user told Playwright to discard (e.g.,
// `'retain-on-failure'` + passing test = no persist).
// 2. If we should persist, we kick off an async block that uses
// `Video.saveAs` (which internally waits for the BrowserContext to
// close before writing the .webm to a tmp path). After the file is
// finalized we read the bytes and forward them to `setVideo` on the
// flow's persistence layer (which routes through the FileUploadCache
// → cloud upload pipeline).
// 3. The deferred persist promise is registered in a module-level set so
// the worker-scoped drain guard can await it before the Playwright
// worker exits — without this, fast tests would beat the saveAs-after-
// close to the punch and the worker would terminate mid-write.
/**
* Maximum time, in milliseconds (30 seconds), the worker-scoped drain guard
* waits for in-flight video persists to settle before continuing on to the
* file-upload drain. Each video persist needs the BrowserContext to close
* (Playwright's video pipeline only finalizes the .webm at that point); for
* a normal test teardown that's near-instant.
*
* Passed as `timeoutMs` to `waitForPendingVideoPersists`.
*/
const VIDEO_PERSIST_TIMEOUT_MS = 30_000;
/**
 * Promises tracking deferred video saves. Each entry resolves once the
 * test's BrowserContext has closed AND we've copied bytes from the
 * finalized .webm into the flow's persistence layer. The worker-scoped
 * `donobuFileUploadDrainGuard` awaits all of these before draining cloud
 * uploads, so we don't lose end-of-session videos.
 *
 * Entries are added by `trackVideoPersist` and remove themselves as soon as
 * their promise settles (fulfilled or rejected).
 */
const pendingVideoPersists = new Set();
/**
 * Registers `p` in `pendingVideoPersists` and arranges for it to be removed
 * once it settles, whatever the outcome.
 *
 * @param {Promise<unknown>} p - The in-flight persist to track. The caller
 *   keeps ownership of the promise and of its result/rejection.
 * @returns {void}
 */
function trackVideoPersist(p) {
    pendingVideoPersists.add(p);
    const untrack = () => {
        pendingVideoPersists.delete(p);
    };
    // A discarded `p.finally(untrack)` would hand back a derived promise that
    // re-rejects whenever `p` rejects, surfacing as an unhandled rejection
    // (fatal by default on current Node versions). The two-argument `then`
    // runs the same cleanup at the same point but always fulfills, so this
    // bookkeeping chain can never fail on its own. `p` itself is untouched:
    // anyone awaiting it still observes the original rejection.
    void p.then(untrack, untrack);
}
/**
 * Waits until every video persist that is pending at call time has settled,
 * or until `timeoutMs` elapses — whichever happens first.
 *
 * @param {number} timeoutMs - Upper bound on the wait, in milliseconds.
 * @returns {Promise<{initial: number, remaining: number, timedOut: boolean}>}
 *   `initial` is the number of persists pending when the call started,
 *   `remaining` the size of the pending set when the wait ended, and
 *   `timedOut` whether the deadline fired before everything settled.
 */
async function waitForPendingVideoPersists(timeoutMs) {
    const pendingAtStart = pendingVideoPersists.size;
    if (pendingAtStart === 0) {
        return { initial: 0, remaining: 0, timedOut: false };
    }
    // Snapshot the set: persists registered after this point are not awaited.
    const snapshot = [...pendingVideoPersists];
    let deadlineHandle;
    const deadline = new Promise((resolve) => {
        deadlineHandle = setTimeout(() => {
            resolve(true);
        }, timeoutMs);
    });
    // allSettled never rejects, so a failed persist cannot abort the wait.
    const everythingSettled = Promise.allSettled(snapshot).then(() => false);
    const deadlineHit = await Promise.race([deadline, everythingSettled]);
    // Always release the timer so it cannot keep the worker's event loop alive.
    clearTimeout(deadlineHandle);
    return {
        initial: pendingAtStart,
        remaining: pendingVideoPersists.size,
        timedOut: deadlineHit === true,
    };
}
/**
 * Decides whether Donobu should keep a copy of the recorded video, mirroring
 * what Playwright itself would retain for the configured `video` option and
 * the test outcome.
 *
 * | mode                                   | result                      |
 * | -------------------------------------- | --------------------------- |
 * | `'off'` / no mode                      | never persist               |
 * | `'on'`                                 | always persist              |
 * | `'on-first-retry'`, `'retry-with-video'` | persist (a video existing implies a retry) |
 * | `'retain-on-failure'`                  | persist unless status is `'passed'` |
 * | anything else                          | skip and log a warning      |
 *
 * Unknown modes are skipped on purpose: under-persisting is preferred over
 * keeping a video for a mode whose retain semantics we don't understand.
 *
 * @param {string | {mode?: string} | null | undefined} videoOption - The
 *   effective Playwright `video` option, as a bare mode string or an object
 *   carrying a `mode` property.
 * @param {string | undefined} status - `testInfo.status` of the finished test.
 * @returns {boolean} `true` when the video should be persisted.
 */
function shouldPersistVideo(videoOption, status) {
    const mode = typeof videoOption === 'string' ? videoOption : videoOption?.mode;
    if (mode === 'off' || mode === undefined) {
        return false;
    }
    if (mode === 'retain-on-failure') {
        return status !== 'passed';
    }
    // 'on-first-retry' (and its deprecated alias 'retry-with-video') only
    // record on retries, so an existing video already implies "retain".
    if (mode === 'on' || mode === 'on-first-retry' || mode === 'retry-with-video') {
        return true;
    }
    Logger_1.appLogger.warn(`Unknown Playwright video mode "${String(mode)}"; skipping video ` +
        `persistence to avoid violating user intent. If this is a real ` +
        `Playwright mode, add it to shouldPersistVideo() in testExtension.ts.`);
    return false;
}
/**
 * Produces the video mode to show in log messages.
 *
 * @param {unknown} videoOption - The Playwright `video` option: either a
 *   bare mode string or an object with a `mode` property.
 * @returns {*} The string itself when given a string, the object's `mode`
 *   value when the object has a `mode` key, and `'off'` for everything else.
 */
function describeVideoMode(videoOption) {
    if (typeof videoOption === 'string') {
        return videoOption;
    }
    const carriesMode = Boolean(videoOption) &&
        typeof videoOption === 'object' &&
        'mode' in videoOption;
    return carriesMode ? videoOption.mode : 'off';
}
/**
 * Queues a deferred "save the recorded video, then hand it to the flow's
 * persistence layer" job when (a) Playwright is recording a video for this
 * page and (b) `shouldPersistVideo` approves for the given mode and outcome.
 *
 * The function returns immediately. `Video.saveAs` only completes after the
 * BrowserContext has closed, which cannot happen until the page fixture's
 * teardown returns — awaiting it here would deadlock. The job's promise is
 * handed to `trackVideoPersist` so the worker-scoped drain guard can await
 * it at end-of-worker instead. Failures inside the job are logged as
 * warnings and never propagate.
 *
 * @param page - Extended page; reads `page._dnb.donobuFlowMetadata.id`,
 *   `page._dnb.persistence` and `page.video()`.
 * @param testInfo - Playwright test info; only `status` is read.
 * @param videoOption - The effective Playwright `video` option.
 * @returns {void}
 */
function persistVideoIfApplicable(page, testInfo, videoOption) {
    const flowId = page._dnb.donobuFlowMetadata.id;
    const recording = page.video();
    if (!recording) {
        // Nothing is being recorded (video 'off', or 'on-first-retry' on a
        // first attempt, ...), so there is nothing to persist.
        return;
    }
    if (!shouldPersistVideo(videoOption, testInfo.status)) {
        Logger_1.appLogger.info(`Skipping video persist for flow ${flowId}: video mode ` +
            `"${describeVideoMode(videoOption)}" + status "${testInfo.status}" ` +
            `means Playwright will discard the file and we'd be violating user ` +
            `intent by keeping a copy.`);
        return;
    }
    const flowStore = page._dnb.persistence;
    const saveAndForward = async () => {
        const scratchPath = path_1.default.join(os_1.tmpdir(), `donobu-test-video-${flowId}.webm`);
        try {
            // Settles only once the BrowserContext has closed and the .webm is
            // fully written, i.e. after Playwright's own fixture teardown.
            await recording.saveAs(scratchPath);
            const videoBytes = await fs_1.promises.readFile(scratchPath);
            await flowStore.setVideo(flowId, videoBytes);
            Logger_1.appLogger.info(`Persisted video for flow ${flowId} ` +
                `(${(videoBytes.length / 1024).toFixed(0)} KB) — queued for cloud upload.`);
        }
        catch (err) {
            Logger_1.appLogger.warn(`Failed to persist video for flow ${flowId}: ${err.message}`);
        }
        finally {
            // Best-effort cleanup of the scratch copy; it may not exist.
            await fs_1.promises.unlink(scratchPath).catch(() => undefined);
        }
    };
    trackVideoPersist(saveAndForward());
}
/**
* Maximum time, in milliseconds (30 seconds), the worker-scoped
* post-test-session upload drain will wait for cloud uploads to complete
* before letting the worker exit. Bytes that aren't uploaded within this
* window stay on disk and are picked up by the next `donobu` process to
* start with the same data dir (via the file-upload cache's stale-claim
* reclaim).
*
* Passed to `shutdownFileUploadWorkers` by `donobuFileUploadDrainGuard`.
*/
const UPLOAD_DRAIN_TIMEOUT_MS = 30_000;
exports.test = test_1.test.extend({
/**
* Establish a logging scope for the entire Playwright test *before* any other
* fixtures run. Playwright builds the fixture dependency graph eagerly, so
* user test bodies may start executing outside of the AsyncLocalStorage
* context seeded later inside the `page` fixture. By creating an auto fixture
* that runs first, we can:
* 1. Generate a stable flow ID for the test.
* 2. Enter the logging context immediately so any downstream fixture or
* test body log sees the ID.
* 3. Store the ID in a process-local fallback so synchronous logs emitted
* outside ALS-aware code still carry the same identifier.
*
* Every other fixture (e.g., `page`) consumes this `flowLoggingContext`.
*
* Fixture value: `{ flowId, logBuffer }` — a fresh random UUID and a new
* `FlowLogBuffer` per test.
*/
flowLoggingContext: [
async ({}, use) => {
// NOTE:
// It would be *nice* if we could use 'testInfo.testId' for the flowId but
// that is a trap! A Playwright testId is deterministic and Playwright keeps
// it constant across retries, shards, and even repeated invocations of the
// same test. If we reused testInfo.testId, every rerun would reuse the same
// Donobu flow ID, so metadata would overwrite and concurrent executions could
// clobber each other.
const flowId = (0, crypto_1.randomUUID)();
const logBuffer = new FlowLogBuffer_1.FlowLogBuffer();
const flowContext = { flowId, logBuffer };
// Dedicated async resource; `use` is invoked inside its scope below.
const asyncScope = new async_hooks_1.AsyncResource('DonobuFlowContext');
await Logger_1.loggingContext.run(flowContext, async () => {
// NOTE(review): enterWith() is called in addition to run(). Presumably this
// makes the store stick to the current execution context for continuations
// that are not descendants of the run() callback — confirm against the
// Node.js AsyncLocalStorage documentation before changing.
Logger_1.loggingContext.enterWith(flowContext);
// Process-local fallbacks for log calls made outside ALS-aware code.
(0, Logger_1.setProcessLocalFlowId)(flowId);
(0, Logger_1.setProcessLocalLogBuffer)(logBuffer);
try {
await asyncScope.runInAsyncScope(() => use({ flowId, logBuffer }));
}
finally {
// Clear the process-local fallbacks once `use` returns or throws.
(0, Logger_1.setProcessLocalFlowId)(null);
(0, Logger_1.setProcessLocalLogBuffer)(null);
}
});
},
{ scope: 'test', auto: true },
],
/**
* Auto fixture that installs the Playwright step logger for the duration of
* the test: it calls `installPlaywrightStepLogger(testInfo)` before the test
* body and invokes the returned teardown afterwards (also when the test
* throws).
*
* The installer patches `testInfo._callbacks.onStepEnd` (and `onStepBegin`,
* to record step start times) — NOT `testInfo._addStep` — so that each
* Playwright step in the `pw:api` (clicks, fills, navigations) and `expect`
* (assertions) categories produces an `appLogger` entry on completion.
* Routing native Playwright operations through the same per-flow
* `FlowLogBuffer` that AI tool-call logs populate makes the live
* `/api/flows/:id/logs` feed and persisted `logs.json` capture meaningful
* step-by-step data even for non-AI Playwright tests.
*
* Depends on `flowLoggingContext` so the ALS / `processLocalLogBuffer` are
* already set up by the time the patched callbacks fire.
*
* Verified against @playwright/test 1.57. `_callbacks`, `_stepMap` and the
* step shape (category, title, params, endWallTime, error, location) are
* private Playwright internals — see `installPlaywrightStepLogger` for how
* shape drift is handled.
*/
pwApiStepLogger: [
// `flowLoggingContext` is destructured only to declare the fixture
// dependency; its value is not used here.
async ({ flowLoggingContext: _ }, use, testInfo) => {
const teardown = installPlaywrightStepLogger(testInfo);
try {
await use();
}
finally {
teardown();
}
},
{ scope: 'test', auto: true },
],
/**
* Drain in-flight file uploads (videos, screenshots, etc.) to Donobu
* Cloud at the end of every Playwright worker's lifetime.
*
* The {@link FileUploadWorker} runs continuously while tests execute,
* uploading flow artifacts to the cloud asynchronously. When the worker
* process exits, any uploads still mid-flight are abandoned at whatever
* stage they reached. The bytes survive on disk as `.uploading.<token>`
* markers, but for ephemeral CI runners — where the box gets torn down
* after the test session — there's no future process to resume them.
*
* This auto fixture closes that gap by waiting for the queue to drain
* before letting the worker exit.
*
* Teardown order: (1) wait up to VIDEO_PERSIST_TIMEOUT_MS for deferred video
* persists, (2) read the aggregate upload status, (3) if anything is pending
* or in flight, call `shutdownFileUploadWorkers` with UPLOAD_DRAIN_TIMEOUT_MS.
* Every failure path logs and returns; nothing here throws out of teardown
* except what `waitForPendingVideoPersists` or the logger itself might throw.
*/
donobuFileUploadDrainGuard: [
async ({}, use) => {
// No setup work: everything below is teardown.
await use();
// ── Teardown — runs ONCE per Playwright worker, after every test ──
// First, wait for any deferred video persists from finalizeTest to
// complete. Each one is gated on a BrowserContext close (Playwright
// finalizes the .webm at that point) and then forwards the bytes to
// setVideo. Without this wait, fast tests would beat the
// saveAs-after-close to the punch and the worker would exit
// mid-write — meaning the cloud upload below would never be queued
// for the missing video.
const videoWait = await waitForPendingVideoPersists(VIDEO_PERSIST_TIMEOUT_MS);
if (videoWait.initial > 0) {
if (videoWait.timedOut) {
Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: ${videoWait.remaining} of ` +
`${videoWait.initial} video persist(s) still pending after ` +
`${Math.round(VIDEO_PERSIST_TIMEOUT_MS / 1000)}s; proceeding ` +
`with upload drain anyway. Those video files (if any) will ` +
`not be uploaded this session.`);
}
else {
Logger_1.appLogger.info(`donobuFileUploadDrainGuard: completed ${videoWait.initial} ` +
`pending video persist(s) before upload drain.`);
}
}
// Snapshot how many uploads are still queued or in flight. If the status
// lookup itself fails, skip the drain instead of failing teardown.
let initialActive;
try {
const status = await (0, fileUploadWorkerRegistry_1.getFileUploadAggregateStatus)();
initialActive = status.totalPending + status.totalInFlight;
}
catch (err) {
Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: failed to read upload status; ` +
`skipping drain. Error: ${err.message}`);
return;
}
if (initialActive === 0) {
Logger_1.appLogger.debug('donobuFileUploadDrainGuard: no pending file uploads at end of test ' +
'session; worker is exiting.');
return;
}
Logger_1.appLogger.info(`donobuFileUploadDrainGuard: test session complete; ${initialActive} ` +
`file upload(s) still syncing to Donobu Cloud. ` +
`Waiting up to ${Math.round(UPLOAD_DRAIN_TIMEOUT_MS / 1000)}s for ` +
`the queue to drain before exit. ` +
`(Any uploads not finished within the timeout stay on disk and ` +
`resume on the next \`donobu\` process start.)`);
// Wall-clock start of the drain; used only for the elapsed-time log lines.
const startedAt = Date.now();
let result;
try {
result = await (0, fileUploadWorkerRegistry_1.shutdownFileUploadWorkers)(UPLOAD_DRAIN_TIMEOUT_MS);
}
catch (err) {
Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: drain threw; letting worker exit. ` +
`Error: ${err.message}`);
return;
}
const elapsedMs = Date.now() - startedAt;
// `result.drained` / `result.totalRemaining` come from
// shutdownFileUploadWorkers; only those two fields are read here.
if (result.drained) {
Logger_1.appLogger.info(`donobuFileUploadDrainGuard: all ${initialActive} file upload(s) ` +
`completed in ${Math.round(elapsedMs / 1000)}s. Worker exiting.`);
}
else {
Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: drain timed out after ` +
`${Math.round(elapsedMs / 1000)}s with ${result.totalRemaining} ` +
`upload(s) still pending. The bytes are on disk and will resume ` +
`the next time a \`donobu\` process starts with the same data ` +
`directory. Worker exiting now.`);
}
},
{ scope: 'worker', auto: true },
],
// 1) Declare `gptClient` as an "option" fixture with a default of `undefined`.
// The `page` fixture forwards whatever value is in effect to extendPage();
// tests or projects override it with test.use({ gptClient: ... }).
gptClient: [
undefined, // default
{ option: true }, // so that test.use({ gptClient: ... }) can override
],
// Override the default page fixture.
//
// Wraps Playwright's `page` with extendPage(), stamps the resulting Donobu
// flow metadata with the test's name / testId / objective, registers the
// test and the flow with the persistence layer, runs the test body with the
// extended page, and always calls finalizeTest() afterwards.
//
// Inputs read from the test:
// - annotation `objective` → overallObjective (null when absent)
// - annotations of type `ENV` → extra allowed env-variable names
// - project metadata `visualCueDurationMs` → integer, defaults to 250
page: async ({ page, gptClient, headless, flowLoggingContext, video }, use, testInfo) => {
Logger_1.appLogger.info(`Test started: "${testInfo.title}"`);
const overallObjective = testInfo.annotations.find((v) => v.type === 'objective')?.description ??
null;
// NOTE(review): parse() presumably throws when the metadata value is present
// but not an integer, failing the fixture — confirm against the zod docs.
const visualCueDurationMs = v4_1.z
.int()
.optional()
.default(250)
.parse(testInfo.project.metadata['visualCueDurationMs']);
const { flowId, logBuffer } = flowLoggingContext;
const extendedPage = await (0, extendPage_1.extendPage)(page, {
flowId: flowId,
visualCueDurationMs: visualCueDurationMs,
cacheFilepath: (0, cacheLocator_1.buildPageAiCachePath)(testInfo.file),
envVars: (0, DonobuFlowsManager_1.distillAllowedEnvVariableNames)(overallObjective, testInfo.annotations
.filter((a) => a.type === 'ENV' && a.description)
.map((a) => a.description)),
gptClient: gptClient,
headless: headless,
});
extendedPage._dnb.donobuFlowMetadata.name = getSanitizedTestName(testInfo);
extendedPage._dnb.donobuFlowMetadata.testId = testInfo.testId;
extendedPage._dnb.donobuFlowMetadata.overallObjective = overallObjective;
// Register browser console and network listeners so that logs from these
// sources are captured into the flow's logBuffer. In Studio-launched flows
// this is done by WebTargetInspector.initialize(), but that method is not
// called during test runs, so we wire the listeners up here directly.
(0, PageLogListeners_1.registerPageLogListeners)(page);
// Bind the Playwright-provided `use` callback to an async resource so that
// any microtasks scheduled inside the test body keep the flow logging
// context. Without this, Playwright may re-use earlier async resources that
// were created before we entered the logging scope.
const asyncScope = new async_hooks_1.AsyncResource('DonobuTestContext');
const boundUse = asyncScope.bind(use);
try {
try {
const testMetadata = flowMetadataToTestMetadata(testInfo.testId, extendedPage._dnb.donobuFlowMetadata);
await extendedPage._dnb.testsPersistence.createTest(testMetadata);
}
catch {
// Test already exists from a prior run — nothing to do.
// NOTE(review): this also swallows any other createTest() failure (and a
// throw from flowMetadataToTestMetadata); confirm that is intended.
}
await extendedPage._dnb.persistence.setFlowMetadata(extendedPage._dnb.donobuFlowMetadata);
await boundUse(extendedPage);
}
catch (error) {
// Record the outcome on testInfo before finalizeTest() (in `finally`) reads
// it, then let the original error propagate to Playwright.
testInfo.status =
error instanceof test_1.errors.TimeoutError ? 'timedOut' : 'failed';
throw error;
}
finally {
// Runs on success and failure alike; `video` is the effective Playwright
// video option, used to decide whether the recording is persisted.
await finalizeTest(extendedPage, testInfo, logBuffer, video);
}
},
});
/**
 * Depth-first walk over Playwright's internal step tree (`testInfo._steps`)
 * that keeps only `expect` and `test.step` entries — the native Playwright
 * operations that the Donobu AI tool-call pipeline does not record.
 *
 * `testInfo._steps` is used instead of a Reporter because `testInfo` is
 * already at hand, no reporter module resolution is involved, and the tree
 * is fully populated by the time `finalizeTest()` runs.
 *
 * Steps of any other category (e.g. `pw:api`) are dropped, but their kept
 * descendants are promoted into the parent's level so a nested `expect()`
 * still shows up.
 *
 * @param {Array<object>} rawSteps - Playwright step objects at one level.
 * @param {Map<string, number>} startTimes - stepId → start wall time, as
 *   captured from `onStepBegin` payloads.
 * @returns {Array<object>} Kept steps for this level, each with `title`,
 *   `category`, `startWallTime`, `endWallTime`, `passed`, `error`,
 *   `location` and `children`, sorted by `startWallTime` ascending.
 */
function collectNativeSteps(rawSteps, startTimes) {
    const kept = [];
    for (const raw of rawSteps) {
        const category = raw.category ?? '';
        const nested = collectNativeSteps(Array.isArray(raw.steps) ? raw.steps : [], startTimes);
        if (category !== 'expect' && category !== 'test.step') {
            // Not a kept category: surface qualifying descendants at this level.
            kept.push(...nested);
            continue;
        }
        const endWallTime = raw.endWallTime ?? Date.now();
        // Without a recorded start time the window collapses to zero width
        // (start === end) so sibling steps are not falsely nested inside it.
        let startWallTime = endWallTime;
        if (typeof raw.stepId === 'string') {
            const recordedStart = startTimes.get(raw.stepId);
            if (recordedStart) {
                startWallTime = recordedStart;
            }
        }
        let error;
        if (raw.error) {
            error = { message: raw.error.message, stack: raw.error.stack };
        }
        let location;
        if (raw.location) {
            location = {
                file: raw.location.file ?? raw.location.fileName ?? '',
                line: raw.location.line ?? 0,
                column: raw.location.column ?? 0,
            };
        }
        kept.push({
            title: raw.title ?? '',
            category,
            startWallTime,
            endWallTime,
            passed: !raw.error,
            error,
            location,
            children: nested,
        });
    }
    return kept.sort((left, right) => left.startWallTime - right.startWallTime);
}
// ---------------------------------------------------------------------------
// Live Playwright step → appLogger pipeline
// ---------------------------------------------------------------------------
// Used by the `pwApiStepLogger` fixture above to surface native Playwright
// operations (clicks, fills, navigations, expects) in the same per-flow
// FlowLogBuffer that AI tool-call logs already populate.
// Step categories that are forwarded to the logger. Steps in any other
// category are ignored both by the patched onStepEnd callback and by
// emitPlaywrightStepLog itself.
const LOGGED_STEP_CATEGORIES = new Set([
'pw:api',
'expect',
]);
/**
 * The only `step.params` keys that may be copied into log metadata, in the
 * order they appear in the output.
 */
const STEP_PARAM_ALLOWLIST = [
    'url',
    'selector',
    'text',
    'value',
    'timeout',
    'name',
    'key',
    // For expect steps Playwright stores the asserted value here
    // (e.g., `expect(x).toBe('$51.00')` → params: { expected: '$51.00' }).
    'expected',
];
/**
 * Extracts a small, JSON-safe subset of `step.params` for logging.
 * Playwright's params can carry non-serializable values (Locator handles,
 * Page refs, ...) that would cycle or bloat the log feed, so only
 * allowlisted keys whose value is a string, number or boolean survive.
 *
 * @param {unknown} rawParams - `step.params` as provided by Playwright.
 * @returns {Record<string, string | number | boolean> | undefined} The
 *   filtered params, or `undefined` when `rawParams` is not an object or no
 *   allowlisted primitive was found.
 */
function pickStepParams(rawParams) {
    if (!rawParams || typeof rawParams !== 'object') {
        return undefined;
    }
    const isLoggable = (candidate) => {
        const kind = typeof candidate;
        return kind === 'string' || kind === 'number' || kind === 'boolean';
    };
    const keptEntries = STEP_PARAM_ALLOWLIST
        .map((key) => [key, rawParams[key]])
        .filter(([, candidate]) => isLoggable(candidate));
    return keptEntries.length > 0 ? Object.fromEntries(keptEntries) : undefined;
}
/** Matches any path that has a `node_modules` directory segment (either slash style). */
const NODE_MODULES_LOCATION_REGEX = /[\\/]node_modules[\\/]/;
/**
 * Writes one log entry for a finished Playwright step.
 *
 * Steps are ignored when their category is not in `LOGGED_STEP_CATEGORIES`
 * or when their source location lies inside `node_modules` (library
 * plumbing rather than the user's test code). Failed steps (those with an
 * `error` object) are logged at error level, everything else at info level.
 *
 * The message is the step title plus, where applicable, the asserted
 * `expected` value. For `expect` steps the trimmed source line at the step's
 * location is preferred when it can be read, because it shows both the value
 * under test and the matcher.
 *
 * @param step - Playwright's internal step object; read defensively because
 *   its shape is private API. `null`/`undefined` is tolerated.
 * @returns {void}
 */
function emitPlaywrightStepLog(step) {
    const category = step?.category ?? '';
    if (!LOGGED_STEP_CATEGORIES.has(category)) {
        return;
    }
    const rawLocation = step.location && typeof step.location === 'object'
        ? step.location
        : undefined;
    const rawFile = rawLocation ? rawLocation.file ?? rawLocation.fileName : undefined;
    // The user's test code never lives in node_modules, so a step attributed
    // there is internal plumbing and not worth surfacing.
    if (typeof rawFile === 'string' && rawFile && NODE_MODULES_LOCATION_REGEX.test(rawFile)) {
        return;
    }
    const numberOrUndefined = (candidate) => (typeof candidate === 'number' ? candidate : undefined);
    const title = step.title ?? '';
    const startedAt = numberOrUndefined(step.wallTime);
    const completedAt = numberOrUndefined(step.endWallTime);
    let durationMs;
    if (startedAt !== undefined && completedAt !== undefined) {
        durationMs = completedAt - startedAt;
    }
    let error;
    if (step.error && typeof step.error === 'object') {
        const rawMessage = typeof step.error.message === 'string' ? step.error.message : '';
        error = {
            name: typeof step.error.name === 'string' ? step.error.name : 'Error',
            // Matcher failures carry ANSI-colored diffs; keep escape codes out
            // of the log feed.
            message: ansi_1.stripAnsi(rawMessage),
        };
    }
    let location;
    if (rawLocation) {
        location = {
            file: rawFile ?? '',
            line: rawLocation.line ?? 0,
            column: rawLocation.column ?? 0,
        };
    }
    const params = pickStepParams(step.params);
    // Baseline message: the title, plus a primitive `expected` value that
    // Playwright has not already inlined into the title.
    const titleMessage = `${title}${formatExpectedSuffix(title, params?.expected)}`;
    let message = titleMessage;
    if (category === 'expect' && location?.file && location.line > 0) {
        // A bare matcher name says little; the user's own source line is the
        // better description when available.
        const sourceLine = readSourceLine(location.file, location.line);
        if (sourceLine) {
            message = sourceLine;
        }
    }
    const meta = {
        category,
        title,
        params,
        durationMs,
        startedAt,
        completedAt,
        error,
        location,
    };
    const log = error ? Logger_1.appLogger.error : Logger_1.appLogger.info;
    log.call(Logger_1.appLogger, message, meta);
}
/**
 * Builds the ` <expected>` suffix appended to a step title in log messages.
 *
 * @param {string} title - The step title the suffix would be appended to.
 * @param {unknown} expected - The asserted value from `step.params`, if any.
 * @returns {string} A space followed by the JSON rendering of `expected`, or
 *   the empty string when `expected` is not a string/number/boolean or when
 *   the title already contains that rendering.
 */
function formatExpectedSuffix(title, expected) {
    // null/undefined fall out here too: their typeof is 'object'/'undefined'.
    const isPrimitive = ['string', 'number', 'boolean'].includes(typeof expected);
    if (!isPrimitive) {
        return '';
    }
    const rendered = JSON.stringify(expected);
    return title.includes(rendered) ? '' : ` ${rendered}`;
}
/**
 * Per-worker cache mapping an absolute file path to that file's contents
 * split into lines. Source files are assumed not to change during a worker's
 * lifetime, so each file is read at most once.
 *
 * A `null` value records a failed read, so the read is not retried.
 */
const sourceLineCache = new Map();
/** Empties the source-line cache. Test-only. */
function _resetSourceLineCacheForTests() {
    sourceLineCache.clear();
}
/**
 * Returns the text at `file:line` (1-based) with surrounding whitespace and
 * one trailing semicolon removed, reading through the per-worker cache.
 *
 * @param {string} file - Path of the source file.
 * @param {number} line - 1-based line number.
 * @returns {string | undefined} The cleaned-up line, or `undefined` when the
 *   file cannot be read, the line does not exist, or nothing is left after
 *   trimming.
 */
function readSourceLine(file, line) {
    if (!sourceLineCache.has(file)) {
        let loaded;
        try {
            loaded = fs_1.readFileSync(file, 'utf8').split('\n');
        }
        catch {
            // Remember the failure so it is not retried on every step.
            loaded = null;
        }
        sourceLineCache.set(file, loaded);
    }
    const cachedLines = sourceLineCache.get(file);
    if (!cachedLines) {
        return undefined;
    }
    const rawLine = cachedLines[line - 1];
    if (typeof rawLine !== 'string') {
        return undefined;
    }
    const stripped = rawLine.trim();
    const withoutSemicolon = stripped.endsWith(';') ? stripped.slice(0, -1) : stripped;
    return withoutSemicolon || undefined;
}
/**
 * Monkey-patch `testInfo._callbacks.onStepEnd` so each finished Playwright
 * step in the `pw:api` / `expect` categories is forwarded to
 * `emitPlaywrightStepLog` (which logs at info level, or error level for
 * failed steps), and patch `onStepBegin` so each step's start wall time is
 * recorded in a `stepId -> wallTime` map stored on
 * `testInfo.__donobuStepStartTimes`.
 *
 * Why patch `onStepEnd` and not `_addStep`:
 * Playwright captures the step's source location with a stack walk inside
 * `_addStep` (see playwright/lib/worker/testInfo.js). Patching `_addStep`
 * inserts a Donobu library frame into that stack, and Playwright's
 * built-in filter only excludes `@playwright/test` / `playwright-core`
 * paths — so for steps that don't pre-set `data.location` (most notably
 * `expect`), the captured location lands inside our wrapper in
 * `node_modules/donobu/...`. The downstream node_modules filter would
 * then drop the entry. `_callbacks.onStepEnd` runs AFTER the stack
 * capture, so this hook is invisible to Playwright's location resolution.
 *
 * Verified against @playwright/test 1.57+. `_callbacks` / `_stepMap` and
 * the step shape (category, title, params, endWallTime, error, location)
 * are private Playwright internals. Installation, per-step logging and
 * teardown are all wrapped in try/catch, and a missing/unexpected shape
 * (including a missing `testInfo`) yields a no-op teardown, so shape drift
 * degrades to "no step logs" rather than failing the test.
 *
 * Exported for unit testing.
 *
 * @param testInfo - Playwright's TestInfo for the running test.
 * @returns {() => void} Teardown that puts both callback slots back exactly
 *   as they were before installation — including removing `onStepBegin`
 *   again when none existed. Safe to call when installation failed.
 */
function installPlaywrightStepLogger(testInfo) {
    const ti = testInfo;
    const callbacks = ti?._callbacks;
    const stepMap = ti?._stepMap;
    const originalOnStepEnd = callbacks && typeof callbacks.onStepEnd === 'function'
        ? callbacks.onStepEnd
        : null;
    const originalOnStepBegin = callbacks && typeof callbacks.onStepBegin === 'function'
        ? callbacks.onStepBegin
        : null;
    if (!callbacks || !originalOnStepEnd || !stepMap) {
        return () => { };
    }
    // Snapshot a callback slot and return a function that reinstates it.
    // Own-ness is tracked so a slot that did not exist before (or was only
    // inherited) is deleted again instead of being left holding our patch.
    const snapshotSlot = (key) => {
        const wasOwn = Object.prototype.hasOwnProperty.call(callbacks, key);
        const previous = callbacks[key];
        return () => {
            if (wasOwn) {
                callbacks[key] = previous;
            }
            else {
                delete callbacks[key];
            }
        };
    };
    const restorers = [snapshotSlot('onStepEnd'), snapshotSlot('onStepBegin')];
    const restoreAll = () => {
        for (const restore of restorers) {
            try {
                restore();
            }
            catch (err) {
                Logger_1.appLogger.debug('Failed to restore a Playwright step callback', err);
            }
        }
    };
    let installed = false;
    try {
        // Stash a stepId -> wallTime map on testInfo so collectNativeSteps can
        // look up start times when building the report hierarchy. Playwright
        // doesn't store wallTime on the step object itself — it's only emitted
        // via the onStepBegin payload — so we capture it here. Without this,
        // `test.step` blocks have no recoverable start time and any preceding
        // tool-call step gets falsely nested inside them.
        const startTimes = new Map();
        ti.__donobuStepStartTimes = startTimes;
        callbacks.onStepEnd = function patchedOnStepEnd(payload) {
            // Always let Playwright's own handler run first and keep its
            // return value; logging must never interfere with it.
            const ret = originalOnStepEnd.call(this, payload);
            try {
                const stepId = payload?.stepId;
                if (typeof stepId === 'string') {
                    const step = stepMap.get(stepId);
                    if (step && LOGGED_STEP_CATEGORIES.has(step.category)) {
                        emitPlaywrightStepLog(step);
                    }
                }
            }
            catch (err) {
                Logger_1.appLogger.debug('Failed to log Playwright step', err);
            }
            return ret;
        };
        callbacks.onStepBegin = function patchedOnStepBegin(payload) {
            const ret = originalOnStepBegin
                ? originalOnStepBegin.call(this, payload)
                : undefined;
            try {
                if (typeof payload?.stepId === 'string' &&
                    typeof payload?.wallTime === 'number') {
                    startTimes.set(payload.stepId, payload.wallTime);
                }
            }
            catch (err) {
                Logger_1.appLogger.debug('Failed to record Playwright step start time', err);
            }
            return ret;
        };
        installed = true;
    }
    catch (err) {
        // Roll back whatever was patched before the failure so a partial
        // install never outlives this call.
        restoreAll();
        Logger_1.appLogger.warn('Failed to install Playwright step logger; native Playwright steps will not appear in flow logs', err);
    }
    return () => {
        if (installed) {
            restoreAll();
        }
    };
}
/**
 * Attaches the flow's tool-call screenshots, plus a JSON summary of the tool
 * calls, to the Playwright report.
 *
 * Each screenshot is attached as `donobu-step-{index}-{toolName}` so the HTML
 * report generator can correlate it with its tool call; the companion
 * `donobu-step-summary` attachment carries the tool-call metadata. Tool calls
 * without an outcome, or whose `completedAt` is `null`, are skipped. Nothing
 * is attached when the tool calls cannot be loaded or there are none.
 *
 * @param sharedState - The page's Donobu state; reads
 *   `donobuFlowMetadata.id` and `persistence.getToolCalls/getScreenShot`.
 * @param testInfo - Playwright test info used for `outputPath` and `attach`.
 * @returns {Promise<void>}
 */
async function attachStepScreenshots(sharedState, testInfo) {
    const flowId = sharedState.donobuFlowMetadata.id;
    let toolCalls;
    try {
        toolCalls = await sharedState.persistence.getToolCalls(flowId);
    }
    catch {
        // Flow may not have any tool calls (e.g., non-AI tests)
        return;
    }
    if (toolCalls.length === 0) {
        return;
    }
    // Fetches one tool call's post-call screenshot and attaches it by path.
    const attachScreenshot = async (index, toolCall) => {
        const bytes = await sharedState.persistence.getScreenShot(flowId, toolCall.postCallImageId);
        if (!bytes) {
            return;
        }
        // JPEG magic number FF D8 FF; anything else is treated as PNG.
        const isJpeg = bytes[0] === 0xff && bytes[1] === 0xd8 && bytes[2] === 0xff;
        const extension = isJpeg ? 'jpg' : 'png';
        const attachmentName = `donobu-step-${index}-${toolCall.toolName}`;
        // Attach by `path`, not `body`: a body would stay in memory and get
        // base64-embedded into the HTML by reporters, whereas with a path
        // Playwright copies the file into `<outputDir>/attachments/` and only
        // carries the path. The temp file is removed afterwards so the
        // test-results tree doesn't keep a duplicate.
        const filePath = testInfo.outputPath(`${attachmentName}.${extension}`);
        await fs_1.promises.writeFile(filePath, bytes);
        try {
            await testInfo.attach(attachmentName, {
                path: filePath,
                contentType: isJpeg ? 'image/jpeg' : 'image/png',
            });
        }
        finally {
            await fs_1.promises.unlink(filePath).catch(() => {
                /* best-effort cleanup; attach already copied the bytes */
            });
        }
    };
    const stepSummary = [];
    for (let index = 0; index < toolCalls.length; index += 1) {
        const toolCall = toolCalls[index];
        const isFinished = Boolean(toolCall.outcome) && toolCall.completedAt !== null;
        if (!isFinished) {
            continue;
        }
        const { outcome } = toolCall;
        const hasScreenshot = Boolean(toolCall.postCallImageId);
        stepSummary.push({
            index,
            toolName: toolCall.toolName,
            page: toolCall.page,
            startedAt: toolCall.startedAt,
            completedAt: toolCall.completedAt,
            success: outcome.isSuccessful,
            summary: outcome.forLlm?.slice(0, 200) ?? '',
            hasScreenshot,
            parameters: toolCall.parameters,
            outcome: {
                isSuccessful: outcome.isSuccessful,
                forLlm: outcome.forLlm,
            },
            metadata: outcome.metadata,
        });
        if (!hasScreenshot) {
            continue;
        }
        try {
            await attachScreenshot(index, toolCall);
        }
        catch {
            Logger_1.appLogger.debug(`Could not retrieve step screenshot ${toolCall.postCallImageId} for step ${index}`);
        }
    }
    await testInfo.attach('donobu-step-summary', {
        body: JSON.stringify(stepSummary),
        contentType: 'application/json',
    });
}
/**
 * Runs the end-of-test teardown bookkeeping for a Donobu-instrumented test.
 *
 * In order, this function:
 *  1. Queues video persistence when `videoOption` is defined.
 *  2. Stamps the final state and completion time onto the flow metadata held in
 *     `page._dnb`, persists it, and attaches it to the test as
 *     `test-flow-metadata.json`.
 *  3. Persists the captured log snapshot as `logs.json` when `logBuffer` is given.
 *  4. Attaches step screenshots, native Playwright steps and AI invocation
 *     records, then persists the browser storage state.
 *  5. For `failed` tests, either attempts self-healing (v1 tests) or gathers
 *     triage evidence (all other tests).
 *  6. For `passed` tests while `DONOBU_AUTO_HEAL_ACTIVE` is set, adds a
 *     `self-healed` annotation if one is not already present.
 *  7. Flushes any recorded `page.tbd()` sessions and logs the final status.
 *
 * Errors raised inside the try/catch-guarded sections are logged (or, for the
 * two optional attachments, ignored) rather than rethrown.
 *
 * @param page Page object exposing the Donobu shared state on `_dnb` and a `context()` method.
 * @param testInfo Test info object; its `status`, `title`, `annotations` and `attach` are used here.
 * @param logBuffer Optional buffer exposing `snapshot()`; skipped when falsy.
 * @param videoOption Forwarded to `persistVideoIfApplicable`; video persistence is skipped when `undefined`.
 * @returns A promise that resolves with no value once teardown has finished.
 */
async function finalizeTest(page, testInfo, logBuffer, videoOption) {
const sharedState = page._dnb;
// Kick off video persistence early in teardown. The actual file copy is
// deferred (it can't run until the BrowserContext closes, which happens
// AFTER our fixture body returns), so this just queues a tracked promise.
// Doing it before the rest of finalizeTest's work means the deferred
// saveAs has already-finalized references to flowId / persistence / video
// by the time it runs, regardless of what subsequent teardown does.
if (videoOption !== undefined) {
persistVideoIfApplicable(page, testInfo, videoOption);
}
try {
// Record the final flow state: 'failed' and 'timedOut' map to FAILED; every
// other status is recorded as SUCCESS.
sharedState.donobuFlowMetadata.state =
testInfo.status === 'failed' || testInfo.status === 'timedOut'
? 'FAILED'
: 'SUCCESS';
sharedState.donobuFlowMetadata.completedAt = new Date().getTime();
await sharedState.persistence.setFlowMetadata(sharedState.donobuFlowMetadata);
await testInfo.attach('test-flow-metadata.json', {
body: JSON.stringify(sharedState.donobuFlowMetadata, null, 2),
contentType: 'application/json',
});
// Persist captured flow logs so they are available in the Donobu UI,
// mirroring what DonobuFlowsManager does for Studio-launched flows.
if (logBuffer) {
try {
const snapshot = logBuffer.snapshot();
await sharedState.persistence.setFlowFile(sharedState.donobuFlowMetadata.id, 'logs.json', Buffer.from(JSON.stringify(snapshot)));
}
catch (error) {
// A log persistence failure is reported but does not abort the rest of teardown.
Logger_1.appLogger.error('Failed to persist flow logs:', error);
}
}
// Attach step-level screenshots from the flow's tool call history.
// These enable the HTML report to show a visual timeline of what the
// AI agent saw at each step.
await attachStepScreenshots(sharedState, testInfo);
// Attach native Playwright steps (expect assertions, test.step blocks)
// so the HTML report can show a unified timeline alongside AI tool calls.
try {
// Fall back to an empty map / empty step list when these fields are absent.
const startTimes = testInfo.__donobuStepStartTimes ?? new Map();
const nativeSteps = collectNativeSteps(testInfo._steps ?? [], startTimes);
if (nativeSteps.length > 0) {
await testInfo.attach('donobu-native-steps', {
body: JSON.stringify(nativeSteps),
contentType: 'application/json',
});
}
}
catch {
// Non-fatal: native step collection failing must not affect the test result.
}
// Attach AI invocation wrappers (page.ai / page.ai.assert / page.ai.locate)
// so the HTML reporter can render each as a parent node containing the
// tool calls and native steps that fell inside its time window. The
// `cacheHit` flag drives the `[cached]` badge on the wrapper itself —
// not on inner actions, since a cached `page.ai` may legitimately invoke
// a live `page.ai.assert` and vice versa.
try {
const aiInvocations = sharedState.aiInvocations;
if (aiInvocations.length > 0) {
await testInfo.attach('donobu-ai-invocations', {
body: JSON.stringify(aiInvocations),
contentType: 'application/json',
});
}
}
catch {
// Non-fatal.
}
// Capture the browser context's storage state and persist it under the flow id.
const browserState = await BrowserUtils_1.BrowserUtils.getBrowserStorageState(page.context());
await sharedState.persistence.setBrowserState(sharedState.donobuFlowMetadata.id, browserState);
}
catch (error) {
// Any error from the bookkeeping above is logged and not rethrown; the
// status-specific handling below still runs.
Logger_1.appLogger.error(`Error during cleanup for test ${testInfo.title}:`, error);
}
// Post-failure handling. Only status 'failed' enters this branch; 'timedOut'
// is recorded as FAILED above but does not trigger self-heal or triage here.
if (testInfo.status === 'failed') {
if (isV1Test(testInfo)) {
// v1 tests: self-heal only when enabled and DONOBU_AUTO_HEAL_ACTIVE is not
// truthy (presumably that flag marks an auto-heal rerun — confirm).
if (isV1SelfHealingEnabled(testInfo) &&
!MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) {
if (!sharedState.gptClient) {
Logger_1.appLogger.warn('Will not self-heal due to no GPT client being set up.');
}
else {
try {
await (0, selfHealing_1.selfHeal)(sharedState.gptClient, testInfo, page);
}
catch (error) {
Logger_1.appLogger.error('Error when attempting to self-heal', error);
}
}
}
}
else {
// Non-v1 tests: gather failure evidence for triage. The log line depends on
// whether the result carries a file path or only the evidence object.
try {
const evidenceResult = await (0, triageTestFailure_1.gatherTestFailureEvidence)(testInfo, page);
if (evidenceResult?.filePath) {
Logger_1.appLogger.info(`Persisted Donobu triage evidence for "${testInfo.title}" to ${evidenceResult.filePath}.`);
}
else if (evidenceResult?.evidence) {
Logger_1.appLogger.info(`Captured Donobu triage evidence for "${testInfo.title}" (schema v${evidenceResult.evidence.schemaVersion}).`);
}
}
catch (error) {
Logger_1.appLogger.error(`Failed to gather test failure evidence for "${testInfo.title}".`, error);
}
}
}
else if (testInfo.status === 'passed' &&
MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) {
// A pass while DONOBU_AUTO_HEAL_ACTIVE is set is tagged 'self-healed'; the
// existence check keeps the annotation from being added twice.
const hasSelfHealedAnnotation = testInfo.annotations.some((annotation) => annotation.type === 'self-healed');
if (!hasSelfHealedAnnotation) {
testInfo.annotations.push({
type: 'self-healed',
description: 'Automatically healed by Donobu auto-heal rerun.',
});
}
}
// Flush any page.tbd() sessions: replace the tbd() call sites in the
// source files with the generated Playwright code for the recorded
// user interactions.
if (page._dnb.tbdSessions.length > 0) {
try {
const count = await (0, tbd_1.flushTbdSessions)(page._dnb.tbdSessions);
if (count > 0) {
Logger_1.appLogger.info(`tbd: replaced ${count} page.tbd() placeholder(s) with recorded actions`);
}
}
catch (error) {
Logger_1.appLogger.error('tbd: failed to flush replacements', error);
}
}
Logger_1.appLogger.info(`Test ended: "${testInfo.title}" — status: ${testInfo.status}`);
}
/**
 * Reports whether a test is a "v1" test, i.e. one that was generated by the
 * Donobu app. Such a test is recognized by carrying a test annotation of type
 * `objective` (its overall objective).
 *
 * @param testInfo Object exposing the test's `annotations` array.
 * @returns `true` when an `objective` annotation is present, otherwise `false`.
 */
function isV1Test(testInfo) {
    // `some` yields the boolean directly; no need to fetch the element and
    // coerce it through a ternary.
    return testInfo.annotations.some((annotation) => annotation.type === 'objective');
}
/**
 * Decides whether self-healing is switched on for the given test.
 *
 * The project's metadata is consulted first: when it contains the
 * SELF_HEAL_TESTS_ENABLED key, that value alone decides. Only when the key is
 * absent does the (trimmed) environment variable of the same name apply.
 * A value that `MiscUtils.yn` cannot interpret counts as disabled.
 *
 * @param testInfo Object exposing `project.metadata`.
 * @returns `true` when self-healing is enabled, otherwise `false`.
 */
function isV1SelfHealingEnabled(testInfo) {
    const settingKey = envVars_1.env.keys.SELF_HEAL_TESTS_ENABLED;
    const projectMetadata = testInfo.project.metadata;
    // Project-level configuration wins whenever the key exists, even if its
    // value is unparsable; the environment variable is only the fallback.
    const rawSetting = settingKey in projectMetadata
        ? projectMetadata[settingKey]
        : envVars_1.env.data.SELF_HEAL_TESTS_ENABLED?.trim();
    return MiscUtils_1.MiscUtils.yn(rawSetting) ?? false;
}
/**
 * Builds a reasonably unique name for the current test by joining its title
 * path with ' > '. When the joined title exceeds the maximum flow-name length
 * allowed by the underlying Donobu system, only the trailing characters are
 * kept.
 *
 * @param testInfo Object exposing the test's `titlePath` array.
 * @returns The joined title, capped at `DonobuFlowsManager.FLOW_NAME_MAX_LENGTH` characters.
 */
function getSanitizedTestName(testInfo) {
    const fullTitle = testInfo.titlePath.join(' > ');
    const maxLength = DonobuFlowsManager_1.DonobuFlowsManager.FLOW_NAME_MAX_LENGTH;
    // A negative start index keeps the tail of the string.
    return fullTitle.slice(-maxLength);
}
/**
 * Builds a TestMetadata from the fields available on a FlowMetadata.
 *
 * The listed flow fields are copied across unchanged. `suiteId` is always
 * `null`, `nextRunMode` is always 'DETERMINISTIC', and `provenance` comes from
 * `buildProvenance('CODE')`.
 *
 * @param testId Identifier stored as the resulting metadata's `id`.
 * @param flowMeta Flow metadata object to copy fields from.
 * @returns A new test metadata object.
 */
function flowMetadataToTestMetadata(testId, flowMeta) {
    const {
        name,
        target,
        web,
        envVars,
        customTools,
        overallObjective,
        allowedTools,
        resultJsonSchema,
        callbackUrl,
        maxToolCalls,
    } = flowMeta;
    const provenance = (0, buildProvenance_1.buildProvenance)('CODE');
    return {
        id: testId,
        name,
        target,
        web,
        envVars,
        customTools,
        overallObjective,
        allowedTools,
        resultJsonSchema,
        callbackUrl,
        maxToolCalls,
        suiteId: null,
        nextRunMode: 'DETERMINISTIC',
        provenance,
    };
}
//# sourceMappingURL=testExtension.js.map