UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

1,029 lines 47.1 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __exportStar = (this && this.__exportStar) || function(m, exports) { for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.test = void 0; exports.emitPlaywrightStepLog = emitPlaywrightStepLog; exports._resetSourceLineCacheForTests = _resetSourceLineCacheForTests; exports.installPlaywrightStepLogger = installPlaywrightStepLogger; const test_1 = require("@playwright/test"); const async_hooks_1 = require("async_hooks"); const crypto_1 = require("crypto"); const fs_1 = require("fs"); const os_1 = require("os"); const path_1 = __importDefault(require("path")); const v4_1 = require("zod/v4"); const envVars_1 = require("../../envVars"); const DonobuFlowsManager_1 = require("../../managers/DonobuFlowsManager"); const fileUploadWorkerRegistry_1 = require("../../persistence/files/fileUploadWorkerRegistry"); const ansi_1 = require("../../utils/ansi"); const BrowserUtils_1 = require("../../utils/BrowserUtils"); const buildProvenance_1 = require("../../utils/buildProvenance"); const FlowLogBuffer_1 = require("../../utils/FlowLogBuffer"); const Logger_1 = require("../../utils/Logger"); const MiscUtils_1 = require("../../utils/MiscUtils"); const PageLogListeners_1 = require("../../utils/PageLogListeners"); const cacheLocator_1 = require("../ai/cache/cacheLocator"); const extendPage_1 = require("../page/extendPage"); const tbd_1 = require("../page/tbd"); const selfHealing_1 = require("./utils/selfHealing"); const triageTestFailure_1 = require("./utils/triageTestFailure"); __exportStar(require("@playwright/test"), exports); // --------------------------------------------------------------------------- // Playwright-recorded video → flow persistence // --------------------------------------------------------------------------- // Playwright records videos when `use.video` is configured (`'on'`, // `'retain-on-failure'`, `'on-first-retry'`). Those videos live in the // Playwright `testInfo.outputDir` and ship with the Playwright HTML report, // but without explicit wiring they never make it into Donobu's flow // persistence (so the Studio UI's flow detail view shows no video). // // The integration here: // 1. Inside `finalizeTest`, decide whether to persist based on the // effective `video` option AND `testInfo.status`. The decision matrix // mirrors Playwright's own retain semantics so we never persist a // video the user told Playwright to discard (e.g., // `'retain-on-failure'` + passing test = no persist). // 2. If we should persist, we kick off an async block that uses // `Video.saveAs` (which internally waits for the BrowserContext to // close before writing the .webm to a tmp path). After the file is // finalized we read the bytes and forward them to `setVideo` on the // flow's persistence layer (which routes through the FileUploadCache // → cloud upload pipeline). // 3. The deferred persist promise is registered in a module-level set so // the worker-scoped drain guard can await it before the Playwright // worker exits — without this, fast tests would beat the saveAs-after- // close to the punch and the worker would terminate mid-write. /** * Maximum time the worker-scoped drain guard waits for in-flight video * persists to settle before continuing on to the file-upload drain. Each * video persist needs the BrowserContext to close (Playwright's video * pipeline only finalizes the .webm at that point); for a normal test * teardown that's near-instant. */ const VIDEO_PERSIST_TIMEOUT_MS = 30_000; /** * Promises tracking deferred video saves. Each entry resolves once the * test's BrowserContext has closed AND we've copied bytes from the * finalized .webm into the flow's persistence layer. The worker-scoped * `donobuFileUploadDrainGuard` awaits all of these before draining cloud * uploads, so we don't lose end-of-session videos. */ const pendingVideoPersists = new Set(); function trackVideoPersist(p) { pendingVideoPersists.add(p); void p.finally(() => { pendingVideoPersists.delete(p); }); } async function waitForPendingVideoPersists(timeoutMs) { const initial = pendingVideoPersists.size; if (initial === 0) { return { initial: 0, remaining: 0, timedOut: false }; } const all = Array.from(pendingVideoPersists); let timer; const timeout = new Promise((resolve) => { timer = setTimeout(() => resolve('timeout'), timeoutMs); }); const settled = Promise.allSettled(all).then(() => 'done'); const winner = await Promise.race([timeout, settled]); if (timer) { clearTimeout(timer); } return { initial, remaining: pendingVideoPersists.size, timedOut: winner === 'timeout', }; } /** * Returns true iff Playwright would have RETAINED the video file for this * test under the given `video` option, given the test's outcome. Mirrors * Playwright's retain semantics (see Playwright `VideoMode` docs): * * - `'off'` / `undefined` → no video at all; never persist. * - `'on'` → always retain → always persist. * - `'retain-on-failure'` → retain only on non-passing → persist * only when status !== 'passed'. * - `'on-first-retry'` → recorded only on retries; if a video * exists, we're on a retry → persist. * - any unknown future mode → conservatively SKIP, with a warn log. * Better to under-persist than violate * user intent for a mode we don't yet * understand. */ function shouldPersistVideo(videoOption, status) { const mode = typeof videoOption === 'string' ? videoOption : videoOption?.mode; switch (mode) { case 'on': return true; case 'on-first-retry': // When set, Playwright only records on retries; if a video exists // it implies we're on a retry — always retain. return true; case 'retry-with-video': // Deprecated alias for 'on-first-retry' that Playwright still // accepts on the type. Same retain semantics. return true; case 'retain-on-failure': return status !== 'passed'; case 'off': case undefined: return false; default: Logger_1.appLogger.warn(`Unknown Playwright video mode "${String(mode)}"; skipping video ` + `persistence to avoid violating user intent. If this is a real ` + `Playwright mode, add it to shouldPersistVideo() in testExtension.ts.`); return false; } } function describeVideoMode(videoOption) { if (typeof videoOption === 'string') { return videoOption; } if (videoOption && typeof videoOption === 'object' && 'mode' in videoOption) { return videoOption.mode; } return 'off'; } /** * If Playwright recorded a video for this test AND the effective video * mode says we should retain it, kicks off an async save+persist and * tracks the promise so the worker-scoped drain guard can await it. * * This function does NOT block on the actual file write — `Video.saveAs` * internally waits for the BrowserContext to close (which can't happen * until our page fixture's teardown returns). Awaiting it here would * deadlock. Instead we defer and let the worker-scoped fixture await all * pending persists at end-of-worker. */ function persistVideoIfApplicable(page, testInfo, videoOption) { const flowId = page._dnb.donobuFlowMetadata.id; const video = page.video(); if (!video) { // No video being recorded. Either video is 'off', or 'on-first-retry' // and this isn't a retry, etc. Nothing to do. return; } if (!shouldPersistVideo(videoOption, testInfo.status)) { Logger_1.appLogger.info(`Skipping video persist for flow ${flowId}: video mode ` + `"${describeVideoMode(videoOption)}" + status "${testInfo.status}" ` + `means Playwright will discard the file and we'd be violating user ` + `intent by keeping a copy.`); return; } const persistence = page._dnb.persistence; const persistPromise = (async () => { const tmpPath = path_1.default.join((0, os_1.tmpdir)(), `donobu-test-video-${flowId}.webm`); try { // Video.saveAs blocks until the BrowserContext closes and the .webm // is fully written. Playwright closes the context as part of its own // fixture teardown, AFTER our page fixture body returns — so this // promise unblocks once the test is fully torn down. await video.saveAs(tmpPath); const bytes = await fs_1.promises.readFile(tmpPath); await persistence.setVideo(flowId, bytes); Logger_1.appLogger.info(`Persisted video for flow ${flowId} ` + `(${(bytes.length / 1024).toFixed(0)} KB) — queued for cloud upload.`); } catch (err) { Logger_1.appLogger.warn(`Failed to persist video for flow ${flowId}: ${err.message}`); } finally { await fs_1.promises.unlink(tmpPath).catch(() => undefined); } })(); trackVideoPersist(persistPromise); } /** * Maximum time the worker-scoped post-test-session upload drain will wait * for cloud uploads to complete before letting the worker exit. Bytes that * aren't uploaded within this window stay on disk and are picked up by the * next `donobu` process to start with the same data dir (via the file-upload * cache's stale-claim reclaim). */ const UPLOAD_DRAIN_TIMEOUT_MS = 30_000; exports.test = test_1.test.extend({ /** * Establish a logging scope for the entire Playwright test *before* any other * fixtures run. Playwright builds the fixture dependency graph eagerly, so * user test bodies may start executing outside of the AsyncLocalStorage * context seeded later inside the `page` fixture. By creating an auto fixture * that runs first, we can: * 1. Generate a stable flow ID for the test. * 2. Enter the logging context immediately so any downstream fixture or * test body log sees the ID. * 3. Store the ID in a process-local fallback so synchronous logs emitted * outside ALS-aware code still carry the same identifier. * * Every other fixture (e.g., `page`) consumes this `flowLoggingContext`. */ flowLoggingContext: [ async ({}, use) => { // NOTE: // It would be *nice* if we could use 'testInfo.testId' for the flowId but // that is a trap! A Playwright testId is deterministic and Playwright keeps // it constant across retries, shards, and even repeated invocations of the // same test. If we reused testInfo.testId, every rerun would reuse the same // Donobu flow ID, so metadata would overwrite and concurrent executions could // clobber each other. const flowId = (0, crypto_1.randomUUID)(); const logBuffer = new FlowLogBuffer_1.FlowLogBuffer(); const flowContext = { flowId, logBuffer }; const asyncScope = new async_hooks_1.AsyncResource('DonobuFlowContext'); await Logger_1.loggingContext.run(flowContext, async () => { Logger_1.loggingContext.enterWith(flowContext); (0, Logger_1.setProcessLocalFlowId)(flowId); (0, Logger_1.setProcessLocalLogBuffer)(logBuffer); try { await asyncScope.runInAsyncScope(() => use({ flowId, logBuffer })); } finally { (0, Logger_1.setProcessLocalFlowId)(null); (0, Logger_1.setProcessLocalLogBuffer)(null); } }); }, { scope: 'test', auto: true }, ], /** * Auto fixture that monkey-patches `testInfo._addStep` so each Playwright * step in the `pw:api` (clicks, fills, navigations) and `expect` * (assertions) categories emits an `appLogger.info` entry on completion. * Routing native Playwright operations through the same per-flow * `FlowLogBuffer` that AI tool-call logs populate makes the live * `/api/flows/:id/logs` feed and persisted `logs.json` capture meaningful * step-by-step data even for non-AI Playwright tests. * * Depends on `flowLoggingContext` so the ALS / `processLocalLogBuffer` are * already set up by the time our patched `complete` fires. * * Verified against @playwright/test 1.57. `_addStep` and the TestStepImpl * shape (category, title, params, wallTime, endWallTime, error, complete) * are private Playwright internals — install and per-step logging are * wrapped in try/catch so any shape drift degrades to a no-op rather than * failing the test. */ pwApiStepLogger: [ async ({ flowLoggingContext: _ }, use, testInfo) => { const teardown = installPlaywrightStepLogger(testInfo); try { await use(); } finally { teardown(); } }, { scope: 'test', auto: true }, ], /** * Drain in-flight file uploads (videos, screenshots, etc.) to Donobu * Cloud at the end of every Playwright worker's lifetime. * * The {@link FileUploadWorker} runs continuously while tests execute, * uploading flow artifacts to the cloud asynchronously. When the worker * process exits, any uploads still mid-flight are abandoned at whatever * stage they reached. The bytes survive on disk as `.uploading.<token>` * markers, but for ephemeral CI runners — where the box gets torn down * after the test session — there's no future process to resume them. * * This auto fixture closes that gap by waiting for the queue to drain * before letting the worker exit. */ donobuFileUploadDrainGuard: [ async ({}, use) => { await use(); // ── Teardown — runs ONCE per Playwright worker, after every test ── // First, wait for any deferred video persists from finalizeTest to // complete. Each one is gated on a BrowserContext close (Playwright // finalizes the .webm at that point) and then forwards the bytes to // setVideo. Without this wait, fast tests would beat the // saveAs-after-close to the punch and the worker would exit // mid-write — meaning the cloud upload below would never be queued // for the missing video. const videoWait = await waitForPendingVideoPersists(VIDEO_PERSIST_TIMEOUT_MS); if (videoWait.initial > 0) { if (videoWait.timedOut) { Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: ${videoWait.remaining} of ` + `${videoWait.initial} video persist(s) still pending after ` + `${Math.round(VIDEO_PERSIST_TIMEOUT_MS / 1000)}s; proceeding ` + `with upload drain anyway. Those video files (if any) will ` + `not be uploaded this session.`); } else { Logger_1.appLogger.info(`donobuFileUploadDrainGuard: completed ${videoWait.initial} ` + `pending video persist(s) before upload drain.`); } } let initialActive; try { const status = await (0, fileUploadWorkerRegistry_1.getFileUploadAggregateStatus)(); initialActive = status.totalPending + status.totalInFlight; } catch (err) { Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: failed to read upload status; ` + `skipping drain. Error: ${err.message}`); return; } if (initialActive === 0) { Logger_1.appLogger.debug('donobuFileUploadDrainGuard: no pending file uploads at end of test ' + 'session; worker is exiting.'); return; } Logger_1.appLogger.info(`donobuFileUploadDrainGuard: test session complete; ${initialActive} ` + `file upload(s) still syncing to Donobu Cloud. ` + `Waiting up to ${Math.round(UPLOAD_DRAIN_TIMEOUT_MS / 1000)}s for ` + `the queue to drain before exit. ` + `(Any uploads not finished within the timeout stay on disk and ` + `resume on the next \`donobu\` process start.)`); const startedAt = Date.now(); let result; try { result = await (0, fileUploadWorkerRegistry_1.shutdownFileUploadWorkers)(UPLOAD_DRAIN_TIMEOUT_MS); } catch (err) { Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: drain threw; letting worker exit. ` + `Error: ${err.message}`); return; } const elapsedMs = Date.now() - startedAt; if (result.drained) { Logger_1.appLogger.info(`donobuFileUploadDrainGuard: all ${initialActive} file upload(s) ` + `completed in ${Math.round(elapsedMs / 1000)}s. Worker exiting.`); } else { Logger_1.appLogger.warn(`donobuFileUploadDrainGuard: drain timed out after ` + `${Math.round(elapsedMs / 1000)}s with ${result.totalRemaining} ` + `upload(s) still pending. The bytes are on disk and will resume ` + `the next time a \`donobu\` process starts with the same data ` + `directory. Worker exiting now.`); } }, { scope: 'worker', auto: true }, ], // 1) Declare `gptClient` as an "option" fixture with a default of `undefined`. gptClient: [ undefined, // default { option: true }, // so that test.use({ gptClient: ... }) can override ], // Override the default page fixture page: async ({ page, gptClient, headless, flowLoggingContext, video }, use, testInfo) => { Logger_1.appLogger.info(`Test started: "${testInfo.title}"`); const overallObjective = testInfo.annotations.find((v) => v.type === 'objective')?.description ?? null; const visualCueDurationMs = v4_1.z .int() .optional() .default(250) .parse(testInfo.project.metadata['visualCueDurationMs']); const { flowId, logBuffer } = flowLoggingContext; const extendedPage = await (0, extendPage_1.extendPage)(page, { flowId: flowId, visualCueDurationMs: visualCueDurationMs, cacheFilepath: (0, cacheLocator_1.buildPageAiCachePath)(testInfo.file), envVars: (0, DonobuFlowsManager_1.distillAllowedEnvVariableNames)(overallObjective, testInfo.annotations .filter((a) => a.type === 'ENV' && a.description) .map((a) => a.description)), gptClient: gptClient, headless: headless, }); extendedPage._dnb.donobuFlowMetadata.name = getSanitizedTestName(testInfo); extendedPage._dnb.donobuFlowMetadata.testId = testInfo.testId; extendedPage._dnb.donobuFlowMetadata.overallObjective = overallObjective; // Register browser console and network listeners so that logs from these // sources are captured into the flow's logBuffer. In Studio-launched flows // this is done by WebTargetInspector.initialize(), but that method is not // called during test runs, so we wire the listeners up here directly. (0, PageLogListeners_1.registerPageLogListeners)(page); // Bind the Playwright-provided `use` callback to an async resource so that // any microtasks scheduled inside the test body keep the flow logging // context. Without this, Playwright may re-use earlier async resources that // were created before we entered the logging scope. const asyncScope = new async_hooks_1.AsyncResource('DonobuTestContext'); const boundUse = asyncScope.bind(use); try { try { const testMetadata = flowMetadataToTestMetadata(testInfo.testId, extendedPage._dnb.donobuFlowMetadata); await extendedPage._dnb.testsPersistence.createTest(testMetadata); } catch { // Test already exists from a prior run — nothing to do. } await extendedPage._dnb.persistence.setFlowMetadata(extendedPage._dnb.donobuFlowMetadata); await boundUse(extendedPage); } catch (error) { testInfo.status = error instanceof test_1.errors.TimeoutError ? 'timedOut' : 'failed'; throw error; } finally { await finalizeTest(extendedPage, testInfo, logBuffer, video); } }, }); /** * Recursively walks Playwright's internal `testInfo._steps` tree and collects * all completed `expect` and `test.step` entries. These are the native * Playwright operations (assertions, named step blocks) that are NOT captured * by the Donobu AI tool call pipeline. * * Uses `testInfo._steps` (an internal Playwright property) rather than a * separate Reporter because: * 1. We already have `testInfo` here — zero infrastructure changes needed. * 2. Avoids all Playwright module-resolution machinery for reporter strings. * 3. `_steps` is fully populated by the time `finalizeTest()` runs (the * test body has already completed). */ function collectNativeSteps(rawSteps, startTimes) { const result = []; for (const step of rawSteps) { const cat = step.category ?? ''; const childRaw = Array.isArray(step.steps) ? step.steps : []; const collectedChildren = collectNativeSteps(childRaw, startTimes); if (cat === 'expect' || cat === 'test.step') { // Start time comes from the onStepBegin payload (captured by the // pwApiStepLogger fixture into startTimes). If unavailable, fall // back to the step's endWallTime so the window is zero-width and // sibling steps don't get falsely nested inside. const endWallTime = step.endWallTime ?? Date.now(); const startWallTime = (typeof step.stepId === 'string' && startTimes.get(step.stepId)) || endWallTime; result.push({ title: step.title ?? '', category: cat, startWallTime, endWallTime, passed: !step.error, error: step.error ? { message: step.error.message, stack: step.error.stack } : undefined, location: step.location ? { file: step.location.file ?? step.location.fileName ?? '', line: step.location.line ?? 0, column: step.location.column ?? 0, } : undefined, children: collectedChildren, }); } else { // Parent is not a kept category (e.g. pw:api) — promote any qualifying // descendants so a nested expect() still appears in the report. result.push(...collectedChildren); } } result.sort((a, b) => a.startWallTime - b.startWallTime); return result; } // --------------------------------------------------------------------------- // Live Playwright step → appLogger pipeline // --------------------------------------------------------------------------- // Used by the `pwApiStepLogger` fixture above to surface native Playwright // operations (clicks, fills, navigations, expects) in the same per-flow // FlowLogBuffer that AI tool-call logs already populate. const LOGGED_STEP_CATEGORIES = new Set([ 'pw:api', 'expect', ]); const STEP_PARAM_ALLOWLIST = [ 'url', 'selector', 'text', 'value', 'timeout', 'name', 'key', // For expect steps Playwright stores the asserted value here // (e.g., `expect(x).toBe('$51.00')` → params: { expected: '$51.00' }). 'expected', ]; /** * Pull a small, JSON-safe subset of `step.params` for logging. Playwright's * step.params can hold non-serializable values (Locator handles, Page refs, * etc.) that would either cycle or bloat the log feed; we keep only known * primitive fields. */ function pickStepParams(rawParams) { if (!rawParams || typeof rawParams !== 'object') { return undefined; } const result = {}; let hasAny = false; for (const key of STEP_PARAM_ALLOWLIST) { const value = rawParams[key]; if (value === undefined) { continue; } const t = typeof value; if (t === 'string' || t === 'number' || t === 'boolean') { result[key] = value; hasAny = true; } } return hasAny ? result : undefined; } const NODE_MODULES_LOCATION_REGEX = /[\\/]node_modules[\\/]/; function emitPlaywrightStepLog(step) { const category = step?.category ?? ''; if (!LOGGED_STEP_CATEGORIES.has(category)) { return; } // Drop steps that originated inside a third-party (or Donobu library) // package. The user's test code never lives in node_modules, so any step // attributed there is plumbing — Donobu's own AssertTool, internal CDP // wrappers, etc. — not the user's intent. const locationFile = (step.location && typeof step.location === 'object' && typeof (step.location.file ?? step.location.fileName) === 'string' && (step.location.file ?? step.location.fileName)) || ''; if (locationFile && NODE_MODULES_LOCATION_REGEX.test(locationFile)) { return; } const title = step.title ?? ''; const startedAt = typeof step.wallTime === 'number' ? step.wallTime : undefined; const completedAt = typeof step.endWallTime === 'number' ? step.endWallTime : undefined; const durationMs = startedAt !== undefined && completedAt !== undefined ? completedAt - startedAt : undefined; const error = step.error && typeof step.error === 'object' ? { name: typeof step.error.name === 'string' ? step.error.name : 'Error', // Playwright matchers emit ANSI-colored diff output. Strip it so // raw escape sequences don't leak into the log feed. message: (0, ansi_1.stripAnsi)(typeof step.error.message === 'string' ? step.error.message : ''), } : undefined; const location = step.location && typeof step.location === 'object' ? { file: step.location.file ?? step.location.fileName ?? '', line: step.location.line ?? 0, column: step.location.column ?? 0, } : undefined; const params = pickStepParams(step.params); // Default message: Playwright's title verbatim, plus a primitive // `expected` value for matchers Playwright didn't already inline. let message = `${title}${formatExpectedSuffix(title, params?.expected)}`; // For expect steps, prefer the actual user source line at // location.file:line — `Expect "toBe"` is too bare; the source shows // both the LHS being asserted and the matcher together // (`expect(text).toBe('$51.01')`). Fall back to the matcher message if // the source can't be read. if (category === 'expect' && location?.file && location.line > 0) { const sourceLine = readSourceLine(location.file, location.line); if (sourceLine) { message = sourceLine; } } const meta = { category, title, params, durationMs, startedAt, completedAt, error, location, }; // A populated `step.error` means Playwright reported the step as failed — // surface it at error level so it stands out in the feed. if (error) { Logger_1.appLogger.error(message, meta); } else { Logger_1.appLogger.info(message, meta); } } function formatExpectedSuffix(title, expected) { if (expected === undefined || expected === null) { return ''; } const t = typeof expected; if (t !== 'string' && t !== 'number' && t !== 'boolean') { return ''; } const formatted = JSON.stringify(expected); if (title.includes(formatted)) { return ''; } return ` ${formatted}`; } /** * Per-worker cache of source file contents (split into lines), keyed by * absolute file path. Source files don't change during a worker's * lifetime, so one read per file is sufficient. * * `null` means a previous read failed; we cache that to avoid retrying. */ const sourceLineCache = new Map(); /** Reset the source-line cache. Test-only. */ function _resetSourceLineCacheForTests() { sourceLineCache.clear(); } /** * Read the trimmed line at `file:line` (1-based), with a per-worker * cache. Drops a trailing semicolon for cleanliness. Returns undefined * if the file or line can't be read. */ function readSourceLine(file, line) { let lines = sourceLineCache.get(file); if (lines === undefined) { try { lines = (0, fs_1.readFileSync)(file, 'utf8').split('\n'); } catch { lines = null; } sourceLineCache.set(file, lines); } if (!lines) { return undefined; } const raw = lines[line - 1]; if (typeof raw !== 'string') { return undefined; } let trimmed = raw.trim(); if (trimmed.endsWith(';')) { trimmed = trimmed.slice(0, -1); } return trimmed || undefined; } /** * Monkey-patch `testInfo._callbacks.onStepEnd` so each Playwright step in * the `pw:api` / `expect` categories emits an `appLogger.info` entry once * Playwright signals the step has finished. Returns a teardown function * that restores the original callback. * * Why patch `onStepEnd` and not `_addStep`: * Playwright captures the step's source location with a stack walk inside * `_addStep` (see playwright/lib/worker/testInfo.js). Patching `_addStep` * inserts a Donobu library frame into that stack, and Playwright's * built-in filter only excludes `@playwright/test` / `playwright-core` * paths — so for steps that don't pre-set `data.location` (most notably * `expect`), the captured location lands inside our wrapper in * `node_modules/donobu/...`. The downstream node_modules filter would * then drop the entry. `_callbacks.onStepEnd` runs AFTER the stack * capture, so this hook is invisible to Playwright's location resolution. * * Verified against @playwright/test 1.57+. `_callbacks` / `_stepMap` and * the step shape (category, title, params, endWallTime, error, location) * are private Playwright internals — install and per-step logging are * wrapped in try/catch so any shape drift degrades to a no-op rather than * failing the test. * * Exported for unit testing. */ function installPlaywrightStepLogger(testInfo) { const ti = testInfo; const callbacks = ti._callbacks; const stepMap = ti._stepMap; const originalOnStepEnd = callbacks && typeof callbacks.onStepEnd === 'function' ? callbacks.onStepEnd : null; const originalOnStepBegin = callbacks && typeof callbacks.onStepBegin === 'function' ? callbacks.onStepBegin : null; if (!callbacks || !originalOnStepEnd || !stepMap) { return () => { }; } // Stash a stepId -> wallTime map on testInfo so collectNativeSteps can // look up start times when building the report hierarchy. Playwright // doesn't store wallTime on the step object itself — it's only emitted // via the onStepBegin payload — so we capture it here. Without this, // `test.step` blocks have no recoverable start time and any preceding // tool-call step gets falsely nested inside them. const startTimes = new Map(); ti.__donobuStepStartTimes = startTimes; let installed = false; try { callbacks.onStepEnd = function patchedOnStepEnd(payload) { const ret = originalOnStepEnd.call(this, payload); try { const stepId = payload?.stepId; if (typeof stepId === 'string') { const step = stepMap.get(stepId); if (step && LOGGED_STEP_CATEGORIES.has(step.category)) { emitPlaywrightStepLog(step); } } } catch (err) { Logger_1.appLogger.debug('Failed to log Playwright step', err); } return ret; }; callbacks.onStepBegin = function patchedOnStepBegin(payload) { const ret = originalOnStepBegin ? originalOnStepBegin.call(this, payload) : undefined; try { if (typeof payload?.stepId === 'string' && typeof payload?.wallTime === 'number') { startTimes.set(payload.stepId, payload.wallTime); } } catch (err) { Logger_1.appLogger.debug('Failed to record Playwright step start time', err); } return ret; }; installed = true; } catch (err) { Logger_1.appLogger.warn('Failed to install Playwright step logger; native Playwright steps will not appear in flow logs', err); } return () => { if (installed) { callbacks.onStepEnd = originalOnStepEnd; if (originalOnStepBegin) { callbacks.onStepBegin = originalOnStepBegin; } } }; } /** * Fetch tool-call screenshots from the flow's persistence layer and attach * them to the Playwright report. Each screenshot is attached as * `donobu-step-{index}-{toolName}` so the HTML report generator can * correlate them to tool calls. A companion JSON attachment * (`donobu-step-summary`) provides the tool call metadata. */ async function attachStepScreenshots(sharedState, testInfo) { const flowId = sharedState.donobuFlowMetadata.id; let toolCalls; try { toolCalls = await sharedState.persistence.getToolCalls(flowId); } catch { // Flow may not have any tool calls (e.g., non-AI tests) return; } if (toolCalls.length === 0) { return; } const stepSummary = []; for (let i = 0; i < toolCalls.length; i++) { const tc = toolCalls[i]; if (!tc.outcome || tc.completedAt === null) { continue; } const hasScreenshot = !!tc.postCallImageId; stepSummary.push({ index: i, toolName: tc.toolName, page: tc.page, startedAt: tc.startedAt, completedAt: tc.completedAt, success: tc.outcome.isSuccessful, summary: tc.outcome.forLlm?.slice(0, 200) ?? '', hasScreenshot, parameters: tc.parameters, outcome: { isSuccessful: tc.outcome.isSuccessful, forLlm: tc.outcome.forLlm, }, metadata: tc.outcome.metadata, }); if (hasScreenshot) { try { const bytes = await sharedState.persistence.getScreenShot(flowId, tc.postCallImageId); if (bytes) { const isJpeg = bytes[0] === 0xff && bytes[1] === 0xd8 && bytes[2] === 0xff; const ext = isJpeg ? 'jpg' : 'png'; // Write the bytes to a temporary file so we can attach by `path` // rather than `body`. Attaching by body would keep the bytes in // memory and pass them through to reporters, which then base64 // them into the HTML — the very thing we're trying to avoid. By // contrast `attach({ path })` makes Playwright copy the file into // `<outputDir>/attachments/<sha1>.<ext>` and only carry the path. // We unlink our temp file afterwards so the test-results tree // doesn't keep a duplicate next to the attachments folder. const filePath = testInfo.outputPath(`donobu-step-${i}-${tc.toolName}.${ext}`); await fs_1.promises.writeFile(filePath, bytes); try { await testInfo.attach(`donobu-step-${i}-${tc.toolName}`, { path: filePath, contentType: isJpeg ? 'image/jpeg' : 'image/png', }); } finally { await fs_1.promises.unlink(filePath).catch(() => { /* best-effort cleanup; attach already copied the bytes */ }); } } } catch { Logger_1.appLogger.debug(`Could not retrieve step screenshot ${tc.postCallImageId} for step ${i}`); } } } await testInfo.attach('donobu-step-summary', { body: JSON.stringify(stepSummary), contentType: 'application/json', }); } async function finalizeTest(page, testInfo, logBuffer, videoOption) { const sharedState = page._dnb; // Kick off video persistence early in teardown. The actual file copy is // deferred (it can't run until the BrowserContext closes, which happens // AFTER our fixture body returns), so this just queues a tracked promise. // Doing it before the rest of finalizeTest's work means the deferred // saveAs has already-finalized references to flowId / persistence / video // by the time it runs, regardless of what subsequent teardown does. if (videoOption !== undefined) { persistVideoIfApplicable(page, testInfo, videoOption); } try { sharedState.donobuFlowMetadata.state = testInfo.status === 'failed' || testInfo.status === 'timedOut' ? 'FAILED' : 'SUCCESS'; sharedState.donobuFlowMetadata.completedAt = new Date().getTime(); await sharedState.persistence.setFlowMetadata(sharedState.donobuFlowMetadata); await testInfo.attach('test-flow-metadata.json', { body: JSON.stringify(sharedState.donobuFlowMetadata, null, 2), contentType: 'application/json', }); // Persist captured flow logs so they are available in the Donobu UI, // mirroring what DonobuFlowsManager does for Studio-launched flows. if (logBuffer) { try { const snapshot = logBuffer.snapshot(); await sharedState.persistence.setFlowFile(sharedState.donobuFlowMetadata.id, 'logs.json', Buffer.from(JSON.stringify(snapshot))); } catch (error) { Logger_1.appLogger.error('Failed to persist flow logs:', error); } } // Attach step-level screenshots from the flow's tool call history. // These enable the HTML report to show a visual timeline of what the // AI agent saw at each step. await attachStepScreenshots(sharedState, testInfo); // Attach native Playwright steps (expect assertions, test.step blocks) // so the HTML report can show a unified timeline alongside AI tool calls. try { const startTimes = testInfo.__donobuStepStartTimes ?? new Map(); const nativeSteps = collectNativeSteps(testInfo._steps ?? [], startTimes); if (nativeSteps.length > 0) { await testInfo.attach('donobu-native-steps', { body: JSON.stringify(nativeSteps), contentType: 'application/json', }); } } catch { // Non-fatal: native step collection failing must not affect the test result. } // Attach AI invocation wrappers (page.ai / page.ai.assert / page.ai.locate) // so the HTML reporter can render each as a parent node containing the // tool calls and native steps that fell inside its time window. The // `cacheHit` flag drives the `[cached]` badge on the wrapper itself — // not on inner actions, since a cached `page.ai` may legitimately invoke // a live `page.ai.assert` and vice versa. try { const aiInvocations = sharedState.aiInvocations; if (aiInvocations.length > 0) { await testInfo.attach('donobu-ai-invocations', { body: JSON.stringify(aiInvocations), contentType: 'application/json', }); } } catch { // Non-fatal. } const browserState = await BrowserUtils_1.BrowserUtils.getBrowserStorageState(page.context()); await sharedState.persistence.setBrowserState(sharedState.donobuFlowMetadata.id, browserState); } catch (error) { Logger_1.appLogger.error(`Error during cleanup for test ${testInfo.title}:`, error); } if (testInfo.status === 'failed') { if (isV1Test(testInfo)) { if (isV1SelfHealingEnabled(testInfo) && !MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) { if (!sharedState.gptClient) { Logger_1.appLogger.warn('Will not self-heal due to no GPT client being set up.'); } else { try { await (0, selfHealing_1.selfHeal)(sharedState.gptClient, testInfo, page); } catch (error) { Logger_1.appLogger.error('Error when attempting to self-heal', error); } } } } else { try { const evidenceResult = await (0, triageTestFailure_1.gatherTestFailureEvidence)(testInfo, page); if (evidenceResult?.filePath) { Logger_1.appLogger.info(`Persisted Donobu triage evidence for "${testInfo.title}" to ${evidenceResult.filePath}.`); } else if (evidenceResult?.evidence) { Logger_1.appLogger.info(`Captured Donobu triage evidence for "${testInfo.title}" (schema v${evidenceResult.evidence.schemaVersion}).`); } } catch (error) { Logger_1.appLogger.error(`Failed to gather test failure evidence for "${testInfo.title}".`, error); } } } else if (testInfo.status === 'passed' && MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) { const hasSelfHealedAnnotation = testInfo.annotations.some((annotation) => annotation.type === 'self-healed'); if (!hasSelfHealedAnnotation) { testInfo.annotations.push({ type: 'self-healed', description: 'Automatically healed by Donobu auto-heal rerun.', }); } } // Flush any page.tbd() sessions: replace the tbd() call sites in the // source files with the generated Playwright code for the recorded // user interactions. if (page._dnb.tbdSessions.length > 0) { try { const count = await (0, tbd_1.flushTbdSessions)(page._dnb.tbdSessions); if (count > 0) { Logger_1.appLogger.info(`tbd: replaced ${count} page.tbd() placeholder(s) with recorded actions`); } } catch (error) { Logger_1.appLogger.error('tbd: failed to flush replacements', error); } } Logger_1.appLogger.info(`Test ended: "${testInfo.title}" — status: ${testInfo.status}`); } /** * A "v1" test is one that was generated by the Donobu app and * has an overall object defined as a test annotation. */ function isV1Test(testInfo) { return testInfo.annotations.find((v) => v.type === 'objective') ? true : false; } function isV1SelfHealingEnabled(testInfo) { if (envVars_1.env.keys.SELF_HEAL_TESTS_ENABLED in testInfo.project.metadata) { // Check project-specific metadata. This takes priority if it exists. const isSelfHealEnabledForProject = MiscUtils_1.MiscUtils.yn(testInfo.project.metadata[envVars_1.env.keys.SELF_HEAL_TESTS_ENABLED]) ?? false; return isSelfHealEnabledForProject; } else { // Failover to checking the environment variable. const isSelfHealEnabledViaEnvVar = MiscUtils_1.MiscUtils.yn(envVars_1.env.data.SELF_HEAL_TESTS_ENABLED?.trim()) ?? false; return isSelfHealEnabledViaEnvVar; } } /** * Returns a reasonably unique name for the current test, while also capping the * length to the maximum number of characters allowed by underlying Donobu system. */ function getSanitizedTestName(testInfo) { return testInfo.titlePath .join(' > ') .slice(-DonobuFlowsManager_1.DonobuFlowsManager.FLOW_NAME_MAX_LENGTH); } /** Builds a TestMetadata from the fields available on a FlowMetadata. */ function flowMetadataToTestMetadata(testId, flowMeta) { return { id: testId, name: flowMeta.name, target: flowMeta.target, web: flowMeta.web, envVars: flowMeta.envVars, customTools: flowMeta.customTools, overallObjective: flowMeta.overallObjective, allowedTools: flowMeta.allowedTools, resultJsonSchema: flowMeta.resultJsonSchema, callbackUrl: flowMeta.callbackUrl, maxToolCalls: flowMeta.maxToolCalls, suiteId: null, nextRunMode: 'DETERMINISTIC', provenance: (0, buildProvenance_1.buildProvenance)('CODE'), }; } //# sourceMappingURL=testExtension.js.map