UNPKG

magnitude-test

Version:

A TypeScript client for running automated UI tests through the Magnitude testing platform

759 lines (744 loc) 26 kB
import { l as logger, V as VERSION, p as processUrl, a as addProtocolIfMissing } from './version-DtiDJ6_4.mjs';
import { parentPort, workerData as workerData$2 } from 'node:worker_threads';
import { isBun } from 'std-env';
import EventEmitter$1 from 'node:events';
import cuid2 from '@paralleldrive/cuid2';
import { BrowserAgent, AgentError, getMachineId, getCodebaseId, posthog, buildDefaultBrowserAgentOptions } from 'magnitude-core';
import z from 'zod';
import 'pino';
import 'node:module';

// ---------------------------------------------------------------------------
// Shared registries
//
// These live on globalThis and are only created when absent, so every
// evaluation of this module within one worker sees the same instances.
// ---------------------------------------------------------------------------

if (!globalThis.__magnitudeTestFunctions) {
  globalThis.__magnitudeTestFunctions = /* @__PURE__ */ new Map();
}
/** Map of generated test id -> test function. */
const testFunctions = globalThis.__magnitudeTestFunctions;

if (!globalThis.__magnitudeMessageEmitter) {
  globalThis.__magnitudeMessageEmitter = new EventEmitter$1();
}
/** Emitter on which "message" events (shutdown / execute requests) are received. */
const messageEmitter = globalThis.__magnitudeMessageEmitter;

if (!globalThis.__magnitudeTestHooks) {
  globalThis.__magnitudeTestHooks = {
    beforeAll: [],
    afterAll: [],
    beforeEach: [],
    afterEach: []
  };
}
/** Registered lifecycle hooks, grouped by kind. */
const hooks = globalThis.__magnitudeTestHooks;

/**
 * Turn any thrown value into a printable message.
 *
 * @param {unknown} error - The caught value; not necessarily an Error.
 * @returns {string} `error.message` for Error instances, `String(error)` otherwise.
 */
function toErrorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Send a message to the parent of this worker.
 *
 * Under Bun the message goes through `process.send`; otherwise it goes
 * through the worker_threads `parentPort`.
 *
 * @param {object} message - Payload to deliver.
 * @throws {Error} If the transport for the current runtime is unavailable.
 */
function postToParent(message) {
  if (isBun) {
    if (typeof process.send !== "function") {
      throw new Error("Not running in a Bun subprocess with IPC");
    }
    process.send(message);
    return;
  }
  if (!parentPort) throw new Error("Not running in a worker thread");
  parentPort.postMessage(message);
}

/**
 * Read the data handed to this worker.
 *
 * Under Bun it is parsed from the MAGNITUDE_WORKER_DATA environment variable;
 * otherwise the worker_threads `workerData` is returned.
 *
 * @returns {object} The worker data.
 * @throws {Error} If the data is missing (Bun) or there is no parent port.
 */
function getTestWorkerData() {
  if (isBun) {
    const dataStr = process.env.MAGNITUDE_WORKER_DATA;
    if (!dataStr) {
      throw new Error("Worker data not found in environment");
    }
    return JSON.parse(dataStr);
  }
  if (!parentPort) {
    throw new Error("Do not use this module on the main thread");
  }
  return workerData$2;
}

// ---------------------------------------------------------------------------
// Vendored eventemitter3 (bundler output). Logic intentionally left untouched;
// only whitespace differs from the bundled text.
// ---------------------------------------------------------------------------

function getDefaultExportFromCjs (x) {
  return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
}

var eventemitter3 = {exports: {}};

var hasRequiredEventemitter3;

function requireEventemitter3 () {
  if (hasRequiredEventemitter3) return eventemitter3.exports;
  hasRequiredEventemitter3 = 1;
  (function (module) {
    var has = Object.prototype.hasOwnProperty, prefix = "~";
    function Events() {
    }
    if (Object.create) {
      Events.prototype = /* @__PURE__ */ Object.create(null);
      if (!new Events().__proto__) prefix = false;
    }
    function EE(fn, context, once2) {
      this.fn = fn;
      this.context = context;
      this.once = once2 || false;
    }
    function addListener(emitter, event, fn, context, once2) {
      if (typeof fn !== "function") {
        throw new TypeError("The listener must be a function");
      }
      var listener = new EE(fn, context || emitter, once2), evt = prefix ? prefix + event : event;
      if (!emitter._events[evt]) emitter._events[evt] = listener, emitter._eventsCount++;
      else if (!emitter._events[evt].fn) emitter._events[evt].push(listener);
      else emitter._events[evt] = [emitter._events[evt], listener];
      return emitter;
    }
    function clearEvent(emitter, evt) {
      if (--emitter._eventsCount === 0) emitter._events = new Events();
      else delete emitter._events[evt];
    }
    function EventEmitter() {
      this._events = new Events();
      this._eventsCount = 0;
    }
    EventEmitter.prototype.eventNames = function eventNames() {
      var names = [], events, name;
      if (this._eventsCount === 0) return names;
      for (name in events = this._events) {
        if (has.call(events, name)) names.push(prefix ? name.slice(1) : name);
      }
      if (Object.getOwnPropertySymbols) {
        return names.concat(Object.getOwnPropertySymbols(events));
      }
      return names;
    };
    EventEmitter.prototype.listeners = function listeners(event) {
      var evt = prefix ? prefix + event : event, handlers = this._events[evt];
      if (!handlers) return [];
      if (handlers.fn) return [handlers.fn];
      for (var i = 0, l = handlers.length, ee = new Array(l); i < l; i++) {
        ee[i] = handlers[i].fn;
      }
      return ee;
    };
    EventEmitter.prototype.listenerCount = function listenerCount(event) {
      var evt = prefix ? prefix + event : event, listeners2 = this._events[evt];
      if (!listeners2) return 0;
      if (listeners2.fn) return 1;
      return listeners2.length;
    };
    EventEmitter.prototype.emit = function emit(event, a1, a2, a3, a4, a5) {
      var evt = prefix ? prefix + event : event;
      if (!this._events[evt]) return false;
      var listeners2 = this._events[evt], len = arguments.length, args, i;
      if (listeners2.fn) {
        if (listeners2.once) this.removeListener(event, listeners2.fn, void 0, true);
        switch (len) {
          case 1:
            return listeners2.fn.call(listeners2.context), true;
          case 2:
            return listeners2.fn.call(listeners2.context, a1), true;
          case 3:
            return listeners2.fn.call(listeners2.context, a1, a2), true;
          case 4:
            return listeners2.fn.call(listeners2.context, a1, a2, a3), true;
          case 5:
            return listeners2.fn.call(listeners2.context, a1, a2, a3, a4), true;
          case 6:
            return listeners2.fn.call(listeners2.context, a1, a2, a3, a4, a5), true;
        }
        for (i = 1, args = new Array(len - 1); i < len; i++) {
          args[i - 1] = arguments[i];
        }
        listeners2.fn.apply(listeners2.context, args);
      } else {
        var length = listeners2.length, j;
        for (i = 0; i < length; i++) {
          if (listeners2[i].once) this.removeListener(event, listeners2[i].fn, void 0, true);
          switch (len) {
            case 1:
              listeners2[i].fn.call(listeners2[i].context);
              break;
            case 2:
              listeners2[i].fn.call(listeners2[i].context, a1);
              break;
            case 3:
              listeners2[i].fn.call(listeners2[i].context, a1, a2);
              break;
            case 4:
              listeners2[i].fn.call(listeners2[i].context, a1, a2, a3);
              break;
            default:
              if (!args) for (j = 1, args = new Array(len - 1); j < len; j++) {
                args[j - 1] = arguments[j];
              }
              listeners2[i].fn.apply(listeners2[i].context, args);
          }
        }
      }
      return true;
    };
    EventEmitter.prototype.on = function on(event, fn, context) {
      return addListener(this, event, fn, context, false);
    };
    EventEmitter.prototype.once = function once(event, fn, context) {
      return addListener(this, event, fn, context, true);
    };
    EventEmitter.prototype.removeListener = function removeListener(event, fn, context, once2) {
      var evt = prefix ? prefix + event : event;
      if (!this._events[evt]) return this;
      if (!fn) {
        clearEvent(this, evt);
        return this;
      }
      var listeners2 = this._events[evt];
      if (listeners2.fn) {
        if (listeners2.fn === fn && (!once2 || listeners2.once) && (!context || listeners2.context === context)) {
          clearEvent(this, evt);
        }
      } else {
        for (var i = 0, events = [], length = listeners2.length; i < length; i++) {
          if (listeners2[i].fn !== fn || once2 && !listeners2[i].once || context && listeners2[i].context !== context) {
            events.push(listeners2[i]);
          }
        }
        if (events.length) this._events[evt] = events.length === 1 ? events[0] : events;
        else clearEvent(this, evt);
      }
      return this;
    };
    EventEmitter.prototype.removeAllListeners = function removeAllListeners(event) {
      var evt;
      if (event) {
        evt = prefix ? prefix + event : event;
        if (this._events[evt]) clearEvent(this, evt);
      } else {
        this._events = new Events();
        this._eventsCount = 0;
      }
      return this;
    };
    EventEmitter.prototype.off = EventEmitter.prototype.removeListener;
    EventEmitter.prototype.addListener = EventEmitter.prototype.on;
    EventEmitter.prefixed = prefix;
    EventEmitter.EventEmitter = EventEmitter;
    {
      module.exports = EventEmitter;
    }
  } (eventemitter3));
  return eventemitter3.exports;
}

var eventemitter3Exports = requireEventemitter3();
var EventEmitter = /*@__PURE__*/getDefaultExportFromCjs(eventemitter3Exports);

// ---------------------------------------------------------------------------
// Test agent
// ---------------------------------------------------------------------------

// Prompt prefix for check(). Built from explicit pieces so that the space
// preceding the line break is visible; the resulting string is the same one
// the bundled template literal produced after `.trim()`.
const CHECK_INSTRUCTIONS = [
  'Given the actions of an LLM agent executing a test case, and a screenshot taken afterwards, evaluate whether the provided check "passes" i.e. holds true or not. ',
  "Check to evaluate:"
].join("\n");

/**
 * BrowserAgent extended with a `check` operation and a dedicated emitter
 * (`checkEvents`) that reports "checkStarted" / "checkDone".
 */
class TestCaseAgent extends BrowserAgent {
  checkEvents = new EventEmitter();

  /**
   * Ask the agent whether `description` holds and fail when it does not.
   *
   * Emits "checkStarted" before querying and "checkDone" (with the verdict)
   * once the query resolves; the returned reasoning is recorded in memory.
   *
   * @param {string} description - The check to evaluate.
   * @throws {AgentError} With variant "check_failed" when the check does not pass.
   */
  async check(description) {
    const instructions = CHECK_INSTRUCTIONS + ` <check>${description}</check>`;
    this.checkEvents.emit("checkStarted", description);
    const response = await this.query(instructions, z.object({
      reasoning: z.string(),
      passed: z.boolean()
    }));
    this.memory.recordThought(response.reasoning);
    this.checkEvents.emit("checkDone", description, response.passed);
    if (!response.passed) {
      throw new AgentError(`Check failed: ${description}`, { variant: "check_failed" });
    }
  }
}

/**
 * Watches agent events and uses them to construct and update a test state.
 *
 * Handles most state tracking but not result/failure/done, because the agent
 * itself doesn't know when the test is done; that is known when the test
 * function finishes. Emits "stateChanged" (with the state object) on `events`.
 */
class TestStateTracker {
  agent;
  state;
  // Most recently started step or check; actions and outcomes attach to it.
  lastStepOrCheck = null;
  events;

  /**
   * @param {TestCaseAgent} agent - Agent whose `events` and `checkEvents` are observed.
   */
  constructor(agent) {
    this.agent = agent;
    this.events = new EventEmitter();
    this.state = {
      status: "pending",
      stepsAndChecks: [],
      modelUsage: []
    };
    this.agent.events.on("start", this.onStart, this);
    this.agent.events.on("stop", this.onStop, this);
    this.agent.events.on("actStarted", this.onActStarted, this);
    this.agent.events.on("actDone", this.onActDone, this);
    this.agent.events.on("actionStarted", this.onActionStarted, this);
    this.agent.events.on("actionDone", this.onActionDone, this);
    this.agent.events.on("tokensUsed", this.onTokensUsed, this);
    this.agent.checkEvents.on("checkStarted", this.onCheckStarted, this);
    this.agent.checkEvents.on("checkDone", this.onCheckDone, this);
  }

  /** @returns {object} The live (mutable) state object. */
  getState() {
    return this.state;
  }

  /** Record the start time and mark the test as running. */
  onStart() {
    this.state.startedAt = Date.now();
    this.state.status = "running";
    this.events.emit("stateChanged", this.state);
  }

  /** Record the stop time. */
  onStop() {
    this.state.doneAt = Date.now();
    this.events.emit("stateChanged", this.state);
  }

  /**
   * Fold a usage report into the per-model totals. Entries are matched by the
   * JSON serialization of their `llm` field. Does not emit "stateChanged".
   *
   * @param {object} modelUsage - Report carrying `llm`, `inputTokens`, `outputTokens`.
   */
  onTokensUsed(modelUsage) {
    const modelHash = JSON.stringify(modelUsage.llm);
    let exists = false;
    for (const usage of this.state.modelUsage) {
      const compare = JSON.stringify(usage.llm);
      if (modelHash === compare) {
        exists = true;
        usage.inputTokens += modelUsage.inputTokens;
        usage.outputTokens += modelUsage.outputTokens;
        usage.numCalls += 1;
      }
    }
    if (!exists) {
      this.state.modelUsage.push({ ...modelUsage, numCalls: 1 });
    }
  }

  /**
   * Attach an action (plus its rendered form) to the current step.
   *
   * @param {object} action - The action being started.
   * @throws {Error} If the most recent entry is not a step.
   */
  onActionStarted(action) {
    // TODO: maybe allow detached actions (e.g. synthetic load at beginning, or manual low-level actions)
    if (!this.lastStepOrCheck || this.lastStepOrCheck.variant !== "step") {
      throw new Error("Action reported without preceding step");
    }
    this.lastStepOrCheck.actions.push({
      action,
      pretty: this.agent.identifyAction(action).render(action)
    });
    this.events.emit("stateChanged", this.state);
  }

  /** Intentionally a no-op; present so the "actionDone" subscription has a target. */
  onActionDone(action) {
  }

  /**
   * Open a new running step.
   *
   * @param {string} task - Step description.
   */
  onActStarted(task) {
    const stepDescriptor = {
      variant: "step",
      description: task,
      actions: [],
      status: "running"
    };
    this.state.stepsAndChecks.push(stepDescriptor);
    this.lastStepOrCheck = stepDescriptor;
    this.events.emit("stateChanged", this.state);
  }

  /**
   * Mark the current step as passed.
   *
   * @throws {Error} If the most recent entry is not a step.
   */
  onActDone(task) {
    if (!this.lastStepOrCheck || this.lastStepOrCheck.variant !== "step") {
      throw new Error("Step success without preceding step");
    }
    this.lastStepOrCheck.status = "passed";
    this.events.emit("stateChanged", this.state);
  }

  /**
   * Open a new running check.
   *
   * @param {string} check - Check description.
   */
  onCheckStarted(check) {
    const checkDescriptor = {
      variant: "check",
      description: check,
      status: "running"
    };
    this.state.stepsAndChecks.push(checkDescriptor);
    this.lastStepOrCheck = checkDescriptor;
    this.events.emit("stateChanged", this.state);
  }

  /**
   * Record the verdict of the current check.
   *
   * @param {string} check - Check description (unused).
   * @param {boolean} passed - Whether the check held.
   * @throws {Error} If the most recent entry is not a check.
   */
  onCheckDone(check, passed) {
    if (!this.lastStepOrCheck || this.lastStepOrCheck.variant !== "check") {
      throw new Error("Check success reported without preceding check");
    }
    this.lastStepOrCheck.status = passed ? "passed" : "failed";
    this.events.emit("stateChanged", this.state);
  }
}

// ---------------------------------------------------------------------------
// Telemetry
// ---------------------------------------------------------------------------

/**
 * Summarize a finished test state and report it through posthog as a
 * "test-run" event. Failures are logged as warnings and never rethrown.
 *
 * @param {object} state - Final test state (steps/checks, usage, status, failure).
 * @returns {Promise<void>}
 */
async function sendTelemetry(state) {
  let numSteps = 0;
  let numChecks = 0;
  let browserActionCount = 0;
  for (const item of state.stepsAndChecks) {
    if (item.variant === "step") {
      numSteps += 1;
      for (const action of item.actions) {
        const variant = action.action.variant;
        if (variant.startsWith("browser") || variant.startsWith("keyboard") || variant.startsWith("mouse")) {
          browserActionCount += 1;
        }
      }
    } else {
      numChecks += 1;
    }
  }
  const userId = getMachineId();
  const codebaseId = getCodebaseId();
  if (codebaseId) {
    try {
      posthog.groupIdentify({
        groupType: "codebase",
        groupKey: codebaseId
      });
    } catch (error) {
      // toErrorMessage keeps the warning itself from throwing on non-Error values.
      logger.warn(`Failed to identify group: ${toErrorMessage(error)}`);
    }
  }
  const payload = {
    source: "magnitude-test",
    telemetryVersion: "0.2",
    packageVersion: VERSION,
    codebase: codebaseId,
    startedAt: state.startedAt ?? Date.now(),
    doneAt: state.doneAt ?? Date.now(),
    numSteps,
    numChecks,
    browserActionCount,
    modelUsage: state.modelUsage,
    passed: state.status === "passed",
    failure: state.failure
  };
  try {
    posthog.capture({
      distinctId: userId,
      event: "test-run",
      properties: { ...payload },
      ...(codebaseId ? { groups: { codebase: codebaseId } } : {})
    });
    await posthog.shutdown();
  } catch (error) {
    logger.warn(`Failed to send telemetry (may have timed out): ${toErrorMessage(error)}`);
  }
}

// ---------------------------------------------------------------------------
// Registration state
// ---------------------------------------------------------------------------

const workerData = getTestWorkerData();
const generateId = cuid2.init({ length: 12 });

// Prompt stacks keyed by generated test id, so tests that share a title do
// not overwrite each other's prompts.
const testPromptStacks = /* @__PURE__ */ new Map();

let beforeAllExecuted = false;
let beforeAllError = null;
let afterAllExecuted = false;
let isShuttingDown = false;
// Ids of tests whose afterEach hooks are still owed.
let pendingAfterEach = /* @__PURE__ */ new Set();
let currentGroup;

/**
 * Store a test function under a fresh id and announce it to the parent.
 *
 * @param {Function} testFn - The test body.
 * @param {string} title - Test title.
 * @param {string} url - Start URL for the test.
 * @returns {string} The generated test id.
 */
function registerTest(testFn, title, url) {
  const testId = generateId();
  testFunctions.set(testId, testFn);
  postToParent({
    type: "registered",
    test: {
      id: testId,
      title,
      url,
      filepath: workerData.relativeFilePath,
      group: currentGroup?.name
    }
  });
  return testId;
}

/** Set (or clear, with `undefined`) the group that new tests are declared in. */
function setCurrentGroup(group) {
  currentGroup = group;
}

/** @returns {object} A deep copy of the current group's options, or `{}`. */
function currentGroupOptions() {
  return structuredClone(currentGroup?.options) ?? {};
}

// ---------------------------------------------------------------------------
// Message handling
// ---------------------------------------------------------------------------

/**
 * Handle a "graceful_shutdown" message: run owed afterEach hooks, run the
 * afterAll hooks if they have not run yet, then acknowledge to the parent.
 * Hook failures are logged and do not prevent the acknowledgement.
 */
async function handleGracefulShutdown() {
  isShuttingDown = true;
  if (pendingAfterEach.size > 0) {
    try {
      await Promise.all(
        [...pendingAfterEach].map(async (_testId) => {
          for (const afterEachHook of hooks.afterEach) {
            await afterEachHook();
          }
        })
      );
    } catch (error) {
      console.error("afterEach hooks failed during graceful shutdown:", error);
    }
  }
  if (!afterAllExecuted) {
    try {
      for (const afterAllHook of hooks.afterAll) {
        await afterAllHook();
      }
      afterAllExecuted = true;
    } catch (error) {
      console.error("afterAll hook failed during graceful shutdown:\n", error);
    }
  }
  postToParent({ type: "graceful_shutdown_complete" });
}

/**
 * Handle an "execute" message: run one registered test with its hooks and
 * report state changes, the final result, or an error to the parent.
 *
 * @param {object} message - Carries `test`, `browserOptions`, `llm`, `grounding`, `telemetry`.
 */
async function handleExecute(message) {
  if (isShuttingDown) {
    postToParent({
      type: "test_error",
      testId: message.test.id,
      error: "Test cancelled due to graceful shutdown"
    });
    return;
  }
  const { test, browserOptions, llm, grounding, telemetry } = message;
  const testFn = testFunctions.get(test.id);
  if (!testFn) {
    postToParent({
      type: "test_error",
      testId: test.id,
      error: `Test function not found: ${test.id}`
    });
    return;
  }
  try {
    const promptStack = testPromptStacks.get(test.id) ?? [];
    const prompt = promptStack.length > 0 ? promptStack.join("\n") : void 0;
    const { agentOptions: defaultAgentOptions, browserOptions: defaultBrowserOptions } = buildDefaultBrowserAgentOptions({
      agentOptions: { llm, ...(prompt ? { prompt } : {}) },
      browserOptions: { url: test.url, browser: browserOptions, grounding }
    });
    const agent = new TestCaseAgent({
      // disable telemetry to keep test run telemetry separate from general automation telemetry
      agentOptions: { ...defaultAgentOptions, telemetry: false },
      browserOptions: defaultBrowserOptions
    });
    const tracker = new TestStateTracker(agent);
    tracker.events.on("stateChanged", (state) => {
      postToParent({ type: "test_state_change", testId: test.id, state });
    });
    await agent.start();
    let finalState;
    let finalResult;
    // Set once the afterEach hooks have been started for this test, so a
    // failing afterEach hook does not cause every hook to run a second time.
    let afterEachStarted = false;
    try {
      if (!beforeAllExecuted && hooks.beforeAll.length > 0) {
        try {
          for (const beforeAllHook of hooks.beforeAll) {
            await beforeAllHook();
          }
        } catch (error) {
          console.error("beforeAll hooks failed:", error);
          beforeAllError = error instanceof Error ? error : new Error(String(error));
        } finally {
          beforeAllExecuted = true;
        }
      }
      // A beforeAll failure is remembered and fails every test that follows.
      if (beforeAllError) {
        throw new Error(`beforeAll hook failed: ${beforeAllError.message}`);
      }
      for (const beforeEachHook of hooks.beforeEach) {
        try {
          await beforeEachHook();
        } catch (error) {
          console.error(`beforeEach hook failed for test '${test.title}':`, error);
          throw error;
        }
      }
      pendingAfterEach.add(test.id);
      await testFn(agent);
      // During shutdown the owed afterEach hooks are run by the shutdown handler.
      if (!isShuttingDown) {
        pendingAfterEach.delete(test.id);
        afterEachStarted = true;
        for (const afterEachHook of hooks.afterEach) {
          try {
            await afterEachHook();
          } catch (error) {
            console.error(`afterEach hook failed for test '${test.title}':`, error);
            throw error;
          }
        }
      }
      finalState = {
        ...tracker.getState(),
        status: "passed",
        doneAt: Date.now()
      };
      finalResult = { passed: true };
    } catch (error) {
      let failureCause = error;
      if (!isShuttingDown && !afterEachStarted) {
        pendingAfterEach.delete(test.id);
        try {
          for (const afterEachHook of hooks.afterEach) {
            await afterEachHook();
          }
        } catch (afterEachError) {
          console.error(`afterEach hook failed for failing test '${test.title}':`, afterEachError);
          const originalMessage = toErrorMessage(error);
          const afterEachMessage = toErrorMessage(afterEachError);
          failureCause = new Error(`Test failed: ${originalMessage}. Additionally, afterEach hook failed: ${afterEachMessage}`);
        }
      }
      const failure = { message: toErrorMessage(failureCause) };
      finalState = {
        ...tracker.getState(),
        failure,
        status: "failed",
        doneAt: Date.now()
      };
      finalResult = { passed: false, failure };
    }
    await agent.stop();
    postToParent({ type: "test_state_change", testId: test.id, state: finalState });
    if (finalState && (telemetry ?? true)) await sendTelemetry(finalState);
    postToParent({
      type: "test_result",
      testId: test.id,
      result: finalResult ?? { passed: false, failure: { message: "Test result doesn't exist" } }
    });
  } catch (error) {
    postToParent({
      type: "test_error",
      error: toErrorMessage(error),
      testId: test.id
    });
  }
}

// Replace any previously installed handler so that exactly one is active.
messageEmitter.removeAllListeners("message");
messageEmitter.on("message", async (message) => {
  if (message.type === "graceful_shutdown") {
    await handleGracefulShutdown();
    return;
  }
  if (message.type !== "execute") return;
  await handleExecute(message);
});

// ---------------------------------------------------------------------------
// Public declaration API
// ---------------------------------------------------------------------------

/**
 * Declare a test.
 *
 * Options are merged in the order worker options, group options, test
 * options; the URL is resolved by `processUrl` from the same three sources.
 *
 * @param {string} title - Test title.
 * @param {object|Function} optionsOrTestFn - Test options, or the test function itself.
 * @param {Function} [testFnOrNothing] - The test function when options are given.
 * @throws {Error} If no test function is supplied or no URL can be resolved.
 */
function testDecl(title, optionsOrTestFn, testFnOrNothing) {
  let options;
  let testFn;
  if (typeof optionsOrTestFn === "function") {
    options = {};
    testFn = optionsOrTestFn;
  } else {
    options = optionsOrTestFn;
    if (!testFnOrNothing) {
      throw new Error("Test function is required");
    }
    testFn = testFnOrNothing;
  }
  const groupOptions = currentGroupOptions();
  const combinedOptions = {
    ...(workerData.options ?? {}),
    ...groupOptions,
    ...(options ?? {}),
    url: processUrl(workerData.options?.url, groupOptions.url, options?.url)
  };
  if (!combinedOptions.url) {
    throw new Error("URL must be provided either through (1) env var MAGNITUDE_TEST_URL, (2) via magnitude.config.ts, or (3) in group or test options");
  }
  const promptStack = [];
  if (groupOptions.prompt) promptStack.push(groupOptions.prompt);
  // `options` may be null/undefined when passed explicitly; tolerate that here
  // just as the option merge above does.
  if (options?.prompt) promptStack.push(options.prompt);
  const testId = registerTest(testFn, title, addProtocolIfMissing(combinedOptions.url));
  testPromptStacks.set(testId, promptStack);
}

/**
 * Declare a group: tests declared synchronously inside `testFn` are
 * registered with this group's name and inherit its options.
 *
 * @param {string} id - Group name.
 * @param {object|Function} optionsOrTestFn - Group options, or the group body itself.
 * @param {Function} [testFnOrNothing] - The group body when options are given.
 * @throws {Error} If no group body is supplied.
 */
testDecl.group = function(id, optionsOrTestFn, testFnOrNothing) {
  let options;
  let testFn;
  if (typeof optionsOrTestFn === "function") {
    options = {};
    testFn = optionsOrTestFn;
  } else {
    options = optionsOrTestFn;
    if (!testFnOrNothing) {
      throw new Error("Test function is required");
    }
    testFn = testFnOrNothing;
  }
  // Restore the enclosing group afterwards, even when the body throws, so
  // later declarations are not attributed to this group.
  const previousGroup = currentGroup;
  setCurrentGroup({ name: id, options });
  try {
    testFn();
  } finally {
    setCurrentGroup(previousGroup);
  }
};

const test = testDecl;

/**
 * Build the registration function for one hook kind.
 *
 * @param {"beforeAll"|"afterAll"|"beforeEach"|"afterEach"} kind - Hook list to append to.
 * @returns {(fn: Function) => void} Registrar that rejects non-function arguments.
 */
function createHookRegistrar(kind) {
  return function(fn) {
    if (typeof fn !== "function") {
      throw new Error(`${kind} expects a function`);
    }
    hooks[kind].push(fn);
  };
}

const beforeAll = createHookRegistrar("beforeAll");
const afterAll = createHookRegistrar("afterAll");
const beforeEach = createHookRegistrar("beforeEach");
const afterEach = createHookRegistrar("afterEach");

// Default NODE_ENV to "production" when it is unset or empty.
process.env.NODE_ENV = process.env.NODE_ENV || "production";

export { afterAll, afterEach, beforeAll, beforeEach, test };