magnitude-test
Version:
A TypeScript client for running automated UI tests through the Magnitude testing platform
759 lines (744 loc) • 26 kB
JavaScript
import { l as logger, V as VERSION, p as processUrl, a as addProtocolIfMissing } from './version-DtiDJ6_4.mjs';
import { parentPort, workerData as workerData$2 } from 'node:worker_threads';
import { isBun } from 'std-env';
import EventEmitter$1 from 'node:events';
import cuid2 from '@paralleldrive/cuid2';
import { BrowserAgent, AgentError, getMachineId, getCodebaseId, posthog, buildDefaultBrowserAgentOptions } from 'magnitude-core';
import z from 'zod';
import 'pino';
import 'node:module';
// Shared registries are parked on `globalThis`: each one is created only when the
// slot is still empty (falsy), otherwise the existing instance is reused.
// NOTE(review): presumably this lets several evaluations of this module in one
// realm share state — confirm against the loader.
globalThis.__magnitudeTestFunctions ||= /* @__PURE__ */ new Map();
globalThis.__magnitudeMessageEmitter ||= new EventEmitter$1();
globalThis.__magnitudeTestHooks ||= {
  beforeAll: [],
  afterAll: [],
  beforeEach: [],
  afterEach: []
};

// Local aliases used by the rest of the module.
const {
  __magnitudeTestFunctions: testFunctions, // Map: test id -> test function
  __magnitudeMessageEmitter: messageEmitter, // node:events emitter carrying "message" events
  __magnitudeTestHooks: hooks // lifecycle hook lists, one array per hook kind
} = globalThis;
/**
 * Delivers `message` to the parent of this worker.
 *
 * When `isBun` is set the message goes through `process.send`; otherwise it is
 * posted on the `worker_threads` `parentPort`.
 *
 * @param {*} message - Payload handed unchanged to the transport.
 * @throws {Error} If the transport for the current runtime is unavailable.
 */
function postToParent(message) {
  if (!isBun) {
    if (!parentPort) throw new Error("Not running in a worker thread");
    parentPort.postMessage(message);
    return;
  }
  if (typeof process.send !== "function") {
    throw new Error("Not running in a Bun subprocess with IPC");
  }
  process.send(message);
}
/**
 * Returns the data the parent handed to this worker.
 *
 * When `isBun` is set, the data is read as JSON from the
 * `MAGNITUDE_WORKER_DATA` environment variable; otherwise the
 * `worker_threads` `workerData` value is returned as-is.
 *
 * @returns {*} The worker data.
 * @throws {Error} If the environment variable is missing/empty (Bun), or if
 *   there is no `parentPort` (i.e. not inside a worker thread).
 * @throws {SyntaxError} If the environment variable is not valid JSON.
 */
function getTestWorkerData() {
  if (!isBun) {
    if (!parentPort) {
      throw new Error("Do not use this module on the main thread");
    }
    return workerData$2;
  }
  const serialized = process.env.MAGNITUDE_WORKER_DATA;
  if (!serialized) {
    throw new Error("Worker data not found in environment");
  }
  return JSON.parse(serialized);
}
/**
 * CommonJS interop helper: unwraps `x.default` when `x` is flagged with a
 * truthy `__esModule` and owns a `default` property; otherwise returns `x`
 * unchanged (including when `x` is falsy).
 *
 * @param {*} x - A CommonJS `module.exports` value.
 * @returns {*} The default export, or `x` itself.
 */
function getDefaultExportFromCjs (x) {
  const flaggedAsEsModule = Boolean(x) && Boolean(x.__esModule);
  if (flaggedAsEsModule && Object.prototype.hasOwnProperty.call(x, 'default')) {
    return x['default'];
  }
  return x;
}
// Inlined event-emitter implementation (identifiers suggest the `eventemitter3`
// package), wrapped in a lazy CommonJS-style shim: `requireEventemitter3()` runs
// the module body at most once and afterwards returns the cached
// `eventemitter3.exports`.
var eventemitter3 = {exports: {}};
var hasRequiredEventemitter3;
function requireEventemitter3 () {
if (hasRequiredEventemitter3) return eventemitter3.exports;
hasRequiredEventemitter3 = 1;
(function (module) {
// `prefix` is prepended to event names before they are used as keys of
// `_events`; it is switched off below when prototype-less objects are available.
var has = Object.prototype.hasOwnProperty, prefix = "~";
// Constructor for the `_events` storage object.
function Events() {
}
if (Object.create) {
Events.prototype = /* @__PURE__ */ Object.create(null);
// If instances expose no `__proto__`, keys cannot collide with inherited
// properties, so the prefix is unnecessary.
if (!new Events().__proto__) prefix = false;
}
// Record for one registered listener: the function, the `this` value to call
// it with, and whether it is one-shot.
function EE(fn, context, once2) {
this.fn = fn;
this.context = context;
this.once = once2 || false;
}
// Registers `fn` for `event` on `emitter`. A lone listener is stored directly;
// the slot becomes an array once a second listener is added.
// Throws TypeError when `fn` is not a function. Returns `emitter`.
function addListener(emitter, event, fn, context, once2) {
if (typeof fn !== "function") {
throw new TypeError("The listener must be a function");
}
var listener = new EE(fn, context || emitter, once2), evt = prefix ? prefix + event : event;
if (!emitter._events[evt]) emitter._events[evt] = listener, emitter._eventsCount++;
else if (!emitter._events[evt].fn) emitter._events[evt].push(listener);
else emitter._events[evt] = [emitter._events[evt], listener];
return emitter;
}
// Drops the slot for `evt`; when it was the last event, the whole `_events`
// object is replaced with a fresh one instead.
function clearEvent(emitter, evt) {
if (--emitter._eventsCount === 0) emitter._events = new Events();
else delete emitter._events[evt];
}
// Emitter constructor: `_events` holds the listener slots, `_eventsCount` the
// number of distinct events that currently have listeners.
function EventEmitter() {
this._events = new Events();
this._eventsCount = 0;
}
// Returns the names of all events that have listeners (prefix stripped), with
// symbol-keyed events appended when `Object.getOwnPropertySymbols` exists.
EventEmitter.prototype.eventNames = function eventNames() {
var names = [], events, name;
if (this._eventsCount === 0) return names;
for (name in events = this._events) {
if (has.call(events, name)) names.push(prefix ? name.slice(1) : name);
}
if (Object.getOwnPropertySymbols) {
return names.concat(Object.getOwnPropertySymbols(events));
}
return names;
};
// Returns a new array of the listener functions registered for `event`.
EventEmitter.prototype.listeners = function listeners(event) {
var evt = prefix ? prefix + event : event, handlers = this._events[evt];
if (!handlers) return [];
if (handlers.fn) return [handlers.fn];
for (var i = 0, l = handlers.length, ee = new Array(l); i < l; i++) {
ee[i] = handlers[i].fn;
}
return ee;
};
// Returns how many listeners are registered for `event`.
EventEmitter.prototype.listenerCount = function listenerCount(event) {
var evt = prefix ? prefix + event : event, listeners2 = this._events[evt];
if (!listeners2) return 0;
if (listeners2.fn) return 1;
return listeners2.length;
};
// Calls every listener of `event` with the given arguments. Returns false when
// there are no listeners, true otherwise. One-shot listeners are removed just
// before they are invoked. Short argument lists go through `.call`; longer
// ones fall back to `.apply` with a copied argument array.
EventEmitter.prototype.emit = function emit(event, a1, a2, a3, a4, a5) {
var evt = prefix ? prefix + event : event;
if (!this._events[evt]) return false;
var listeners2 = this._events[evt], len = arguments.length, args, i;
if (listeners2.fn) {
// Single-listener slot.
if (listeners2.once) this.removeListener(event, listeners2.fn, void 0, true);
switch (len) {
case 1:
return listeners2.fn.call(listeners2.context), true;
case 2:
return listeners2.fn.call(listeners2.context, a1), true;
case 3:
return listeners2.fn.call(listeners2.context, a1, a2), true;
case 4:
return listeners2.fn.call(listeners2.context, a1, a2, a3), true;
case 5:
return listeners2.fn.call(listeners2.context, a1, a2, a3, a4), true;
case 6:
return listeners2.fn.call(listeners2.context, a1, a2, a3, a4, a5), true;
}
for (i = 1, args = new Array(len - 1); i < len; i++) {
args[i - 1] = arguments[i];
}
listeners2.fn.apply(listeners2.context, args);
} else {
// Array slot: `length` is captured up front and the loop walks the array
// captured in `listeners2`, even if `removeListener` replaces the slot.
var length = listeners2.length, j;
for (i = 0; i < length; i++) {
if (listeners2[i].once) this.removeListener(event, listeners2[i].fn, void 0, true);
switch (len) {
case 1:
listeners2[i].fn.call(listeners2[i].context);
break;
case 2:
listeners2[i].fn.call(listeners2[i].context, a1);
break;
case 3:
listeners2[i].fn.call(listeners2[i].context, a1, a2);
break;
case 4:
listeners2[i].fn.call(listeners2[i].context, a1, a2, a3);
break;
default:
// The argument array is built lazily, once, and shared by later listeners.
if (!args) for (j = 1, args = new Array(len - 1); j < len; j++) {
args[j - 1] = arguments[j];
}
listeners2[i].fn.apply(listeners2[i].context, args);
}
}
}
return true;
};
// Registers a persistent listener; `context` defaults to the emitter itself.
EventEmitter.prototype.on = function on(event, fn, context) {
return addListener(this, event, fn, context, false);
};
// Registers a listener that is removed right before its first invocation.
EventEmitter.prototype.once = function once(event, fn, context) {
return addListener(this, event, fn, context, true);
};
// Removes listeners of `event`. Without `fn` every listener of the event is
// dropped; otherwise only entries matching `fn` — and, when given, `context`
// and the one-shot flag `once2` — are removed. Returns the emitter.
EventEmitter.prototype.removeListener = function removeListener(event, fn, context, once2) {
var evt = prefix ? prefix + event : event;
if (!this._events[evt]) return this;
if (!fn) {
clearEvent(this, evt);
return this;
}
var listeners2 = this._events[evt];
if (listeners2.fn) {
if (listeners2.fn === fn && (!once2 || listeners2.once) && (!context || listeners2.context === context)) {
clearEvent(this, evt);
}
} else {
// Keep every entry that does NOT match; a single survivor is stored
// directly again rather than as a one-element array.
for (var i = 0, events = [], length = listeners2.length; i < length; i++) {
if (listeners2[i].fn !== fn || once2 && !listeners2[i].once || context && listeners2[i].context !== context) {
events.push(listeners2[i]);
}
}
if (events.length) this._events[evt] = events.length === 1 ? events[0] : events;
else clearEvent(this, evt);
}
return this;
};
// Removes all listeners of `event`, or of every event when `event` is falsy.
EventEmitter.prototype.removeAllListeners = function removeAllListeners(event) {
var evt;
if (event) {
evt = prefix ? prefix + event : event;
if (this._events[evt]) clearEvent(this, evt);
} else {
this._events = new Events();
this._eventsCount = 0;
}
return this;
};
// Aliases.
EventEmitter.prototype.off = EventEmitter.prototype.removeListener;
EventEmitter.prototype.addListener = EventEmitter.prototype.on;
// Exposes the prefix in use ("~" or false).
EventEmitter.prefixed = prefix;
// Lets the constructor also be reached as a named property of the export.
EventEmitter.EventEmitter = EventEmitter;
{
module.exports = EventEmitter;
}
} (eventemitter3));
return eventemitter3.exports;
}
// Evaluate the shim now and unwrap the default export; from here on
// `EventEmitter` refers to this bundled implementation (the node:events one is
// imported as `EventEmitter$1`).
var eventemitter3Exports = requireEventemitter3();
var EventEmitter = /*@__PURE__*/getDefaultExportFromCjs(eventemitter3Exports);
// Prompt preamble for `TestCaseAgent.check`; the text of the check itself is
// appended afterwards inside a <check> tag.
const CHECK_INSTRUCTIONS = `
Given the actions of an LLM agent executing a test case, and a screenshot taken afterwards, evaluate whether the provided check "passes" i.e. holds true or not.
Check to evaluate:
`.trim();

/**
 * `BrowserAgent` extended with `check()`, which asks the model whether a
 * described condition holds and fails the test when it does not.
 */
class TestCaseAgent extends BrowserAgent {
  /**
   * Separate emitter for check lifecycle events:
   * "checkStarted" (description) and "checkDone" (description, passed).
   */
  checkEvents = new EventEmitter();

  /**
   * Evaluates a check via `this.query` and records the model's reasoning in
   * the agent's memory.
   *
   * @param {string} description - The condition to verify.
   * @returns {Promise<void>} Resolves when the check passed.
   * @throws {AgentError} With variant "check_failed" when the check did not pass.
   */
  async check(description) {
    const prompt = `${CHECK_INSTRUCTIONS}\n<check>${description}</check>`;
    this.checkEvents.emit("checkStarted", description);

    const verdictSchema = z.object({
      reasoning: z.string(),
      passed: z.boolean()
    });
    const { reasoning, passed } = await this.query(prompt, verdictSchema);

    this.memory.recordThought(reasoning);
    this.checkEvents.emit("checkDone", description, passed);
    if (passed) return;
    throw new AgentError(`Check failed: ${description}`, { variant: "check_failed" });
  }
}
/**
 * Listens to a test agent's events and folds them into a single test-state
 * object, emitting "stateChanged" (with that same object) on `events` after
 * most updates.
 *
 * It records steps, checks, actions, token usage and start/stop timestamps.
 * It never decides the final passed/failed result: that is only known once the
 * test function itself has finished, so whoever runs the test function sets it.
 */
class TestStateTracker {
  /** The agent being observed. */
  agent;
  /** The state object being built; the same instance is handed to listeners. */
  state;
  /** Most recently started step or check; actions are attached to it. */
  lastStepOrCheck = null;
  /** Emitter on which "stateChanged" is published. */
  events;

  /**
   * @param {object} agent - Must expose `events` and `checkEvents` emitters
   *   accepting `on(name, handler, context)`, plus `identifyAction(action)`.
   */
  constructor(agent) {
    this.agent = agent;
    this.state = {
      status: "pending",
      stepsAndChecks: [],
      modelUsage: []
    };

    const agentSubscriptions = [
      ["start", this.onStart],
      ["stop", this.onStop],
      ["actStarted", this.onActStarted],
      ["actDone", this.onActDone],
      ["actionStarted", this.onActionStarted],
      ["actionDone", this.onActionDone],
      ["tokensUsed", this.onTokensUsed]
    ];
    for (const [eventName, handler] of agentSubscriptions) {
      // The third argument is the `this` value the emitter calls the handler with.
      this.agent.events.on(eventName, handler, this);
    }

    const checkSubscriptions = [
      ["checkStarted", this.onCheckStarted],
      ["checkDone", this.onCheckDone]
    ];
    for (const [eventName, handler] of checkSubscriptions) {
      this.agent.checkEvents.on(eventName, handler, this);
    }

    this.events = new EventEmitter();
  }

  /** Publishes the current state object to "stateChanged" listeners. */
  #publish() {
    this.events.emit("stateChanged", this.state);
  }

  /**
   * Returns the latest step/check if it has the expected variant.
   * @throws {Error} With `errorMessage` when there is none or the variant differs.
   */
  #latest(variant, errorMessage) {
    const latest = this.lastStepOrCheck;
    if (!latest || latest.variant !== variant) {
      throw new Error(errorMessage);
    }
    return latest;
  }

  /** @returns {object} The live state object (not a copy). */
  getState() {
    return this.state;
  }

  /** Agent started: stamp the start time and mark the test as running. */
  onStart() {
    this.state.startedAt = Date.now();
    this.state.status = "running";
    this.#publish();
  }

  /** Agent stopped: stamp the end time; the status is left untouched. */
  onStop() {
    this.state.doneAt = Date.now();
    this.#publish();
  }

  /**
   * Adds token usage to the entry whose `llm` serializes to the same JSON, or
   * appends a new entry with `numCalls: 1`. Does not emit "stateChanged".
   */
  onTokensUsed(modelUsage) {
    const fingerprint = JSON.stringify(modelUsage.llm);
    const matching = this.state.modelUsage.filter(
      (entry) => JSON.stringify(entry.llm) === fingerprint
    );
    if (matching.length === 0) {
      this.state.modelUsage.push({ ...modelUsage, numCalls: 1 });
      return;
    }
    for (const entry of matching) {
      entry.inputTokens += modelUsage.inputTokens;
      entry.outputTokens += modelUsage.outputTokens;
      entry.numCalls += 1;
    }
  }

  /**
   * Attaches an action, together with its rendered description, to the
   * current step.
   * @throws {Error} If the latest entry is not a step.
   */
  onActionStarted(action) {
    const step = this.#latest("step", "Action reported without preceding step");
    const pretty = this.agent.identifyAction(action).render(action);
    step.actions.push({ action, pretty });
    this.#publish();
  }

  /** Intentionally a no-op: nothing is recorded when an action completes. */
  onActionDone(action) {
  }

  /** A new step began: append it in "running" state and make it current. */
  onActStarted(task) {
    const step = {
      variant: "step",
      description: task,
      actions: [],
      status: "running"
    };
    this.state.stepsAndChecks.push(step);
    this.lastStepOrCheck = step;
    this.#publish();
  }

  /**
   * The current step finished: mark it passed.
   * @throws {Error} If the latest entry is not a step.
   */
  onActDone(task) {
    this.#latest("step", "Step success without preceding step").status = "passed";
    this.#publish();
  }

  /** A new check began: append it in "running" state and make it current. */
  onCheckStarted(check) {
    const entry = {
      variant: "check",
      description: check,
      status: "running"
    };
    this.state.stepsAndChecks.push(entry);
    this.lastStepOrCheck = entry;
    this.#publish();
  }

  /**
   * The current check finished: record whether it passed.
   * @throws {Error} If the latest entry is not a check.
   */
  onCheckDone(check, passed) {
    const entry = this.#latest("check", "Check success reported without preceding check");
    entry.status = passed ? "passed" : "failed";
    this.#publish();
  }
}
/**
 * Reports one finished test run to PostHog as a "test-run" event and then
 * shuts the PostHog client down.
 *
 * Best-effort: failures while identifying the codebase group or while sending
 * are logged as warnings and not rethrown.
 *
 * @param {object} state - Final test state; reads `stepsAndChecks`,
 *   `startedAt`, `doneAt`, `modelUsage`, `status` and `failure`.
 * @returns {Promise<void>}
 */
async function sendTelemetry(state) {
  // Action variants starting with one of these count as browser actions.
  const browserActionPrefixes = ["browser", "keyboard", "mouse"];
  const isBrowserAction = ({ action }) =>
    browserActionPrefixes.some((prefix) => action.variant.startsWith(prefix));

  const tally = { numSteps: 0, numChecks: 0, browserActionCount: 0 };
  for (const entry of state.stepsAndChecks) {
    if (entry.variant !== "step") {
      // Anything that is not a step is counted as a check.
      tally.numChecks += 1;
      continue;
    }
    tally.numSteps += 1;
    tally.browserActionCount += entry.actions.filter(isBrowserAction).length;
  }

  const userId = getMachineId();
  const codebaseId = getCodebaseId();

  if (codebaseId) {
    try {
      posthog.groupIdentify({ groupType: "codebase", groupKey: codebaseId });
    } catch (error) {
      logger.warn(`Failed to identify group: ${error.message}`);
    }
  }

  const properties = {
    source: "magnitude-test",
    telemetryVersion: "0.2",
    packageVersion: VERSION,
    codebase: codebaseId,
    // Fall back to "now" when the tracker never saw a start/stop event.
    startedAt: state.startedAt ?? Date.now(),
    doneAt: state.doneAt ?? Date.now(),
    numSteps: tally.numSteps,
    numChecks: tally.numChecks,
    browserActionCount: tally.browserActionCount,
    modelUsage: state.modelUsage,
    passed: state.status === "passed",
    failure: state.failure
  };

  const event = { distinctId: userId, event: "test-run", properties };
  if (codebaseId) {
    // Only associate the event with a codebase group when an id is available.
    event.groups = { codebase: codebaseId };
  }

  try {
    posthog.capture(event);
    await posthog.shutdown();
  } catch (error) {
    logger.warn(`Failed to send telemetry (may have timed out): ${error.message}`);
  }
}
// Data passed to this worker by its parent; `relativeFilePath` is reported with each test.
const workerData$1 = getTestWorkerData();
// Generator for 12-character cuid2 test ids.
const generateId = cuid2.init({ length: 12 });

/**
 * Stores a test function under a freshly generated id and tells the parent
 * about the new test via a "registered" message.
 *
 * @param {Function} testFn - The test body; later looked up by id in `testFunctions`.
 * @param {string} title - Test title.
 * @param {string} url - URL the test runs against.
 */
function registerTest(testFn, title, url) {
  const id = generateId();
  testFunctions.set(id, testFn);

  const descriptor = {
    id,
    title,
    url,
    filepath: workerData$1.relativeFilePath,
    // Name of the group being declared right now, if any.
    group: currentGroup?.name
  };
  postToParent({ type: "registered", test: descriptor });
}
// Worker-wide lifecycle flags, read and updated by the "message" handler.
let beforeAllExecuted = false; // set once the beforeAll hooks have been attempted
let beforeAllError = null; // error thrown by a beforeAll hook, if any
let afterAllExecuted = false; // set once the afterAll hooks completed
let isShuttingDown = false; // set when a graceful shutdown was requested
// Ids of tests whose afterEach hooks have not run yet.
const pendingAfterEach = /* @__PURE__ */ new Set();

// Group currently being declared (`{ name, options }`), or undefined outside a group.
let currentGroup;

/** Replaces the group currently being declared. */
function setCurrentGroup(group) {
  currentGroup = group;
}

/**
 * Returns a deep copy of the current group's options, or a fresh empty object
 * when there is no current group or it has no options.
 */
function currentGroupOptions() {
  const optionsCopy = structuredClone(currentGroup?.options);
  return optionsCopy ?? {};
}
// Drop any "message" listener registered earlier on the shared emitter so that
// exactly one handler is active.
messageEmitter.removeAllListeners("message");

/**
 * Worker-side handler for messages from the parent.
 *
 * - "graceful_shutdown": flags the worker as shutting down, runs the afterEach
 *   hooks once per test that is still pending, runs the afterAll hooks if they
 *   have not completed yet, then replies "graceful_shutdown_complete".
 * - "execute": runs one registered test with a fresh TestCaseAgent, streaming
 *   "test_state_change" messages and finishing with "test_result". Anything
 *   that goes wrong outside the test body itself is reported as "test_error".
 * - Any other message type is ignored.
 */
messageEmitter.on("message", async (message) => {
  if (message.type === "graceful_shutdown") {
    isShuttingDown = true;
    if (pendingAfterEach.size > 0) {
      try {
        await Promise.all(
          [...pendingAfterEach].map(async (_testId) => {
            for (const afterEachHook of hooks.afterEach) {
              await afterEachHook();
            }
          })
        );
      } catch (error) {
        console.error("afterEach hooks failed during graceful shutdown:", error);
      }
    }
    if (!afterAllExecuted) {
      try {
        for (const afterAllHook of hooks.afterAll) {
          await afterAllHook();
        }
        afterAllExecuted = true;
      } catch (error) {
        console.error("afterAll hook failed during graceful shutdown:\n", error);
      }
    }
    postToParent({ type: "graceful_shutdown_complete" });
    return;
  }

  if (message.type !== "execute") return;

  if (isShuttingDown) {
    postToParent({
      type: "test_error",
      testId: message.test.id,
      error: "Test cancelled due to graceful shutdown"
    });
    return;
  }

  const { test, browserOptions, llm, grounding, telemetry } = message;
  const testFn = testFunctions.get(test.id);
  if (!testFn) {
    postToParent({
      type: "test_error",
      testId: test.id,
      error: `Test function not found: ${test.id}`
    });
    return;
  }

  try {
    // Prompt lines recorded for this test title at declaration time.
    const promptStack = testPromptStack[test.title] || [];
    const prompt = promptStack.length > 0 ? promptStack.join("\n") : void 0;
    const { agentOptions: defaultAgentOptions, browserOptions: defaultBrowserOptions } = buildDefaultBrowserAgentOptions({
      agentOptions: { llm, ...prompt ? { prompt } : {} },
      browserOptions: {
        url: test.url,
        browser: browserOptions,
        grounding
      }
    });
    const agent = new TestCaseAgent({
      // disable telemetry to keep test run telemetry separate from general automation telemetry
      agentOptions: { ...defaultAgentOptions, telemetry: false },
      browserOptions: defaultBrowserOptions
    });
    const tracker = new TestStateTracker(agent);
    tracker.events.on("stateChanged", (state) => {
      postToParent({
        type: "test_state_change",
        testId: test.id,
        state
      });
    });
    await agent.start();

    let finalState;
    let finalResult;
    // Set as soon as the success path begins running the afterEach hooks, so the
    // failure path below does not run them a second time when one of them throws.
    let afterEachStarted = false;
    try {
      // beforeAll hooks are attempted once per worker; a failure is remembered
      // and fails every test executed afterwards.
      if (!beforeAllExecuted && hooks.beforeAll.length > 0) {
        try {
          for (const beforeAllHook of hooks.beforeAll) {
            await beforeAllHook();
          }
        } catch (error) {
          console.error("beforeAll hooks failed:", error);
          beforeAllError = error instanceof Error ? error : new Error(String(error));
        } finally {
          beforeAllExecuted = true;
        }
      }
      if (beforeAllError) {
        throw new Error(`beforeAll hook failed: ${beforeAllError.message}`);
      }

      for (const beforeEachHook of hooks.beforeEach) {
        try {
          await beforeEachHook();
        } catch (error) {
          console.error(`beforeEach hook failed for test '${test.title}':`, error);
          throw error;
        }
      }

      pendingAfterEach.add(test.id);
      await testFn(agent);

      // During a graceful shutdown the shutdown branch above owns the afterEach hooks.
      if (!isShuttingDown) {
        pendingAfterEach.delete(test.id);
        afterEachStarted = true;
        for (const afterEachHook of hooks.afterEach) {
          try {
            await afterEachHook();
          } catch (error) {
            console.error(`afterEach hook failed for test '${test.title}':`, error);
            throw error;
          }
        }
      }

      finalState = {
        ...tracker.getState(),
        status: "passed",
        doneAt: Date.now()
      };
      finalResult = { passed: true };
    } catch (error) {
      // Run the afterEach hooks for a failed test — unless they already ran (and
      // one of them is the very error being handled) or a shutdown is in progress.
      if (!isShuttingDown && !afterEachStarted) {
        pendingAfterEach.delete(test.id);
        try {
          for (const afterEachHook of hooks.afterEach) {
            await afterEachHook();
          }
        } catch (afterEachError) {
          console.error(`afterEach hook failed for failing test '${test.title}':`, afterEachError);
          const originalMessage = error instanceof Error ? error.message : String(error);
          const afterEachMessage = afterEachError instanceof Error ? afterEachError.message : String(afterEachError);
          error = new Error(`Test failed: ${originalMessage}. Additionally, afterEach hook failed: ${afterEachMessage}`);
        }
      }
      const failure = {
        message: error instanceof Error ? error.message : String(error)
      };
      finalState = {
        ...tracker.getState(),
        failure,
        status: "failed",
        doneAt: Date.now()
      };
      finalResult = { passed: false, failure };
    }

    await agent.stop();
    postToParent({
      type: "test_state_change",
      testId: test.id,
      state: finalState
    });
    // Telemetry is on unless the message explicitly disables it.
    if (finalState && (telemetry ?? true)) await sendTelemetry(finalState);
    postToParent({
      type: "test_result",
      testId: test.id,
      result: finalResult ?? { passed: false, failure: { message: "Test result doesn't exist" } }
    });
  } catch (error) {
    // Failures outside the test body (agent construction/start/stop, messaging, telemetry).
    postToParent({
      type: "test_error",
      error: error instanceof Error ? error.message : String(error),
      testId: test.id
    });
  }
});
// Worker data for this file, fetched with the same call that produced
// `workerData$1` earlier in the module; `testDecl` reads `options` from it.
const workerData = getTestWorkerData();
// Prompt lines per test, keyed by test title: written by `testDecl`, read by the
// "execute" branch of the message handler.
// NOTE(review): tests that share a title overwrite each other's entry.
const testPromptStack = {};
/**
 * Declares a test case.
 *
 * Call as `testDecl(title, testFn)` or `testDecl(title, options, testFn)`.
 * Options are layered as worker-level options < current group options < test
 * options; the URL is resolved separately through `processUrl` from those same
 * three levels. Prompts from the group and from the test are recorded, in that
 * order, under the test title.
 *
 * @param {string} title - Test title.
 * @param {Function|object|null|undefined} optionsOrTestFn - The test function,
 *   or an options object (`url`, `prompt`, ...). A nullish value is treated as
 *   "no options".
 * @param {Function} [testFnOrNothing] - The test function when options are given.
 * @throws {Error} If no test function is supplied or no URL can be resolved.
 */
function testDecl(title, optionsOrTestFn, testFnOrNothing) {
  let options;
  let testFn;
  if (typeof optionsOrTestFn === "function") {
    options = {};
    testFn = optionsOrTestFn;
  } else {
    // Normalize nullish options once, so later property reads cannot throw.
    options = optionsOrTestFn ?? {};
    if (!testFnOrNothing) {
      throw new Error("Test function is required");
    }
    testFn = testFnOrNothing;
  }

  const groupOptions = currentGroupOptions();
  const combinedOptions = {
    ...(workerData.options ?? {}),
    ...groupOptions,
    ...options,
    url: processUrl(workerData.options?.url, groupOptions.url, options.url)
  };
  if (!combinedOptions.url) {
    throw new Error("URL must be provided either through (1) env var MAGNITUDE_TEST_URL, (2) via magnitude.config.ts, or (3) in group or test options");
  }

  const promptStack = [];
  if (groupOptions.prompt) promptStack.push(groupOptions.prompt);
  if (options.prompt) promptStack.push(options.prompt);
  testPromptStack[title] = promptStack;

  registerTest(testFn, title, addProtocolIfMissing(combinedOptions.url));
}
/**
 * Declares a group of tests.
 *
 * Call as `test.group(id, fn)` or `test.group(id, options, fn)`. While `fn`
 * runs synchronously, tests declared inside it see `{ name: id, options }` as
 * the current group. Afterwards the group that was current before the call is
 * restored — also when `fn` throws — so that a failing or nested group does
 * not leak into, or wipe out, the group context of later declarations.
 *
 * @param {string} id - Group name.
 * @param {Function|object} optionsOrTestFn - The group body, or group options.
 * @param {Function} [testFnOrNothing] - The group body when options are given.
 * @throws {Error} If no group body is supplied; errors from the body propagate.
 */
testDecl.group = function(id, optionsOrTestFn, testFnOrNothing) {
  let options;
  let testFn;
  if (typeof optionsOrTestFn === "function") {
    options = {};
    testFn = optionsOrTestFn;
  } else {
    options = optionsOrTestFn;
    if (!testFnOrNothing) {
      throw new Error("Test function is required");
    }
    testFn = testFnOrNothing;
  }

  // Remember the enclosing group (undefined at top level) so it can be restored.
  const enclosingGroup = currentGroup;
  setCurrentGroup({ name: id, options });
  try {
    testFn();
  } finally {
    setCurrentGroup(enclosingGroup);
  }
};
// Public name under which the declaration API is exported.
const test = testDecl;
/**
 * Builds the registration function for one kind of lifecycle hook.
 *
 * @param {string} kind - Key into `hooks` ("beforeAll", "afterAll",
 *   "beforeEach" or "afterEach").
 * @returns {(fn: Function) => void} Function that appends `fn` to
 *   `hooks[kind]`, throwing an Error if `fn` is not a function.
 */
function createHookRegistrar(kind) {
  return (fn) => {
    if (typeof fn === "function") {
      hooks[kind].push(fn);
      return;
    }
    throw new Error(`${kind} expects a function`);
  };
}

// One registrar per hook kind.
const beforeAll = createHookRegistrar("beforeAll");
const afterAll = createHookRegistrar("afterAll");
const beforeEach = createHookRegistrar("beforeEach");
const afterEach = createHookRegistrar("afterEach");

// Default NODE_ENV to "production" when it is unset or empty.
process.env.NODE_ENV = process.env.NODE_ENV || "production";
export { afterAll, afterEach, beforeAll, beforeEach, test };