@debugg-ai/debugg-ai-mcp
Version:
Zero-Config, Fully AI-Managed End-to-End Testing for all code gen platforms.
309 lines (308 loc) • 16.1 kB
JavaScript
/**
* Trigger Crawl Handler
*
* Executes the Raw Crawl Workflow Template via the 4-step pattern shared with
* testPageChangesHandler:
* provision tunnel if localhost → find template → execute → poll → result
*
* Unlike check_app_in_browser, a crawl does NOT return pass/fail — it returns
* the execution status + metadata. The backend's job is to explore the app
* and populate the project knowledge graph; this handler just triggers it
* and reports back what happened.
*/
import { config } from '../config/index.js';
import { Logger } from '../utils/logger.js';
import { handleExternalServiceError } from '../utils/errors.js';
import { DebuggAIServerClient } from '../services/index.js';
import { TunnelProvisionError } from '../services/tunnels.js';
import { tunnelManager } from '../services/ngrok/tunnelManager.js';
import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js';
import { extractLocalhostPort } from '../utils/urlParser.js';
import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js';
import { getCachedTemplateUuid, invalidateTemplateCache } from '../utils/handlerCaches.js';
import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js';
import { Telemetry, TelemetryEvents } from '../utils/telemetry.js';
// Module-level logger used by everything in this handler file.
const logger = new Logger({ module: 'triggerCrawlHandler' });
// Keyword passed to the cached template lookup to find the crawl workflow
// template; it is also quoted in the "template not found" error message.
const TEMPLATE_KEYWORD = 'raw crawl';
/**
 * Bead kbo9: same env-driven retry budget as testPageChangesHandler (kbxy).
 *
 * Reads DEBUGGAI_TRANSIENT_RETRIES and turns it into the number of extra
 * attempts allowed after a transient workflow failure.
 *
 * @returns {number} 1 when the variable is unset, empty, unparsable or
 *   negative; otherwise the parsed integer, capped at 3.
 */
function getMaxTransientRetries() {
    const FALLBACK_RETRIES = 1;
    const RETRY_CEILING = 3;
    const envValue = process.env.DEBUGGAI_TRANSIENT_RETRIES;
    if (envValue === undefined || envValue === '') {
        return FALLBACK_RETRIES;
    }
    const parsed = Number.parseInt(envValue, 10);
    const isUsable = Number.isFinite(parsed) && parsed >= 0;
    return isUsable ? Math.min(parsed, RETRY_CEILING) : FALLBACK_RETRIES;
}
/**
 * Handler for the `trigger_crawl` tool.
 *
 * Flow: resolve the target URL → for localhost targets, probe the local port,
 * reuse or provision a tunnel and health-check it (skipped in dev mode) →
 * look up the crawl workflow template → execute it → poll the execution
 * (re-executing on transient backend errors, bounded by
 * getMaxTransientRetries()) → return a JSON summary of the execution.
 *
 * Progress is reported as steps 1..4 of 4. Step 4 is emitted only from the
 * poll callback once a terminal status is seen; polls that are still in
 * progress stay on step 3.
 *
 * @param {object} input - Tool arguments. Passed to resolveTargetUrl; this
 *   function also reads projectUuid, timeoutSeconds, environmentId,
 *   credentialId, credentialRole, username and password from it.
 * @param {*} context - Not referenced by this handler.
 * @param {Function} [rawProgressCallback] - Optional async sink invoked with
 *   `{ progress, total, message }`. After its first rejection (or after stdin
 *   closes) no further updates are sent for this request.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text payload containing pretty-printed JSON. `isError: true` is set for
 *   the two pre-flight failures (LocalServerUnreachable, TunnelTrafficBlocked).
 * @throws Errors raised inside the try block are rethrown as whatever
 *   handleExternalServiceError returns. Errors from client.init(),
 *   resolveTargetUrl or buildContext happen before the try block and
 *   propagate as-is.
 */
export async function triggerCrawlHandler(input, context, rawProgressCallback) {
    const startTime = Date.now();
    logger.toolStart('trigger_crawl', input);
    // Bead 0bq: progress circuit-breaker — see testPageChangesHandler for rationale.
    // Once an emission fails, every later update is silently dropped.
    let progressDisabled = false;
    const progressCallback = rawProgressCallback
        ? async (update) => {
            if (progressDisabled)
                return;
            try {
                await rawProgressCallback(update);
            }
            catch (err) {
                progressDisabled = true;
                logger.warn('Progress emission failed; disabling further emissions for this request', {
                    error: err instanceof Error ? err.message : String(err),
                });
            }
        }
        : undefined;
    const client = new DebuggAIServerClient(config.api.key);
    await client.init();
    const originalUrl = resolveTargetUrl(input);
    let ctx = buildContext(originalUrl);
    // Set only when a fresh tunnel key was provisioned during this call; the
    // finally block uses it to revoke a key that never got a tunnel.
    let keyId;
    // stdin closing aborts the signal handed to pollExecution and stops
    // any further progress emissions.
    const abortController = new AbortController();
    const onStdinClose = () => {
        abortController.abort();
        progressDisabled = true;
    };
    process.stdin.once('close', onStdinClose);
    try {
        // --- Tunnel: reuse existing or provision a fresh one ---
        if (ctx.isLocalhost) {
            // Bead 1om: pre-flight local port probe BEFORE provision/ngrok/backend.
            const localPort = extractLocalhostPort(ctx.originalUrl);
            if (typeof localPort === 'number') {
                const probe = await probeLocalPort(localPort);
                if (!probe.reachable) {
                    const payload = {
                        error: 'LocalServerUnreachable',
                        message: `No server listening on 127.0.0.1:${localPort}. Start your dev server on that port before running trigger_crawl. Probe result: ${probe.code} (${probe.detail ?? 'no detail'}).`,
                        detail: { port: localPort, probeCode: probe.code, probeDetail: probe.detail, elapsedMs: probe.elapsedMs },
                    };
                    logger.warn(`Pre-flight port probe failed for ${ctx.originalUrl}: ${probe.code} in ${probe.elapsedMs}ms`);
                    return { content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }], isError: true };
                }
            }
            if (config.devMode) {
                // Dev mode: local backend can reach localhost directly — no tunnel needed.
                logger.info(`trigger_crawl: dev mode — using localhost URL directly: ${ctx.originalUrl}`);
            }
            else {
                if (progressCallback) {
                    await progressCallback({ progress: 1, total: 4, message: 'Provisioning secure tunnel for localhost...' });
                }
                const reused = findExistingTunnel(ctx);
                if (reused) {
                    ctx = reused;
                }
                else {
                    let tunnel;
                    try {
                        tunnel = await client.tunnels.provisionWithRetry();
                    }
                    catch (provisionError) {
                        const msg = provisionError instanceof Error ? provisionError.message : String(provisionError);
                        const diag = provisionError instanceof TunnelProvisionError ? ` ${provisionError.diagnosticSuffix()}` : '';
                        throw new Error(`Failed to provision tunnel for ${ctx.originalUrl}. ` +
                            `The remote browser needs a secure tunnel to reach your local dev server. ` +
                            `(Detail: ${msg})${diag}`);
                    }
                    keyId = tunnel.keyId;
                    ctx = await ensureTunnel(ctx, tunnel.tunnelKey, tunnel.tunnelId, tunnel.keyId, () => client.revokeNgrokKey(tunnel.keyId));
                }
                // Bead 1om: post-tunnel health check — verify traffic actually flows.
                if (ctx.targetUrl) {
                    const health = await probeTunnelHealth(ctx.targetUrl);
                    if (!health.healthy) {
                        const payload = {
                            error: 'TunnelTrafficBlocked',
                            message: `Tunnel was established but traffic isn't reaching the dev server. ${health.detail ?? ''} Common causes: dev server binds to 0.0.0.0 or ::1 but not 127.0.0.1; dev server crashed; firewall.`,
                            detail: {
                                code: health.code,
                                status: health.status,
                                ngrokErrorCode: health.ngrokErrorCode,
                                elapsedMs: health.elapsedMs,
                            },
                        };
                        logger.warn(`Tunnel health probe failed for ${ctx.targetUrl}: ${health.code} ${health.ngrokErrorCode ?? ''} in ${health.elapsedMs}ms`);
                        if (ctx.tunnelId) {
                            // Fire-and-forget: a failed stop is only logged.
                            tunnelManager.stopTunnel(ctx.tunnelId).catch((err) => logger.warn(`Failed to stop broken tunnel ${ctx.tunnelId}: ${err}`));
                        }
                        // Clear keyId so the cleanup in `finally` never attempts a
                        // key revocation on this path.
                        keyId = undefined;
                        return { content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }], isError: true };
                    }
                }
            }
        }
        // --- Find the crawl workflow template (cached across calls) ---
        if (progressCallback) {
            await progressCallback({ progress: 2, total: 4, message: 'Locating crawl workflow template...' });
        }
        const templateUuid = await getCachedTemplateUuid(TEMPLATE_KEYWORD, async (name) => {
            return client.workflows.findTemplateByName(name);
        });
        if (!templateUuid) {
            throw new Error(`Raw Crawl Workflow Template not found. ` +
                `Ensure the backend has a template matching "${TEMPLATE_KEYWORD}" seeded and accessible.`);
        }
        // --- Build contextData + env ---
        const contextData = {
            // Tunnel URL when one is in use, otherwise the URL as given.
            targetUrl: ctx.targetUrl ?? ctx.originalUrl,
        };
        if (input.projectUuid)
            contextData.projectId = input.projectUuid;
        contextData.headless = true; // D7: the MCP always runs headless — no opt-out.
        if (typeof input.timeoutSeconds === 'number')
            contextData.timeoutSeconds = input.timeoutSeconds;
        // Only truthy fields are forwarded; an empty env is sent as undefined.
        const env = {};
        if (input.environmentId)
            env.environmentId = input.environmentId;
        if (input.credentialId)
            env.credentialId = input.credentialId;
        if (input.credentialRole)
            env.credentialRole = input.credentialRole;
        if (input.username)
            env.username = input.username;
        if (input.password)
            env.password = input.password;
        // --- Execute ---
        if (progressCallback) {
            await progressCallback({ progress: 3, total: 4, message: 'Queuing crawl execution...' });
        }
        // --- Execute + Poll (with bounded retry on transient errors, bead kbo9) ---
        const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
        const MAX_RETRIES = getMaxTransientRetries();
        let executeResponse;
        let executionUuid = '';
        let finalExecution;
        let attempt = 0;
        while (true) {
            attempt++;
            if (attempt > 1) {
                // At this point finalExecution / executionUuid still describe the
                // previous (transiently failed) attempt.
                Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
                    tool: 'trigger_crawl',
                    attempt,
                    reason: transientReasonTag(finalExecution),
                    previousExecutionId: executionUuid,
                    previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
                    previousStateError: finalExecution?.state?.error?.slice(0, 200),
                });
                if (progressCallback) {
                    await progressCallback({
                        progress: 3, total: 4,
                        message: `Transient backend error — retrying crawl (attempt ${attempt}/${MAX_RETRIES + 1})...`,
                    });
                }
                // Linear backoff: 1s before the first retry, 2s before the second, ...
                await new Promise(r => setTimeout(r, 1000 * (attempt - 1)));
            }
            executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
            executionUuid = executeResponse.executionUuid;
            logger.info(`Crawl execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);
            // --- Poll ---
            // Bead 0bq: emit the final progress (4/4) INSIDE onUpdate when terminal
            // status detected, so there's no post-resolve emission that could race
            // the response and cause stale-progressToken transport tear-down.
            finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
                if (ctx.tunnelId)
                    touchTunnelById(ctx.tunnelId);
                if (!progressCallback)
                    return;
                const nodeCount = (exec.nodeExecutions ?? []).length;
                // Only a terminal status may claim 4/4; while the crawl is still
                // running the update stays on step 3 so it is not shown as complete.
                const isTerminal = TERMINAL_STATUSES.has(exec.status);
                await progressCallback({
                    progress: isTerminal ? 4 : 3, total: 4,
                    message: `Crawl ${exec.status} (${nodeCount} nodes)`,
                });
            }, abortController.signal);
            // Retry budget exhausted: return whatever the last attempt produced.
            if (attempt > MAX_RETRIES)
                break;
            if (!isTransientWorkflowError(finalExecution))
                break;
            logger.warn(`Transient backend error detected on crawl (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
                `retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
        }
        const duration = Date.now() - startTime;
        const nodes = finalExecution.nodeExecutions ?? [];
        // --- Format response ---
        const responsePayload = {
            executionId: executionUuid,
            status: finalExecution.status,
            targetUrl: ctx.originalUrl,
            // Prefer the duration on the execution record; fall back to local wall-clock time.
            durationMs: finalExecution.durationMs ?? duration,
        };
        const outcome = finalExecution.state?.outcome;
        if (outcome !== undefined && outcome !== null)
            responsePayload.outcome = outcome;
        if (finalExecution.errorMessage)
            responsePayload.errorMessage = finalExecution.errorMessage;
        if (finalExecution.errorInfo?.failedNodeId)
            responsePayload.failedNode = finalExecution.errorInfo.failedNodeId;
        if (executeResponse.resolvedEnvironmentId)
            responsePayload.resolvedEnvironmentId = executeResponse.resolvedEnvironmentId;
        if (executeResponse.resolvedCredentialId)
            responsePayload.resolvedCredentialId = executeResponse.resolvedCredentialId;
        // Backend release 2026-04-25: browser_session block on execution detail.
        // Crawl runs through the same browser pipeline, so the field is populated
        // here too. Pass through verbatim (presigned S3 URLs).
        if (finalExecution.browserSession) {
            responsePayload.browserSession = finalExecution.browserSession;
        }
        // Extract crawl metrics from surfer.crawl node (absent in older graph shapes)
        const crawlNode = nodes.find(n => n.nodeType === 'surfer.crawl');
        if (crawlNode?.outputData) {
            const d = crawlNode.outputData;
            responsePayload.crawlSummary = {
                pagesDiscovered: d.pagesDiscovered,
                actionsExecuted: d.actionsExecuted,
                stepsTaken: d.stepsTaken,
                transitionsRecorded: d.transitionsRecorded,
                knowledgeGraphStates: d.knowledgeGraphStates,
                success: d.success,
                ...(d.error ? { error: d.error } : {}),
            };
        }
        // Extract KG import result from knowledge_graph.import node (absent in older graph shapes)
        const kgNode = nodes.find(n => n.nodeType === 'knowledge_graph.import');
        if (kgNode?.outputData) {
            const d = kgNode.outputData;
            responsePayload.knowledgeGraph = {
                imported: !d.skipped,
                skipped: d.skipped ?? false,
                reason: d.reason ?? '',
                edgesImported: d.edgesImported ?? 0,
                statesImported: d.statesImported ?? 0,
                knowledgeGraphId: d.knowledgeGraphId ?? '',
                ...(Array.isArray(d.importErrors) && d.importErrors.length > 0 ? { importErrors: d.importErrors } : {}),
            };
        }
        logger.toolComplete('trigger_crawl', duration);
        // Bead 0bq: final progress is emitted INSIDE pollExecution's onUpdate when
        // terminal status is detected. Emitting it here would race the response
        // and could cause stale-progressToken transport tear-down.
        const sanitizedPayload = sanitizeResponseUrls(responsePayload, ctx);
        return {
            content: [{ type: 'text', text: JSON.stringify(sanitizedPayload, null, 2) }],
        };
    }
    catch (error) {
        const duration = Date.now() - startTime;
        logger.toolError('trigger_crawl', error, duration);
        // Drop the cached template lookup when the error text contains
        // "not found" or "401".
        if (error instanceof Error && (error.message.includes('not found') || error.message.includes('401'))) {
            invalidateTemplateCache();
        }
        throw handleExternalServiceError(error, 'DebuggAI', 'crawl execution');
    }
    finally {
        process.stdin.removeListener('close', onStdinClose);
        // Tunnel intentionally NOT torn down (reuse path per bead vwd).
        // If tunnel creation failed after key provision, revoke the orphaned key.
        if (!ctx.tunnelId && keyId) {
            client.revokeNgrokKey(keyId).catch(err => logger.warn(`Failed to revoke unused ngrok key ${keyId}: ${err}`));
        }
    }
}