UNPKG

@debugg-ai/debugg-ai-mcp

Version:

Zero-Config, Fully AI-Managed End-to-End Testing for all code gen platforms.

309 lines (308 loc) 16.1 kB
/** * Trigger Crawl Handler * * Executes the Raw Crawl Workflow Template via the 4-step pattern shared with * testPageChangesHandler: * find template → provision tunnel if localhost → execute → poll → result * * Unlike check_app_in_browser, a crawl does NOT return pass/fail — it returns * the execution status + metadata. The backend's job is to explore the app * and populate the project knowledge graph; this handler just triggers it * and reports back what happened. */ import { config } from '../config/index.js'; import { Logger } from '../utils/logger.js'; import { handleExternalServiceError } from '../utils/errors.js'; import { DebuggAIServerClient } from '../services/index.js'; import { TunnelProvisionError } from '../services/tunnels.js'; import { tunnelManager } from '../services/ngrok/tunnelManager.js'; import { probeLocalPort, probeTunnelHealth } from '../utils/localReachability.js'; import { extractLocalhostPort } from '../utils/urlParser.js'; import { resolveTargetUrl, buildContext, findExistingTunnel, ensureTunnel, sanitizeResponseUrls, touchTunnelById, } from '../utils/tunnelContext.js'; import { getCachedTemplateUuid, invalidateTemplateCache } from '../utils/handlerCaches.js'; import { isTransientWorkflowError, transientReasonTag } from '../utils/transientErrors.js'; import { Telemetry, TelemetryEvents } from '../utils/telemetry.js'; const logger = new Logger({ module: 'triggerCrawlHandler' }); const TEMPLATE_KEYWORD = 'raw crawl'; // Bead kbo9: same env-driven retry budget as testPageChangesHandler (kbxy). 
/**
 * Read the transient-error retry budget from the environment.
 *
 * Looks at `process.env.DEBUGGAI_TRANSIENT_RETRIES`. An unset, empty,
 * non-numeric, or negative value falls back to 1; anything larger than 3 is
 * capped at 3.
 *
 * @returns {number} Retries allowed after the first attempt (0 to 3).
 */
function getMaxTransientRetries() {
  const raw = process.env.DEBUGGAI_TRANSIENT_RETRIES;
  if (raw === undefined || raw === '') return 1;
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed < 0) return 1;
  return Math.min(parsed, 3);
}

/**
 * Wrap a payload as a single pretty-printed JSON text content item.
 *
 * @param {object} payload - Value to serialise.
 * @param {boolean} [isError=false] - When true, the result carries `isError: true`.
 * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}}
 */
function buildTextResult(payload, isError = false) {
  const result = { content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }] };
  if (isError) result.isError = true;
  return result;
}

/**
 * Extract crawl metrics from the `surfer.crawl` node, if present.
 *
 * @param {Array<object>} nodes - Node executions of the finished workflow.
 * @returns {object|undefined} Summary object, or undefined when the node or its output is absent.
 */
function buildCrawlSummary(nodes) {
  const output = nodes.find((node) => node.nodeType === 'surfer.crawl')?.outputData;
  if (!output) return undefined;
  return {
    pagesDiscovered: output.pagesDiscovered,
    actionsExecuted: output.actionsExecuted,
    stepsTaken: output.stepsTaken,
    transitionsRecorded: output.transitionsRecorded,
    knowledgeGraphStates: output.knowledgeGraphStates,
    success: output.success,
    ...(output.error ? { error: output.error } : {}),
  };
}

/**
 * Extract the import result from the `knowledge_graph.import` node, if present.
 *
 * @param {Array<object>} nodes - Node executions of the finished workflow.
 * @returns {object|undefined} Summary object, or undefined when the node or its output is absent.
 */
function buildKnowledgeGraphSummary(nodes) {
  const output = nodes.find((node) => node.nodeType === 'knowledge_graph.import')?.outputData;
  if (!output) return undefined;
  const hasImportErrors = Array.isArray(output.importErrors) && output.importErrors.length > 0;
  return {
    imported: !output.skipped,
    skipped: output.skipped ?? false,
    reason: output.reason ?? '',
    edgesImported: output.edgesImported ?? 0,
    statesImported: output.statesImported ?? 0,
    knowledgeGraphId: output.knowledgeGraphId ?? '',
    ...(hasImportErrors ? { importErrors: output.importErrors } : {}),
  };
}

/**
 * Handle the `trigger_crawl` tool: locate the crawl workflow template, make a
 * localhost target reachable through a tunnel when needed, execute the
 * workflow, poll it to a terminal state, and report what happened.
 *
 * Progress is reported on a 4-step scale: 1 = tunnel, 2 = template lookup,
 * 3 = queued / running, 4 = terminal status reached.
 *
 * @param {object} input - Tool arguments. Besides whatever `resolveTargetUrl`
 *   reads, this handler reads `projectUuid`, `timeoutSeconds`, `environmentId`,
 *   `credentialId`, `credentialRole`, `username` and `password`.
 * @param {object} context - Not read by this handler.
 * @param {Function} [rawProgressCallback] - Optional async progress sink that
 *   receives `{ progress, total, message }`. If it throws once, further
 *   emissions for this request are suppressed.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   JSON text result. `isError: true` is set for the pre-flight port-probe
 *   and tunnel-health failures.
 * @throws Whatever `handleExternalServiceError` produces for any error raised
 *   inside the main try block.
 */
export async function triggerCrawlHandler(input, context, rawProgressCallback) {
  const startTime = Date.now();
  logger.toolStart('trigger_crawl', input);

  // Bead 0bq: progress circuit-breaker — see testPageChangesHandler for rationale.
  let progressDisabled = false;
  const progressCallback = rawProgressCallback
    ? async (update) => {
        if (progressDisabled) return;
        try {
          await rawProgressCallback(update);
        } catch (err) {
          progressDisabled = true;
          logger.warn('Progress emission failed; disabling further emissions for this request', {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }
    : undefined;

  const client = new DebuggAIServerClient(config.api.key);
  await client.init();
  const originalUrl = resolveTargetUrl(input);
  let ctx = buildContext(originalUrl);
  let keyId;

  // When stdin closes, abort the in-flight poll and stop emitting progress.
  const abortController = new AbortController();
  const onStdinClose = () => {
    abortController.abort();
    progressDisabled = true;
  };
  process.stdin.once('close', onStdinClose);

  try {
    // --- Tunnel: reuse existing or provision a fresh one ---
    if (ctx.isLocalhost) {
      // Bead 1om: pre-flight local port probe BEFORE provision/ngrok/backend.
      const localPort = extractLocalhostPort(ctx.originalUrl);
      if (typeof localPort === 'number') {
        const probe = await probeLocalPort(localPort);
        if (!probe.reachable) {
          const payload = {
            error: 'LocalServerUnreachable',
            message: `No server listening on 127.0.0.1:${localPort}. Start your dev server on that port before running trigger_crawl. Probe result: ${probe.code} (${probe.detail ?? 'no detail'}).`,
            detail: {
              port: localPort,
              probeCode: probe.code,
              probeDetail: probe.detail,
              elapsedMs: probe.elapsedMs,
            },
          };
          logger.warn(`Pre-flight port probe failed for ${ctx.originalUrl}: ${probe.code} in ${probe.elapsedMs}ms`);
          return buildTextResult(payload, true);
        }
      }

      if (config.devMode) {
        // Dev mode: local backend can reach localhost directly — no tunnel needed.
        logger.info(`trigger_crawl: dev mode — using localhost URL directly: ${ctx.originalUrl}`);
      } else {
        if (progressCallback) {
          await progressCallback({ progress: 1, total: 4, message: 'Provisioning secure tunnel for localhost...' });
        }
        const reused = findExistingTunnel(ctx);
        if (reused) {
          ctx = reused;
        } else {
          let tunnel;
          try {
            tunnel = await client.tunnels.provisionWithRetry();
          } catch (provisionError) {
            const msg = provisionError instanceof Error ? provisionError.message : String(provisionError);
            const diag = provisionError instanceof TunnelProvisionError
              ? ` ${provisionError.diagnosticSuffix()}`
              : '';
            throw new Error(`Failed to provision tunnel for ${ctx.originalUrl}. ` +
              `The remote browser needs a secure tunnel to reach your local dev server. ` +
              `(Detail: ${msg})${diag}`);
          }
          // Remember the key so the finally block can revoke it if no tunnel ends up attached.
          keyId = tunnel.keyId;
          ctx = await ensureTunnel(ctx, tunnel.tunnelKey, tunnel.tunnelId, tunnel.keyId, () => client.revokeNgrokKey(tunnel.keyId));
        }

        // Bead 1om: post-tunnel health check — verify traffic actually flows.
        if (ctx.targetUrl) {
          const health = await probeTunnelHealth(ctx.targetUrl);
          if (!health.healthy) {
            const payload = {
              error: 'TunnelTrafficBlocked',
              message: `Tunnel was established but traffic isn't reaching the dev server. ${health.detail ?? ''} Common causes: dev server binds to 0.0.0.0 or ::1 but not 127.0.0.1; dev server crashed; firewall.`,
              detail: {
                code: health.code,
                status: health.status,
                ngrokErrorCode: health.ngrokErrorCode,
                elapsedMs: health.elapsedMs,
              },
            };
            logger.warn(`Tunnel health probe failed for ${ctx.targetUrl}: ${health.code} ${health.ngrokErrorCode ?? ''} in ${health.elapsedMs}ms`);
            if (ctx.tunnelId) {
              // Best-effort teardown; a failure here is only logged.
              tunnelManager.stopTunnel(ctx.tunnelId).catch((err) => logger.warn(`Failed to stop broken tunnel ${ctx.tunnelId}: ${err}`));
            }
            keyId = undefined;
            return buildTextResult(payload, true);
          }
        }
      }
    }

    // --- Find the crawl workflow template (cached across calls) ---
    if (progressCallback) {
      await progressCallback({ progress: 2, total: 4, message: 'Locating crawl workflow template...' });
    }
    const templateUuid = await getCachedTemplateUuid(TEMPLATE_KEYWORD, async (name) => {
      return client.workflows.findTemplateByName(name);
    });
    if (!templateUuid) {
      throw new Error(`Raw Crawl Workflow Template not found. ` +
        `Ensure the backend has a template matching "${TEMPLATE_KEYWORD}" seeded and accessible.`);
    }

    // --- Build contextData + env ---
    const contextData = {
      targetUrl: ctx.targetUrl ?? ctx.originalUrl,
    };
    if (input.projectUuid) contextData.projectId = input.projectUuid;
    contextData.headless = true; // D7: the MCP always runs headless — no opt-out.
    if (typeof input.timeoutSeconds === 'number') contextData.timeoutSeconds = input.timeoutSeconds;

    const env = {};
    if (input.environmentId) env.environmentId = input.environmentId;
    if (input.credentialId) env.credentialId = input.credentialId;
    if (input.credentialRole) env.credentialRole = input.credentialRole;
    if (input.username) env.username = input.username;
    if (input.password) env.password = input.password;

    // --- Execute ---
    if (progressCallback) {
      await progressCallback({ progress: 3, total: 4, message: 'Queuing crawl execution...' });
    }

    // --- Execute + Poll (with bounded retry on transient errors, bead kbo9) ---
    const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled']);
    const MAX_RETRIES = getMaxTransientRetries();
    let executeResponse;
    let executionUuid = '';
    let finalExecution;
    let attempt = 0;
    while (true) {
      attempt++;
      if (attempt > 1) {
        Telemetry.capture(TelemetryEvents.WORKFLOW_TRANSIENT_RETRY, {
          tool: 'trigger_crawl',
          attempt,
          reason: transientReasonTag(finalExecution),
          previousExecutionId: executionUuid,
          previousErrorMessage: finalExecution?.errorMessage?.slice(0, 200),
          previousStateError: finalExecution?.state?.error?.slice(0, 200),
        });
        if (progressCallback) {
          await progressCallback({
            progress: 3,
            total: 4,
            message: `Transient backend error — retrying crawl (attempt ${attempt}/${MAX_RETRIES + 1})...`,
          });
        }
        // Linear backoff: 1s before the first retry, 2s before the second, ...
        await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt - 1)));
      }

      executeResponse = await client.workflows.executeWorkflow(templateUuid, contextData, Object.keys(env).length > 0 ? env : undefined);
      executionUuid = executeResponse.executionUuid;
      logger.info(`Crawl execution queued: ${executionUuid}${attempt > 1 ? ` (retry ${attempt - 1}/${MAX_RETRIES})` : ''}`);

      // --- Poll ---
      // Bead 0bq: emit the final progress (4/4) INSIDE onUpdate when terminal
      // status is detected, so there's no post-resolve emission that could race
      // the response and cause stale-progressToken transport tear-down.
      finalExecution = await client.workflows.pollExecution(executionUuid, async (exec) => {
        if (ctx.tunnelId) touchTunnelById(ctx.tunnelId);
        if (!progressCallback) return;
        const nodeCount = (exec.nodeExecutions ?? []).length;
        // Only a terminal status counts as step 4/4; while the crawl is still
        // running it stays at 3/4 so clients are not told the work is complete.
        const isTerminal = TERMINAL_STATUSES.has(exec.status);
        await progressCallback({
          progress: isTerminal ? 4 : 3,
          total: 4,
          message: `Crawl ${exec.status} (${nodeCount} nodes)`,
        });
      }, abortController.signal);

      if (attempt > MAX_RETRIES) break;
      if (!isTransientWorkflowError(finalExecution)) break;
      logger.warn(`Transient backend error detected on crawl (${transientReasonTag(finalExecution) ?? 'unknown'}) — ` +
        `retrying (attempt ${attempt + 1}/${MAX_RETRIES + 1})`);
    }

    const duration = Date.now() - startTime;
    const nodes = finalExecution.nodeExecutions ?? [];

    // --- Format response ---
    const responsePayload = {
      executionId: executionUuid,
      status: finalExecution.status,
      targetUrl: ctx.originalUrl,
      durationMs: finalExecution.durationMs ?? duration,
    };
    const outcome = finalExecution.state?.outcome;
    if (outcome !== undefined && outcome !== null) responsePayload.outcome = outcome;
    if (finalExecution.errorMessage) responsePayload.errorMessage = finalExecution.errorMessage;
    if (finalExecution.errorInfo?.failedNodeId) responsePayload.failedNode = finalExecution.errorInfo.failedNodeId;
    if (executeResponse.resolvedEnvironmentId) responsePayload.resolvedEnvironmentId = executeResponse.resolvedEnvironmentId;
    if (executeResponse.resolvedCredentialId) responsePayload.resolvedCredentialId = executeResponse.resolvedCredentialId;

    // Backend release 2026-04-25: browser_session block on execution detail.
    // Crawl runs through the same browser pipeline, so the field is populated
    // here too. Pass through verbatim (presigned S3 URLs).
    if (finalExecution.browserSession) {
      responsePayload.browserSession = finalExecution.browserSession;
    }

    // Extract crawl metrics from surfer.crawl node (absent in older graph shapes)
    const crawlSummary = buildCrawlSummary(nodes);
    if (crawlSummary) responsePayload.crawlSummary = crawlSummary;

    // Extract KG import result from knowledge_graph.import node (absent in older graph shapes)
    const knowledgeGraph = buildKnowledgeGraphSummary(nodes);
    if (knowledgeGraph) responsePayload.knowledgeGraph = knowledgeGraph;

    logger.toolComplete('trigger_crawl', duration);

    // Bead 0bq: final progress is emitted INSIDE pollExecution's onUpdate when
    // terminal status is detected. Emitting it here would race the response
    // and could cause stale-progressToken transport tear-down.
    const sanitizedPayload = sanitizeResponseUrls(responsePayload, ctx);
    return buildTextResult(sanitizedPayload);
  } catch (error) {
    const duration = Date.now() - startTime;
    logger.toolError('trigger_crawl', error, duration);
    // Drop the cached template UUID when the message contains 'not found' or '401'.
    if (error instanceof Error && (error.message.includes('not found') || error.message.includes('401'))) {
      invalidateTemplateCache();
    }
    throw handleExternalServiceError(error, 'DebuggAI', 'crawl execution');
  } finally {
    process.stdin.removeListener('close', onStdinClose);
    // Tunnel intentionally NOT torn down (reuse path per bead vwd).
    // If tunnel creation failed after key provision, revoke the orphaned key.
    if (!ctx.tunnelId && keyId) {
      client.revokeNgrokKey(keyId).catch((err) => logger.warn(`Failed to revoke unused ngrok key ${keyId}: ${err}`));
    }
  }
}