UNPKG

n8n

Version:

n8n Workflow Automation Tool

206 lines • 9.79 kB

JavaScript

"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __metadata = (this && this.__metadata) || function (k, v) { if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.EvalInsightsService = exports.DETERMINISTIC_MODEL_TAG = void 0; const api_types_1 = require("@n8n/api-types"); const backend_common_1 = require("@n8n/backend-common"); const db_1 = require("@n8n/db"); const di_1 = require("@n8n/di"); const bad_request_error_1 = require("../../errors/response-errors/bad-request.error"); const forbidden_error_1 = require("../../errors/response-errors/forbidden.error"); const not_found_error_1 = require("../../errors/response-errors/not-found.error"); const telemetry_1 = require("../../telemetry"); exports.DETERMINISTIC_MODEL_TAG = 'deterministic'; let EvalInsightsService = class EvalInsightsService { constructor(collectionRepo, licenseState, telemetry, logger) { this.collectionRepo = collectionRepo; this.licenseState = licenseState; this.telemetry = telemetry; this.logger = logger; } async generateInsights(user, workflowId, collectionId, options = {}) { if (!this.licenseState.isAiAssistantLicensed()) { throw new forbidden_error_1.ForbiddenError('AI Assistant license required for eval-collection insights'); } const detail = await this.collectionRepo.getDetailByIdAndWorkflowId(collectionId, workflowId); if (!detail) { throw new not_found_error_1.NotFoundError('Collection not found'); } if (!options.forceRegenerate && detail.collection.insightsCache) { const cached = detail.collection.insightsCache; const parsed = api_types_1.aiInsightsResponseSchema.safeParse(cached); if (parsed.success) return parsed.data; this.logger.warn('Cached insights failed schema validation; regenerating', { collectionId, }); } const summaries = []; detail.runs.forEach((run, originalIndex) => { if (run.status === 'completed' && run.metrics) { summaries.push(this.summariseRun(run, originalIndex)); } }); if (summaries.length < 2) { throw new bad_request_error_1.BadRequestError('Collection needs at least 2 completed runs with metrics before insights can be generated'); } const startMs = Date.now(); let response; try { const payload = await this.invokeAgent(detail.collection.name, summaries); response = { generatedAt: new Date().toISOString(), modelUsed: this.resolveModelName(), status: 'ok', insights: payload, }; } catch (error) { this.logger.debug('Insights agent unavailable; falling back to deterministic summary', { collectionId, error: error instanceof Error ? error.message : String(error), }); response = { generatedAt: new Date().toISOString(), modelUsed: exports.DETERMINISTIC_MODEL_TAG, status: 'fallback', insights: this.buildDeterministicInsights(summaries), }; } await this.collectionRepo.updateInsightsCache(collectionId, response); this.telemetry.track('Eval collection insights generated', { user_id: user.id, workflow_id: workflowId, collection_id: collectionId, model_used: response.modelUsed, duration_ms: Date.now() - startMs, status: response.status, regressions_found: response.insights.regressions.length, }); return response; } summariseRun(run, index) { const metrics = this.coerceMetrics(run.metrics); const avg = this.averageScore(metrics); const versionLabel = String.fromCharCode(0x41 + index); return { versionLabel, workflowVersionId: run.workflowVersionId, avgScore: avg, metrics, }; } averageScore(metrics) { const values = Object.values(metrics); if (values.length === 0) return null; return values.reduce((sum, v) => sum + v, 0) / values.length; } coerceMetrics(metrics) { if (!metrics) return {}; const out = {}; for (const [k, v] of Object.entries(metrics)) { if (typeof v === 'number') out[k] = v; else if (typeof v === 'boolean') out[k] = v ? 1 : 0; } return out; } async invokeAgent(_collectionName, _summaries) { throw new Error('LLM agent not yet wired — fallback path will produce the response'); } resolveModelName() { return 'unknown'; } buildDeterministicInsights(summaries) { const scored = summaries.filter((s) => s.avgScore !== null); if (scored.length === 0) { return { winner: { versionLabel: summaries[0]?.versionLabel ?? 'A', headline: 'No scored runs', body: 'No runs in this collection produced numeric metrics yet.', }, regressions: [], suggestedNext: { headline: 'Re-run with metric outputs configured', body: 'Add evaluation set-metrics nodes to the workflow so insights can compare runs.', hypothesis: 'Without numeric metrics there is nothing to compare across versions.', }, }; } const winner = scored.reduce((best, s) => (s.avgScore > best.avgScore ? s : best)); const regressions = this.collectRegressions(scored, winner); const suggestedNext = this.composeSuggestedNext(winner, regressions); return { winner: { versionLabel: winner.versionLabel, headline: `${winner.versionLabel} is the winner`, body: `${winner.versionLabel} leads on average score (${this.formatScore(winner.avgScore)}) across ${Object.keys(winner.metrics).length} metric(s).`, }, regressions, suggestedNext, }; } collectRegressions(scored, winner) { const REGRESSION_DELTA_THRESHOLD = 0.1; const regressions = []; for (const run of scored) { if (run.versionLabel === winner.versionLabel) continue; for (const [metric, winnerScore] of Object.entries(winner.metrics)) { const runScore = run.metrics[metric]; if (typeof runScore !== 'number') continue; const delta = runScore - winnerScore; if (delta >= -REGRESSION_DELTA_THRESHOLD) continue; regressions.push({ versionLabel: run.versionLabel, metric, delta: Number((delta * 100).toFixed(1)), headline: `${run.versionLabel} regressed on ${metric}`, body: `${run.versionLabel} scored ${this.formatScore(runScore)} on ${metric}, ${this.formatScore(Math.abs(delta * 100))} percentage points below ${winner.versionLabel}.`, }); } } return regressions; } composeSuggestedNext(winner, regressions) { if (regressions.length === 0) { return { headline: `Lock in ${winner.versionLabel} as the baseline`, body: `No version regressed sharply against ${winner.versionLabel}. Promote it and use it as the comparison baseline for future experiments.`, hypothesis: `${winner.versionLabel} is a solid starting point; further gains require new variants rather than fixing regressions.`, }; } const worst = regressions.reduce((w, r) => (r.delta < w.delta ? r : w)); return { headline: `Investigate ${worst.metric} regression on ${worst.versionLabel}`, body: `${worst.versionLabel} lost ${Math.abs(worst.delta).toFixed(1)} percentage points on ${worst.metric} vs ${winner.versionLabel}. Try a variant that keeps ${winner.versionLabel}'s configuration for that metric and changes only the rest.`, hypothesis: `If isolating ${winner.versionLabel}'s ${worst.metric} configuration into a new variant recovers the lost points, the rest of ${worst.versionLabel}'s changes were the regression's cause.`, }; } formatScore(score) { return score.toFixed(2); } }; exports.EvalInsightsService = EvalInsightsService; exports.EvalInsightsService = EvalInsightsService = __decorate([ (0, di_1.Service)(), __metadata("design:paramtypes", [db_1.EvaluationCollectionRepository, backend_common_1.LicenseState, telemetry_1.Telemetry, backend_common_1.Logger]) ], EvalInsightsService); //# sourceMappingURL=eval-insights.service.js.map