@hashgraph/solo

// SPDX-License-Identifier: Apache-2.0

import AdmZip from 'adm-zip';
import fs from 'node:fs';
import path from 'node:path';
import chalk from 'chalk';
import * as constants from '../../core/constants.js';
import {PathEx} from '../../business/utils/path-ex.js';
import {type SoloLogger} from '../../core/logging/solo-logger.js';

const {green, yellow} = chalk;

/**
 * Severity-ordered categories for diagnostics findings.
 *
 * Ordering (lowest value = highest severity in the report):
 *   1. image-pull       — container image could not be pulled; pod will never start.
 *   2. oom              — container was killed by the kernel due to memory exhaustion.
 *   3. pod-readiness    — pod is not Running or its readiness probe is failing.
 *   4. consensus-active — consensus node did not reach ACTIVE platform status.
 *   5. log-exception    — an exception/stack-trace was found in an application log.
 *   6. app-error        — an ERROR line was found in a pod's raw container log or in solo.log.
 */
export type DiagnosticsFindingCategory =
  | 'image-pull'
  | 'oom'
  | 'pod-readiness'
  | 'consensus-active'
  | 'log-exception'
  | 'app-error';

/** A single detected problem with its supporting evidence lines. */
export type DiagnosticsFinding = {
  category: DiagnosticsFindingCategory;
  title: string;
  /** Relative path of the source file (or "archive:entry") that triggered this finding. */
  source: string;
  /** Up to 14 verbatim lines from the source that match the failure pattern. */
  evidence: string[];
};

/** Describes one log file inside a consensus node archive and which checks apply to it. */
type ConsensusLogDefinition = {
  entrySuffix: 'output/swirlds.log' | 'output/hgcaa.log';
  displayName: 'swirlds.log' | 'hgcaa.log';
  checkConsensusActive: boolean;
};

/**
 * DiagnosticsAnalyzer scans a previously-collected diagnostics output directory
 * (produced by `deployment diagnostics logs`) and identifies common failure
 * signatures without requiring a live cluster connection.
 *
 * ## Input sources
 *
 * ### 1. Solo CLI log (`solo.log`)
 * The Solo CLI's own Pino log file (`~/.solo/logs/solo.log` by default, or
 * `solo.log` found recursively under `customOutputDirectory`). Lines
 * matching `] ERROR:` are captured as `app-error` findings. ANSI escape
 * codes and `[traceId="..."]` suffixes are stripped before matching.
 *
 * ### 2. Pod describe files (`*.describe.txt`)
 * Written by `downloadHieroComponentLogs()` for every pod across all clusters.
 * These are the output of `kubectl describe pod <name> -n <namespace>` and
 * contain the pod's status, container states, events, and resource usage.
 *
 * Detectable errors:
 *
 * | Category        | Detected keywords / conditions                                                          |
 * |-----------------|-----------------------------------------------------------------------------------------|
 * | `image-pull`    | `ErrImagePull`, `ImagePullBackOff`, `Back-off pulling image`,                           |
 * |                 | `failed to pull and unpack image`, `unexpected EOF` (truncated layer),                  |
 * |                 | `toomanyrequests`, `rate limit exceeded`, `429 Too Many Requests`                       |
 * | `oom`           | `OOMKilled`, `out of memory`, `reason: OOMKilled`                                       |
 * | `pod-readiness` | Pod `Status` field is not `Running`, or `Ready: False` is present in container status;  |
 * |                 | supporting `Reason:` / `Message:` lines are captured as evidence                        |
 *
 * ### 3. Consensus node log archives (`*-log-config.zip`)
 * Written by `getNodeLogsAndConfigs()` under `~/.solo/logs/<namespace>/`.
 * Each zip contains the node's log and config snapshot. Only two log files
 * inside the archive are inspected:
 *
 * - `output/swirlds.log` — Hashgraph platform log
 * - `output/hgcaa.log`   — Hedera application log
 *
 * Detectable errors:
 *
 * | Category           | Detected keywords / conditions                                                      |
 * |--------------------|-------------------------------------------------------------------------------------|
 * | `consensus-active` | `swirlds.log` never contains the word `ACTIVE` — the node stalled during            |
 * |                    | startup (e.g. stuck in `STARTING_UP`, `OBSERVING`, or `REPLAYING_EVENTS`);          |
 * |                    | status-transition lines are captured as evidence                                    |
 * | `log-exception`    | Any line in `swirlds.log` or `hgcaa.log` matching `Exception`, `Error`,             |
 * |                    | or `Caused by:` — the first matching stack-trace block (up to 14 lines) is          |
 * |                    | captured as evidence                                                                |
 *
 * ### 4. Pod container logs (`*.log`)
 * Raw container logs for non-consensus components (file names starting with
 * `mirror`, `block`, `relay`, `explorer`, or `solo-shared`). Lines containing
 * `ERROR` or `FATAL` (case-insensitive) are captured as `app-error` findings.
 *
 * ## Output
 * All findings are written to `diagnostics-analysis.txt` inside the input
 * directory. Up to 10 findings are also printed to the terminal in severity
 * order. Duplicate findings (same category + title + source) are suppressed.
 */
export class DiagnosticsAnalyzer {
  private static readonly CONSENSUS_LOG_DEFINITIONS: readonly ConsensusLogDefinition[] = [
    {entrySuffix: 'output/swirlds.log', displayName: 'swirlds.log', checkConsensusActive: true},
    {entrySuffix: 'output/hgcaa.log', displayName: 'hgcaa.log', checkConsensusActive: false},
  ];

  /** Report ordering: a lower number is reported first. */
  private static readonly SEVERITY_ORDER: Readonly<Record<DiagnosticsFindingCategory, number>> = {
    'image-pull': 1,
    oom: 2,
    'pod-readiness': 3,
    'consensus-active': 4,
    'log-exception': 5,
    'app-error': 6,
  };

  /** Human-readable label printed in front of each finding in the report. */
  private static readonly CATEGORY_LABEL: Readonly<Record<DiagnosticsFindingCategory, string>> = {
    'image-pull': 'Image Pull',
    oom: 'Out Of Memory',
    'pod-readiness': 'Pod Readiness',
    'consensus-active': 'Consensus Active State',
    'log-exception': 'Exception Stack',
    'app-error': 'Application Error',
  };

  /** Maximum number of findings echoed to the terminal; the report file always contains all of them. */
  private static readonly MAX_TERMINAL_FINDINGS: number = 10;

  /** Maximum number of evidence lines stored per finding. */
  private static readonly MAX_EVIDENCE_LINES: number = 14;

  public constructor(private readonly logger: SoloLogger) {}

  /**
   * Run the full analysis against `customOutputDirectory` (or the default
   * `~/.solo/logs/hiero-components-logs` when empty).
   *
   * Consensus node zip archives are looked up under `customOutputDirectory`
   * when it is provided; otherwise under `~/.solo/logs/<namespaceName>/` when
   * `namespaceName` is provided, or directly under `~/.solo/logs/`.
   *
   * The report is written to `diagnostics-analysis.txt` inside the scanned
   * directory (which is created if missing) and a severity-ordered summary of
   * at most 10 findings is shown to the user.
   *
   * @param customOutputDirectory - directory to scan; an empty string selects the defaults
   * @param namespaceName - namespace used to locate consensus archives in the default layout
   */
  public analyze(customOutputDirectory: string, namespaceName: string | undefined): void {
    const hieroOutputDirectory: string = customOutputDirectory
      ? path.resolve(customOutputDirectory)
      : PathEx.join(constants.SOLO_LOGS_DIR, 'hiero-components-logs');
    const findings: DiagnosticsFinding[] = [];

    this.logger.showUser(`Scanning directory: ${hieroOutputDirectory}`);

    // None of the analysis steps below create this directory, so a single check is enough.
    const hieroDirectoryExists: boolean = fs.existsSync(hieroOutputDirectory);

    if (hieroDirectoryExists) {
      this.analyzeDescribeFiles(hieroOutputDirectory, findings);
    } else {
      this.logger.showUser(yellow(` Pod describe directory not found, skipping: ${hieroOutputDirectory}`));
    }

    let consensusArchiveDirectory: string = constants.SOLO_LOGS_DIR;
    if (customOutputDirectory) {
      consensusArchiveDirectory = path.resolve(customOutputDirectory);
    } else if (namespaceName) {
      consensusArchiveDirectory = PathEx.join(constants.SOLO_LOGS_DIR, namespaceName);
    }

    if (fs.existsSync(consensusArchiveDirectory)) {
      this.analyzeConsensusNodeArchives(consensusArchiveDirectory, findings);
    } else {
      this.logger.showUser(yellow(` Consensus archive directory not found, skipping: ${consensusArchiveDirectory}`));
    }

    if (hieroDirectoryExists) {
      this.analyzePodLogFiles(hieroOutputDirectory, findings);
      this.analyzeSoloLogFiles(hieroOutputDirectory, customOutputDirectory, findings);
    } else {
      this.logger.showUser(yellow(` Diagnostics output directory not found, skipping: ${hieroOutputDirectory}`));
    }

    // The report is always written, so make sure its parent directory exists.
    if (!hieroDirectoryExists) {
      fs.mkdirSync(hieroOutputDirectory, {recursive: true});
    }

    const reportPath: string = PathEx.join(hieroOutputDirectory, 'diagnostics-analysis.txt');
    this.logger.showUser(`Writing report to: ${reportPath}`);
    const reportText: string = this.renderDiagnosticsFindings(findings);
    fs.writeFileSync(reportPath, reportText, 'utf8');

    if (findings.length === 0) {
      this.logger.showUser(green(`No common failure signatures detected. Report: ${reportPath}`));
      return;
    }

    this.logger.showUser(
      yellow(`Detected ${findings.length} potential issue(s) from diagnostics logs. Summary written to ${reportPath}`),
    );

    // Show the most severe findings first so the terminal summary matches the report order.
    const terminalLimit: number = DiagnosticsAnalyzer.MAX_TERMINAL_FINDINGS;
    const orderedFindings: DiagnosticsFinding[] = this.sortFindingsBySeverity(findings);
    for (const [index, finding] of orderedFindings.slice(0, terminalLimit).entries()) {
      this.logger.showUser(`${index + 1}. ${finding.title} [${finding.source}]`);
      if (finding.evidence.length === 0) {
        continue;
      }

      // Stack traces need more context than single-line matches to be useful.
      const maxEvidenceLines: number = finding.category === 'log-exception' ? 8 : 4;
      for (const evidenceLine of finding.evidence.slice(0, maxEvidenceLines)) {
        this.logger.showUser(` - ${evidenceLine}`);
      }
      if (finding.evidence.length > maxEvidenceLines) {
        this.logger.showUser(
          ` ... and ${finding.evidence.length - maxEvidenceLines} more evidence line(s) in diagnostics-analysis.txt`,
        );
      }
    }

    if (findings.length > terminalLimit) {
      this.logger.showUser(
        `... and ${findings.length - terminalLimit} more. See diagnostics-analysis.txt for details.`,
      );
    }
  }

  /**
   * Recursively scans `rootDirectory` for `*.describe.txt` files (one per pod)
   * and checks each for image-pull failures, OOM kills, and pod-readiness
   * problems.
   *
   * Detected errors:
   * - `image-pull`    ErrImagePull / ImagePullBackOff / rate-limit / unexpected EOF
   * - `oom`           OOMKilled / out of memory
   * - `pod-readiness` Status != Running OR Ready: False
   *
   * Files that cannot be read are reported to the user and skipped.
   */
  private analyzeDescribeFiles(rootDirectory: string, findings: DiagnosticsFinding[]): void {
    const describeFiles: string[] = this.collectFilesRecursively(rootDirectory, (filePath: string): boolean =>
      filePath.endsWith('.describe.txt'),
    );

    // Matches any image-pull error surfaced in `kubectl describe pod` output.
    // Covers:
    //   - ErrImagePull / ImagePullBackOff (standard Kubernetes pull errors)
    //   - "Back-off pulling image" (CRI back-off message in Events)
    //   - "failed to pull and unpack image" (containerd error)
    //   - "unexpected EOF" (truncated layer download)
    //   - toomanyrequests / rate limit exceeded / 429 Too Many Requests
    //     (Docker Hub and other registries throttle anonymous pulls)
    const imagePullPattern: RegExp =
      /ErrImagePull|ImagePullBackOff|Back-off pulling image|failed to pull and unpack image|unexpected EOF|toomanyrequests|rate limit exceeded|429 Too Many Requests/i;

    // Matches out-of-memory kills.
    // "OOMKilled" appears in the container's LastTerminationState and in Events.
    // "reason: OOMKilled" is the structured field in the container status JSON.
    const oomPattern: RegExp = /OOMKilled|out of memory|reason:\s*OOMKilled/i;

    this.logger.showUser(` Found ${describeFiles.length} pod describe file(s)`);

    for (const describeFile of describeFiles) {
      const source: string = path.relative(rootDirectory, describeFile);
      this.logger.showUser(` Reading: ${source}`);

      let content: string;
      try {
        content = fs.readFileSync(describeFile, 'utf8');
      } catch (error) {
        this.logger.showUser(
          yellow(` Unable to read describe file ${source}: ${DiagnosticsAnalyzer.describeError(error)}`),
        );
        continue;
      }

      const podName: string = path.basename(describeFile, '.describe.txt');

      if (imagePullPattern.test(content)) {
        this.addDiagnosticsFinding(findings, {
          category: 'image-pull',
          title: `Image pull failure detected for pod ${podName}`,
          source,
          evidence: this.extractMatchSnippets(content, imagePullPattern, 8),
        });
      }

      if (oomPattern.test(content)) {
        this.addDiagnosticsFinding(findings, {
          category: 'oom',
          title: `OOM-related failure detected for pod ${podName}`,
          source,
          evidence: this.extractMatchSnippets(content, oomPattern, 6),
        });
      }

      // A pod is unhealthy if its top-level status is anything other than
      // "Running" or if any container is not ready.
      //
      // Two file formats are possible depending on how the describe file was
      // collected:
      //   - Text format (kubectl describe pod):     "Status: Pending" / "Ready: False"
      //   - YAML format (kubectl get pod -o yaml):  "phase: Pending"  / "ready: false"
      //
      // Both are matched so the check is format-agnostic.
      // Reason: / Message: / reason: / message: lines (case-insensitive) are
      // captured for additional context.
      const statusMatch: RegExpMatchArray | null = content.match(/^\s*(?:Status|phase):\s+([^\n]+)/m);
      // YAML values may be quoted; strip one leading and one trailing double quote.
      const status: string = statusMatch?.[1]?.trim().replaceAll(/^"|"$/g, '') ?? '';
      const readyFalse: boolean = /^\s*[Rr]eady:\s+[Ff]alse\b/m.test(content);

      if ((status && status !== constants.POD_PHASE_RUNNING) || readyFalse) {
        const evidence: string[] = [];
        if (status) {
          evidence.push(`Status: ${status}`);
        }
        if (readyFalse) {
          evidence.push('Ready: False');
        }
        evidence.push(...this.extractMatchSnippetsJoiningContinuations(content, /^\s*(Reason|Message):\s+/i, 8));

        this.addDiagnosticsFinding(findings, {
          category: 'pod-readiness',
          title: `Pod not ready/running: ${podName}`,
          source,
          evidence,
        });
      }
    }
  }

  /**
   * Recursively scans `rootDirectory` for `*.log` pod log files and checks each
   * for application-level ERROR lines (category: `app-error`).
   *
   * These are the raw container logs downloaded by `downloadHieroComponentLogs()`
   * alongside the `*.describe.txt` files. Each file is scanned for lines
   * containing `ERROR` or `FATAL` and up to 8 matching lines are captured.
   */
  private analyzePodLogFiles(rootDirectory: string, findings: DiagnosticsFinding[]): void {
    // Only scan logs for non-consensus components. Consensus node logs are
    // handled separately via the *-log-config.zip archives (which include
    // swirlds.log and hgcaa.log). Broad *.log would match those files too
    // and produce duplicate / noisy findings.
    const componentLogPattern: RegExp = /[\\/](?:mirror|block|relay|explorer|solo-shared)[^/\\]*\.log$/i;
    const logFiles: string[] = this.collectFilesRecursively(rootDirectory, (filePath: string): boolean =>
      componentLogPattern.test(filePath),
    );

    const errorPattern: RegExp = /\b(?:ERROR|FATAL)\b/i;

    this.logger.showUser(` Found ${logFiles.length} pod log file(s)`);

    for (const logFile of logFiles) {
      const relativePath: string = path.relative(rootDirectory, logFile);
      this.logger.showUser(` Reading: ${relativePath}`);

      let content: string;
      try {
        content = fs.readFileSync(logFile, 'utf8');
      } catch (error) {
        this.logger.showUser(
          yellow(` Unable to read log file ${relativePath}: ${DiagnosticsAnalyzer.describeError(error)}`),
        );
        continue;
      }

      // Strip the leading Docker/containerd timestamp prefix
      // (e.g. "2026-04-06T03:24:32.470558065Z ") so that the evidence shows the
      // application log line itself.
      const strippedContent: string = content.replaceAll(/^\d{4}-\d{2}-\d{2}T[\d:.]+Z\s+/gm, '');

      if (!errorPattern.test(strippedContent)) {
        continue;
      }

      const podName: string = path.basename(logFile, '.log');
      const evidence: string[] = this.extractMatchSnippets(strippedContent, errorPattern, 8);

      this.addDiagnosticsFinding(findings, {
        category: 'app-error',
        title: `Application ERROR detected in pod log: ${podName}`,
        source: relativePath,
        evidence,
      });
    }
  }

  /**
   * Searches for `solo.log` in `hieroOutputDirectory` (recursively) and, when
   * no custom output directory was specified, also checks the standard
   * `~/.solo/logs/solo.log` location. ERROR lines are extracted and reported
   * as `app-error` findings.
   */
  private analyzeSoloLogFiles(
    hieroOutputDirectory: string,
    customOutputDirectory: string,
    findings: DiagnosticsFinding[],
  ): void {
    const soloLogFiles: string[] = this.collectFilesRecursively(
      hieroOutputDirectory,
      (filePath: string): boolean => path.basename(filePath) === 'solo.log',
    );

    // When using the default output path, the solo.log lives one level up at
    // ~/.solo/logs/solo.log — outside hieroOutputDirectory, so check it separately.
    if (!customOutputDirectory) {
      const defaultSoloLog: string = PathEx.join(constants.SOLO_LOGS_DIR, 'solo.log');
      if (fs.existsSync(defaultSoloLog) && !soloLogFiles.includes(defaultSoloLog)) {
        soloLogFiles.push(defaultSoloLog);
      }
    }

    this.logger.showUser(` Found ${soloLogFiles.length} solo log file(s)`);

    const errorPattern: RegExp = /\]\s+ERROR:/;
    // eslint-disable-next-line no-control-regex
    const ansiPattern: RegExp = new RegExp('\u001B\\[[0-9;]*m', 'g');
    const traceIdPattern: RegExp = /\s+\[traceId="[^"]*"\]/g;

    for (const soloLogFile of soloLogFiles) {
      const relativePath: string = path.relative(hieroOutputDirectory, soloLogFile);
      const sourceLabel: string = relativePath || path.basename(soloLogFile);
      this.logger.showUser(` Reading: ${sourceLabel}`);

      let content: string;
      try {
        content = fs.readFileSync(soloLogFile, 'utf8');
      } catch (error) {
        this.logger.showUser(
          yellow(` Unable to read solo log ${sourceLabel}: ${DiagnosticsAnalyzer.describeError(error)}`),
        );
        continue;
      }

      // Remove colour codes and trace-id suffixes so they neither break the
      // match nor clutter the evidence.
      const cleanedContent: string = content.replaceAll(ansiPattern, '').replaceAll(traceIdPattern, '');

      if (!errorPattern.test(cleanedContent)) {
        continue;
      }

      const evidence: string[] = this.extractSoloLogErrorBlocks(cleanedContent, 3, 14);

      this.addDiagnosticsFinding(findings, {
        category: 'app-error',
        title: 'ERROR detected in solo.log',
        source: sourceLabel,
        evidence,
      });
    }
  }

  /**
   * Recursively scans `archiveRootDirectory` for `*-log-config.zip` archives
   * produced by `getNodeLogsAndConfigs()` and inspects two log files inside
   * each archive:
   *
   * - `output/swirlds.log` — checked for absence of the `ACTIVE` platform
   *   status marker (category: `consensus-active`) and for exception blocks
   *   (category: `log-exception`).
   * - `output/hgcaa.log` — checked for exception blocks only
   *   (category: `log-exception`).
   *
   * Only the first exception block per log file is captured (up to 14 lines)
   * to keep the report readable. Archives that cannot be opened are reported
   * to the user and skipped.
   */
  private analyzeConsensusNodeArchives(archiveRootDirectory: string, findings: DiagnosticsFinding[]): void {
    const archiveFiles: string[] = this.collectFilesRecursively(archiveRootDirectory, (filePath: string): boolean =>
      filePath.endsWith('-log-config.zip'),
    );

    this.logger.showUser(` Found ${archiveFiles.length} consensus log archive(s)`);

    for (const archiveFile of archiveFiles) {
      const archiveName: string = path.basename(archiveFile);
      this.logger.showUser(` Unzipping: ${archiveName}`);

      let archive: AdmZip;
      try {
        archive = new AdmZip(archiveFile, {readEntries: true});
      } catch (error) {
        this.logger.showUser(
          yellow(` Unable to read archive ${archiveName}: ${DiagnosticsAnalyzer.describeError(error)}`),
        );
        continue;
      }

      for (const entry of archive.getEntries()) {
        const logDefinition: ConsensusLogDefinition | undefined = this.findConsensusLogDefinition(entry.entryName);
        if (!logDefinition) {
          continue;
        }
        this.analyzeConsensusLogEntry(archiveName, entry, logDefinition, findings);
      }
    }
  }

  /** Returns the log definition whose `entrySuffix` ends `entryName`, or `undefined` when none matches. */
  private findConsensusLogDefinition(entryName: string): ConsensusLogDefinition | undefined {
    return DiagnosticsAnalyzer.CONSENSUS_LOG_DEFINITIONS.find((logDefinition: ConsensusLogDefinition): boolean =>
      entryName.endsWith(logDefinition.entrySuffix),
    );
  }

  /**
   * Decodes one archive entry as UTF-8 and runs the checks selected by
   * `logDefinition`. The finding source is reported as `<archive>:<entry>`.
   *
   * An entry that cannot be extracted is reported to the user and skipped so
   * that a single damaged entry does not abort the whole analysis.
   */
  private analyzeConsensusLogEntry(
    archiveName: string,
    entry: AdmZip.IZipEntry,
    logDefinition: ConsensusLogDefinition,
    findings: DiagnosticsFinding[],
  ): void {
    this.logger.showUser(` Reading entry: ${entry.entryName}`);
    const source: string = `${archiveName}:${entry.entryName}`;

    let content: string;
    try {
      content = entry.getData().toString('utf8');
    } catch (error) {
      this.logger.showUser(
        yellow(` Unable to read archive entry ${source}: ${DiagnosticsAnalyzer.describeError(error)}`),
      );
      return;
    }

    if (logDefinition.checkConsensusActive) {
      this.analyzeConsensusActiveStatus(content, source, findings);
    }
    this.analyzeExceptionBlocks(logDefinition.displayName, content, source, findings);
  }

  /**
   * A healthy consensus node transitions through STARTING_UP → OBSERVING →
   * REPLAYING_EVENTS → ACTIVE. If `ACTIVE` never appears in swirlds.log,
   * the node likely stalled before becoming ready for transactions.
   *
   * Status-related lines (up to 8) are attached as evidence; when there are
   * none, a placeholder evidence line is used instead.
   */
  private analyzeConsensusActiveStatus(content: string, source: string, findings: DiagnosticsFinding[]): void {
    if (/\bACTIVE\b/.test(content)) {
      return;
    }

    const evidence: string[] = this.extractMatchSnippets(
      content,
      /PlatformStatus|status|STARTING_UP|OBSERVING|REPLAYING_EVENTS|FREEZING|ACTIVE/i,
      8,
    );
    if (evidence.length === 0) {
      evidence.push('No ACTIVE status marker found in swirlds.log');
    }

    this.addDiagnosticsFinding(findings, {
      category: 'consensus-active',
      title: 'Consensus node may not have reached ACTIVE status',
      source,
      evidence,
    });
  }

  /**
   * Captures the first exception/stack-trace block from a consensus log file
   * and records it as a `log-exception` finding. Does nothing when the log
   * contains no reportable exception block.
   */
  private analyzeExceptionBlocks(
    logDisplayName: ConsensusLogDefinition['displayName'],
    content: string,
    source: string,
    findings: DiagnosticsFinding[],
  ): void {
    const [firstBlock]: string[] = this.extractExceptionBlocks(content, 1, 14);
    if (firstBlock === undefined) {
      return;
    }

    this.addDiagnosticsFinding(findings, {
      category: 'log-exception',
      title: `Exception detected in ${logDisplayName}`,
      source,
      evidence: firstBlock.split('\n').filter((line: string): boolean => line.trim().length > 0),
    });
  }

  /**
   * Adds `finding` to `findings` unless an identical entry (same category,
   * title, and source) already exists. Evidence lines are deduplicated,
   * blank lines are dropped, and the result is capped at 14 entries to keep
   * the report compact.
   */
  private addDiagnosticsFinding(findings: DiagnosticsFinding[], finding: DiagnosticsFinding): void {
    const alreadyRecorded: boolean = findings.some(
      (item: DiagnosticsFinding): boolean =>
        item.category === finding.category && item.title === finding.title && item.source === finding.source,
    );
    if (alreadyRecorded) {
      return;
    }

    findings.push({
      ...finding,
      evidence: [...new Set(finding.evidence)]
        .filter((line: string): boolean => line.trim().length > 0)
        .slice(0, DiagnosticsAnalyzer.MAX_EVIDENCE_LINES),
    });
  }

  /**
   * Walks `rootDirectory` recursively and returns all file paths for which
   * `matcher` returns `true`.
   *
   * Only entries reported as directories are descended into and only entries
   * reported as regular files are offered to `matcher`. A directory that
   * cannot be listed is reported to the user and skipped so that one
   * unreadable folder does not abort the whole scan.
   */
  private collectFilesRecursively(rootDirectory: string, matcher: (filePath: string) => boolean): string[] {
    const files: string[] = [];

    const visit: (directory: string) => void = (directory: string): void => {
      let entries: fs.Dirent[];
      try {
        entries = fs.readdirSync(directory, {withFileTypes: true});
      } catch (error) {
        this.logger.showUser(
          yellow(` Unable to read directory ${directory}: ${DiagnosticsAnalyzer.describeError(error)}`),
        );
        return;
      }

      for (const entry of entries) {
        const entryPath: string = path.join(directory, entry.name);
        if (entry.isDirectory()) {
          visit(entryPath);
          continue;
        }
        if (entry.isFile() && matcher(entryPath)) {
          files.push(entryPath);
        }
      }
    };

    visit(rootDirectory);
    return files;
  }

  /**
   * Extracts up to `maxBlocks` ERROR blocks from a solo.log file.
   *
   * Each block starts on a line matching `] ERROR:` and continues until the
   * next log entry — any line starting with `[HH:MM:SS.mmm]` — which is where
   * the Pino `err:` object dump ends. Blank lines inside a block are dropped.
   * Each block is capped at `maxLinesPerBlock` lines.
   *
   * Evidence lines are returned flat (one string per line) in
   * `"line <N>: <content>"` format so they render consistently with other
   * findings.
   */
  private extractSoloLogErrorBlocks(content: string, maxBlocks: number, maxLinesPerBlock: number): string[] {
    const lines: string[] = content.split(/\r?\n/);
    const errorPattern: RegExp = /\]\s+ERROR:/;
    // New Pino log entries start with a bracketed timestamp, e.g. "[17:25:23.788]"
    const newEntryPattern: RegExp = /^\[\d{2}:\d{2}:\d{2}\.\d{3}]/;
    const evidence: string[] = [];
    let blocksCollected: number = 0;

    for (let index: number = 0; index < lines.length && blocksCollected < maxBlocks; index++) {
      if (!errorPattern.test(lines[index])) {
        continue;
      }

      const blockLines: string[] = [`line ${index + 1}: ${lines[index].trim()}`];
      let next: number = index + 1;
      while (next < lines.length && blockLines.length < maxLinesPerBlock) {
        const nextLine: string = lines[next];
        // Stop at the next log entry; blank lines are skipped but do not end the block.
        if (newEntryPattern.test(nextLine)) {
          break;
        }
        if (nextLine.trim().length > 0) {
          blockLines.push(`line ${next + 1}: ${nextLine.trim()}`);
        }
        next++;
      }

      evidence.push(...blockLines);
      blocksCollected++;
      // Resume scanning right after the consumed block (the loop increment adds one).
      index = next - 1;
    }

    return evidence;
  }

  /**
   * Returns up to `maxMatches` lines from `content` that match `pattern`,
   * formatted as `"line <N>: <trimmed line>"`.
   *
   * The global (`g`) flag is stripped before matching so the RegExp lastIndex
   * does not interfere with repeated calls against the same pattern instance.
   */
  private extractMatchSnippets(content: string, pattern: RegExp, maxMatches: number): string[] {
    const snippets: string[] = [];
    const lines: string[] = content.split(/\r?\n/);
    const matcher: RegExp = DiagnosticsAnalyzer.toNonGlobalPattern(pattern);

    for (const [index, line] of lines.entries()) {
      if (matcher.test(line)) {
        snippets.push(`line ${index + 1}: ${line.trim()}`);
        if (snippets.length >= maxMatches) {
          break;
        }
      }
    }

    return snippets;
  }

  /**
   * Like {@link extractMatchSnippets} but joins indented continuation lines
   * (YAML/kubectl-describe multi-line values) into a single evidence entry.
   *
   * When a matching key line is found, any immediately following lines whose
   * leading whitespace is strictly greater than the key line's indentation are
   * appended (space-separated) before the snippet is recorded. A blank line
   * ends the continuation. This collapses a multi-line `message:` value into
   * one readable line instead of surfacing only the truncated first line.
   */
  private extractMatchSnippetsJoiningContinuations(content: string, pattern: RegExp, maxMatches: number): string[] {
    const snippets: string[] = [];
    const lines: string[] = content.split(/\r?\n/);
    const matcher: RegExp = DiagnosticsAnalyzer.toNonGlobalPattern(pattern);

    for (let index: number = 0; index < lines.length && snippets.length < maxMatches; index++) {
      const line: string = lines[index];
      if (!matcher.test(line)) {
        continue;
      }

      const keyIndent: number = (line.match(/^(\s*)/)?.[1] ?? '').length;
      let joined: string = line.trim();

      // Absorb continuation lines that are indented more than the key line.
      let next: number = index + 1;
      while (next < lines.length) {
        const nextLine: string = lines[next];
        if (nextLine.trim().length === 0) {
          break;
        }
        const nextIndent: number = (nextLine.match(/^(\s*)/)?.[1] ?? '').length;
        if (nextIndent <= keyIndent) {
          break;
        }
        joined += ' ' + nextLine.trim();
        next++;
      }

      snippets.push(`line ${index + 1}: ${joined}`);
    }

    return snippets;
  }

  /**
   * Extracts up to `maxBlocks` exception/stack-trace blocks from `content`.
   *
   * A block starts on any line matching `Exception`, `Error`, or `Caused by:`
   * and continues as long as subsequent lines are stack frames (`at …`),
   * chained causes (`Caused by:`), suppressed exceptions (`Suppressed:`),
   * truncation markers (`… N more`), or further exception type lines.
   * Candidates whose nearest preceding timestamped log line (within 5 lines)
   * is not at ERROR/FATAL/SEVERE level are ignored.
   * Each block is capped at `maxLinesPerBlock` lines; the lines of a block
   * are joined with `\n`.
   */
  private extractExceptionBlocks(content: string, maxBlocks: number, maxLinesPerBlock: number): string[] {
    const lines: string[] = content.split(/\r?\n/);
    const blocks: string[] = [];
    const timestampPattern: RegExp = /^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}/;
    const exceptionTypeLinePattern: RegExp =
      /^\s*(?:[a-z_][A-Za-z0-9_$]*\.)*[A-Z][A-Za-z0-9_$]*(?:Exception|Error|Throwable)(?::|\b)/;
    const startPattern: RegExp = new RegExp(
      String.raw`${exceptionTypeLinePattern.source}|\b(?:Exception|Error)\b|^\s*Caused by:`,
    );
    // Matches only the severity levels that indicate a real error.
    const errorLevelPattern: RegExp = /\b(?:ERROR|FATAL|SEVERE)\b/i;

    for (let index: number = 0; index < lines.length && blocks.length < maxBlocks; index++) {
      if (!startPattern.test(lines[index])) {
        continue;
      }

      // Look back up to 5 lines to find the nearest timestamped log line and
      // determine its severity. Stack traces following a WARN/INFO/DEBUG line
      // are expected (e.g. FileAlreadyExistsException on a WARN archive attempt)
      // and must not be reported as findings.
      let precedingIsError: boolean = false;
      let precedingLogLine: string = '';
      for (let scan: number = index - 1; scan >= 0 && scan >= index - 5; scan--) {
        if (timestampPattern.test(lines[scan])) {
          precedingLogLine = lines[scan];
          precedingIsError = errorLevelPattern.test(lines[scan]);
          break;
        }
      }

      // If the nearest timestamped line exists and is not an error level, skip.
      if (precedingLogLine && !precedingIsError) {
        continue;
      }

      const blockLines: string[] = [lines[index]];

      // In swirlds/hgcaa logs, the actual throwable class line can follow a
      // timestamped ERROR marker line. Include that marker line as context.
      if (
        index > 0 &&
        blockLines.length < maxLinesPerBlock &&
        (/\bERROR\s+EXCEPTION\b/i.test(lines[index - 1]) ||
          (timestampPattern.test(lines[index - 1]) && errorLevelPattern.test(lines[index - 1]))) &&
        !blockLines.includes(lines[index - 1])
      ) {
        blockLines.unshift(lines[index - 1]);
      }

      let next: number = index + 1;
      while (next < lines.length && blockLines.length < maxLinesPerBlock) {
        const line: string = lines[next];
        // A blank line or a new timestamped entry ends the stack trace.
        if (line.trim().length === 0 || timestampPattern.test(line)) {
          break;
        }

        if (
          /^\s+at\s+/.test(line) ||
          /^\s*Caused by:/.test(line) ||
          /^\s*Suppressed:/.test(line) ||
          /^\s*\.\.\.\s+\d+\s+more/.test(line) ||
          exceptionTypeLinePattern.test(line)
        ) {
          blockLines.push(line);
          next++;
          continue;
        }

        break;
      }

      blocks.push(blockLines.join('\n'));
      // Resume scanning right after the consumed block (the loop increment adds one).
      index = next - 1;
    }

    return blocks;
  }

  /**
   * Renders all findings into a human-readable plain-text report, sorted by
   * severity (image-pull → oom → pod-readiness → consensus-active →
   * log-exception → app-error). Returns the report as a string ready to be
   * written to `diagnostics-analysis.txt`.
   */
  private renderDiagnosticsFindings(findings: DiagnosticsFinding[]): string {
    const lines: string[] = ['Solo Diagnostics Analysis Report', `Generated: ${new Date().toISOString()}`, ''];

    if (findings.length === 0) {
      lines.push('No common failure signatures were detected.');
      return lines.join('\n');
    }

    const orderedFindings: DiagnosticsFinding[] = this.sortFindingsBySeverity(findings);

    lines.push(`Detected ${orderedFindings.length} potential issue(s):`, '');

    for (const [index, finding] of orderedFindings.entries()) {
      lines.push(
        `${index + 1}. [${DiagnosticsAnalyzer.CATEGORY_LABEL[finding.category]}] ${finding.title}`,
        ` Source: ${finding.source}`,
      );
      if (finding.evidence.length > 0) {
        lines.push(' Evidence:');
        for (const evidenceLine of finding.evidence) {
          lines.push(` - ${evidenceLine}`);
        }
      }
      lines.push('');
    }

    return lines.join('\n');
  }

  /**
   * Returns a copy of `findings` ordered from most to least severe.
   *
   * The sort is stable, so findings of the same category keep the order in
   * which they were discovered. The input array is not modified.
   */
  private sortFindingsBySeverity(findings: readonly DiagnosticsFinding[]): DiagnosticsFinding[] {
    const severityOrder: Readonly<Record<DiagnosticsFindingCategory, number>> = DiagnosticsAnalyzer.SEVERITY_ORDER;
    return [...findings].sort(
      (left: DiagnosticsFinding, right: DiagnosticsFinding): number =>
        severityOrder[left.category] - severityOrder[right.category],
    );
  }

  /**
   * Returns a fresh RegExp equal to `pattern` without the global (`g`) flag,
   * so that `test()` never depends on a `lastIndex` left over from an earlier
   * call on the same pattern instance.
   */
  private static toNonGlobalPattern(pattern: RegExp): RegExp {
    return new RegExp(pattern.source, pattern.flags.replaceAll('g', ''));
  }

  /** Extracts a printable message from a caught value, which is not guaranteed to be an `Error`. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}