UNPKG

@hashgraph/solo

Version:

An opinionated CLI tool to deploy and manage private Hedera Networks.

470 lines 23.4 kB
// SPDX-License-Identifier: Apache-2.0
import {
    Metrics,
    V1Container,
    V1ExecAction,
    V1ObjectMeta,
    V1Pod,
    V1PodSpec,
    V1Probe,
} from '@kubernetes/client-node';
import { NamespaceName } from '../../../../../types/namespace/namespace-name.js';
import { PodReference } from '../../../resources/pod/pod-reference.js';
import { K8ClientPod } from './k8-client-pod.js';
import { Duration } from '../../../../../core/time/duration.js';
import { K8ClientBase } from '../../k8-client-base.js';
import { SoloError } from '../../../../../core/errors/solo-error.js';
import { MissingArgumentError } from '../../../../../core/errors/missing-argument-error.js';
import * as constants from '../../../../../core/constants.js';
import { container } from 'tsyringe-neo';
import { PodName } from '../../../resources/pod/pod-name.js';
import { InjectTokens } from '../../../../../core/dependency-injection/inject-tokens.js';
import { KubeApiResponse } from '../../../kube-api-response.js';
import { ResourceOperation } from '../../../resources/resource-operation.js';
import { ResourceType } from '../../../resources/resource-type.js';
import yaml from 'yaml';
import { sleep } from '../../../../../core/helpers.js';

/**
 * Waiting reasons for container states that are non-recoverable (image unavailable in registry).
 */
const FATAL_WAITING_REASONS = new Set([
    'ImagePullBackOff',
    'ErrImagePull',
    'InvalidImageName',
    'ImageInspectError',
    'RegistryUnavailable',
]);

/**
 * Waiting reasons that are only treated as fatal when the accompanying message matches one of
 * {@link NON_RECOVERABLE_IMAGE_PULL_PATTERNS}; otherwise they are considered transient.
 */
const MESSAGE_DEPENDENT_WAITING_REASONS = new Set(['ErrImagePull', 'ImagePullBackOff', 'ImageInspectError']);

/**
 * Terminated reasons for container states that are non-recoverable (e.g. out-of-memory kill).
 */
const FATAL_TERMINATED_REASONS = new Set(['OOMKilled']);

/** Number of consecutive polls that must report the same fatal error before the wait is aborted. */
const FATAL_ERROR_RETRY_THRESHOLD = 3;

/** Message fragments that mark an image pull failure as non-recoverable. */
const NON_RECOVERABLE_IMAGE_PULL_PATTERNS = [
    /not found/i,
    /manifest unknown/i,
    /pull access denied/i,
    /requested access to the resource is denied/i,
    /insufficient_scope/i,
    /unauthorized/i,
    /authentication required/i,
    /invalid reference format/i,
];

const BYTES_PER_MEBIBYTE = 1024 * 1024;

/**
 * CPU quantity suffixes as `[suffix, numerator, denominator]`; millicores = value * numerator / denominator.
 */
const CPU_SUFFIX_SCALES = [
    ['n', 1, 1_000_000],
    ['u', 1, 1000],
    ['m', 1, 1],
];

/**
 * Memory quantity suffixes as `[suffix, numerator, denominator]`; MiB = value * numerator / denominator.
 * Covers the binary (Ki..Ei) and decimal (k..E) suffixes plus the milli suffix `m`.
 */
const MEMORY_SUFFIX_SCALES = [
    ['Ki', 1024, BYTES_PER_MEBIBYTE],
    ['Mi', 1024 ** 2, BYTES_PER_MEBIBYTE],
    ['Gi', 1024 ** 3, BYTES_PER_MEBIBYTE],
    ['Ti', 1024 ** 4, BYTES_PER_MEBIBYTE],
    ['Pi', 1024 ** 5, BYTES_PER_MEBIBYTE],
    ['Ei', 1024 ** 6, BYTES_PER_MEBIBYTE],
    ['k', 1e3, BYTES_PER_MEBIBYTE],
    ['M', 1e6, BYTES_PER_MEBIBYTE],
    ['G', 1e9, BYTES_PER_MEBIBYTE],
    ['T', 1e12, BYTES_PER_MEBIBYTE],
    ['P', 1e15, BYTES_PER_MEBIBYTE],
    ['E', 1e18, BYTES_PER_MEBIBYTE],
    ['m', 1, 1000 * BYTES_PER_MEBIBYTE],
];

/**
 * Convert a suffixed quantity string to a rounded integer in the target unit.
 *
 * @param quantity - quantity text such as "100m" or "1.5Gi"
 * @param suffixScales - `[suffix, numerator, denominator]` rows; the first matching suffix wins
 * @param defaultNumerator - numerator applied when no suffix matches
 * @param defaultDenominator - denominator applied when no suffix matches
 * @returns the rounded scaled value, or 0 when the numeric part cannot be parsed
 */
function scaleQuantity(quantity, suffixScales, defaultNumerator, defaultDenominator) {
    const text = String(quantity).trim();
    let numericText = text;
    let numerator = defaultNumerator;
    let denominator = defaultDenominator;
    for (const [suffix, suffixNumerator, suffixDenominator] of suffixScales) {
        if (text.endsWith(suffix)) {
            numericText = text.slice(0, -suffix.length);
            numerator = suffixNumerator;
            denominator = suffixDenominator;
            break;
        }
    }
    // parseFloat (not parseInt) so fractional quantities such as "1.5Gi" are not truncated.
    const value = Number.parseFloat(numericText);
    if (!Number.isFinite(value)) {
        // An unparsable quantity counts as 0 so that it cannot turn a per-pod sum into NaN.
        return 0;
    }
    return Math.round((value * numerator) / denominator);
}

/**
 * Server-side timeout for pod list calls, in seconds.
 * The Kubernetes `timeoutSeconds` parameter is measured in seconds, so the five-minute
 * duration is converted from milliseconds instead of being passed through as-is.
 * Computed on demand so that `Duration` is not touched while this module is still loading.
 */
function listTimeoutSeconds() {
    return Math.floor(Duration.ofMinutes(5).toMillis() / 1000);
}

/**
 * Creation time of a Kubernetes object in epoch milliseconds.
 * Accepts a Date or a date string; a missing or unparsable timestamp yields 0.
 */
function creationTimeMillis(item) {
    const millis = new Date(item?.metadata?.creationTimestamp || 0).getTime();
    return Number.isNaN(millis) ? 0 : millis;
}

/**
 * Inspect a V1Pod's container statuses for non-recoverable error states and return a descriptive
 * error message if one is detected, or undefined if no fatal error is present.
 *
 * Init container statuses are inspected before regular container statuses.
 *
 * Covered states:
 * - Waiting: InvalidImageName, RegistryUnavailable (always fatal)
 * - Waiting: ErrImagePull, ImagePullBackOff, ImageInspectError (fatal only when the waiting
 *   message matches a non-recoverable image pull pattern; otherwise the container is skipped)
 * - Terminated: OOMKilled (container killed due to out-of-memory)
 *
 * @param pod - the V1Pod to inspect
 * @returns the error message for the first fatal container found, or undefined
 */
export function detectFatalContainerError(pod) {
    const podName = pod.metadata?.name ?? '<unknown>';
    const allContainerStatuses = [
        ...(pod.status?.initContainerStatuses ?? []),
        ...(pod.status?.containerStatuses ?? []),
    ];
    for (const containerStatus of allContainerStatuses) {
        const containerName = containerStatus.name ?? '<unknown>';
        const waitingState = containerStatus.state?.waiting;
        if (waitingState?.reason && FATAL_WAITING_REASONS.has(waitingState.reason)) {
            const isTransient =
                MESSAGE_DEPENDENT_WAITING_REASONS.has(waitingState.reason) &&
                !isNonRecoverableImagePullError(waitingState.message);
            if (isTransient) {
                continue;
            }
            const detail = waitingState.message ? `: ${waitingState.message}` : '';
            return (`Pod "${podName}" container "${containerName}" is in a non-recoverable state: ` +
                `${waitingState.reason}${detail}`);
        }
        const terminatedState = containerStatus.state?.terminated;
        if (terminatedState?.reason && FATAL_TERMINATED_REASONS.has(terminatedState.reason)) {
            return (`Pod "${podName}" container "${containerName}" was terminated due to: ` +
                `${terminatedState.reason} (exit code ${terminatedState.exitCode ?? 'unknown'})`);
        }
    }
    return undefined;
}

/**
 * Tell whether an image pull failure message matches a known non-recoverable pattern.
 * A missing or empty message is treated as recoverable.
 */
function isNonRecoverableImagePullError(message) {
    if (!message) {
        return false;
    }
    return NON_RECOVERABLE_IMAGE_PULL_PATTERNS.some((pattern) => pattern.test(message));
}

/**
 * Pod operations (read, list, wait, create, delete, logs, describe, metrics) backed by the
 * Kubernetes client handed to the constructor.
 */
export class K8ClientPods extends K8ClientBase {
    kubeClient;
    kubeConfig;
    kubectlInstallationDirectory;
    logger;

    /**
     * @param kubeClient - Kubernetes API client used for pod and event calls
     * @param kubeConfig - kube config, also used to build the metrics client
     * @param kubectlInstallationDirectory - forwarded unchanged to every K8ClientPod created here
     */
    constructor(kubeClient, kubeConfig, kubectlInstallationDirectory) {
        super();
        this.kubeClient = kubeClient;
        this.kubeConfig = kubeConfig;
        this.kubectlInstallationDirectory = kubectlInstallationDirectory;
        this.logger = container.resolve(InjectTokens.SoloLogger);
    }

    /**
     * Build a K8ClientPod handle for the given reference without contacting the API.
     */
    readByReference(podReference) {
        return new K8ClientPod(podReference, this, this.kubeClient, this.kubeConfig, this.kubectlInstallationDirectory);
    }

    /**
     * Look up a pod by name within its namespace.
     *
     * @param podReference - namespace and name of the pod
     * @returns the result of K8ClientPod.fromV1Pod for the item selected by `filterItem`
     */
    async read(podReference) {
        const ns = podReference.namespace;
        const fieldSelector = `metadata.name=${podReference.name}`;
        const resp = await this.kubeClient.listNamespacedPod({
            namespace: ns.name,
            fieldSelector,
            timeoutSeconds: listTimeoutSeconds(),
        });
        return K8ClientPod.fromV1Pod(this.filterItem(resp.items, { name: podReference.name.toString() }), this, this.kubeClient, this.kubeConfig, this.kubectlInstallationDirectory);
    }

    /**
     * List the pods of a namespace, newest first.
     *
     * @param namespace - namespace to list
     * @param [labels] - label selector terms, joined with ','
     * @returns pods ordered by creation timestamp descending
     */
    async list(namespace, labels) {
        const labelSelector = labels ? labels.join(',') : undefined;
        const result = await this.kubeClient.listNamespacedPod({
            namespace: namespace.name,
            labelSelector,
            timeoutSeconds: listTimeoutSeconds(),
        });
        const sortedItems = result?.items
            ? // eslint-disable-next-line unicorn/no-array-sort
              [...result.items].sort((a, b) => creationTimeMillis(b) - creationTimeMillis(a))
            : [];
        return sortedItems.map((item) => K8ClientPod.fromV1Pod(item, this, this.kubeClient, this.kubeConfig, this.kubectlInstallationDirectory));
    }

    /**
     * Wait until the newest matching pod is running and reports the Ready condition as true.
     *
     * @param namespace - namespace
     * @param labels - pod labels
     * @param [maxAttempts] - maximum attempts to check
     * @param [delay] - delay between checks in milliseconds
     * @param [createdAfter] - if provided, only pods created strictly after this date are considered
     * @param [excludeMarkedForDeletion] - if true, pods with deletionTimestamp are ignored
     * @throws SoloError wrapping the underlying failure when the pod does not become ready
     */
    async waitForReadyStatus(namespace, labels, maxAttempts = 10, delay = 500, createdAfter, excludeMarkedForDeletion = false) {
        const podReadyCondition = new Map().set(constants.POD_CONDITION_READY, constants.POD_CONDITION_STATUS_TRUE);
        try {
            return await this.waitForPodConditions(namespace, podReadyCondition, labels, maxAttempts, delay, createdAfter, excludeMarkedForDeletion);
        } catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            this.logger.showUser(`Pod readiness check failed: ${errorMessage}`);
            // labels may be omitted; joining undefined here would replace the real failure with a TypeError.
            throw new SoloError(`Pod with labels [${(labels ?? []).join(', ')}] not ready [maxAttempts = ${maxAttempts}]`, error);
        }
    }

    /**
     * Wait until the pod identified by `podReference` appears in the Kubernetes API.
     *
     * Use this when the exact pod name is known. If the pod must be discovered by labels,
     * use {@link waitForReadyStatus} with an appropriate label selector instead.
     *
     * @param podReference - exact reference of the pod to wait for
     * @param maxAttempts - maximum polling attempts before throwing (default 20 × 3 s = 60 s)
     * @param delay - milliseconds between attempts (default 3000)
     * @throws SoloError when the pod is still not visible after the last attempt
     */
    async waitForPodByReference(podReference, maxAttempts = 20, delay = 3000) {
        const podName = podReference.name.toString();
        for (let attempt = 1; attempt <= maxAttempts; attempt++) {
            const pod = await this.read(podReference);
            if (pod) {
                return;
            }
            this.logger.debug(`waitForPodByReference: pod ${podName} not yet visible in API, attempt ${attempt}/${maxAttempts}`);
            // No point in sleeping after the final attempt; fail right away instead.
            if (attempt < maxAttempts) {
                await sleep(Duration.ofMillis(delay));
            }
        }
        throw new SoloError(`Pod ${podName} not found after ${maxAttempts} attempts`);
    }

    /**
     * Check pods for conditions
     * @param namespace - namespace
     * @param conditionsMap - a map of conditions and values
     * @param [labels] - pod labels
     * @param [maxAttempts] - maximum attempts to check
     * @param [delay] - delay between checks in milliseconds
     * @param [createdAfter] - if provided, only pods created strictly after this date are considered
     * @param [excludeMarkedForDeletion] - if true, pods with deletionTimestamp are ignored
     * @throws MissingArgumentError when `conditionsMap` is missing or empty
     */
    async waitForPodConditions(namespace, conditionsMap, labels = [], maxAttempts = 10, delay = 500, createdAfter, excludeMarkedForDeletion = false) {
        if (!conditionsMap || conditionsMap.size === 0) {
            throw new MissingArgumentError('pod conditions are required');
        }
        // The predicate is satisfied as soon as ANY requested condition/status pair is present on the pod.
        const hasRequestedCondition = (pod) => {
            for (const cond of pod.conditions ?? []) {
                for (const [condType, condStatus] of conditionsMap.entries()) {
                    if (cond.type === condType && cond.status === condStatus) {
                        this.logger.info(`Pod condition met for ${pod.podReference.name.name} [type: ${cond.type} status: ${cond.status}]`);
                        return true;
                    }
                }
            }
            // condition not found
            return false;
        };
        return await this.waitForRunningPhase(namespace, labels, maxAttempts, delay, hasRequestedCondition, createdAfter, excludeMarkedForDeletion);
    }

    /**
     * Poll until the newest eligible pod is in the Running phase and satisfies the predicate.
     *
     * On every poll all eligible pods are scanned for fatal container errors; the wait is
     * rejected once the same fatal error has been seen on a pod for
     * FATAL_ERROR_RETRY_THRESHOLD consecutive polls. Errors raised while listing pods are
     * logged and count as a failed attempt.
     *
     * @param namespace - namespace
     * @param labels - pod labels, joined with ',' to form the label selector
     * @param maxAttempts - maximum attempts to check
     * @param delay - delay between checks in milliseconds
     * @param [podItemPredicate] - extra check applied to the newest eligible pod
     * @param [createdAfter] - if provided, only pods created strictly after this date are considered
     * @param [excludeMarkedForDeletion] - if true, pods with deletionTimestamp are ignored
     * @returns a single-element array holding the matching pod
     */
    async waitForRunningPhase(namespace, labels, maxAttempts, delay, podItemPredicate, createdAfter, excludeMarkedForDeletion = false) {
        const phases = [constants.POD_PHASE_RUNNING];
        const labelSelector = labels ? labels.join(',') : undefined;
        this.logger.info(`waitForRunningPhase [labelSelector: ${labelSelector}, namespace:${namespace.name}, maxAttempts: ${maxAttempts}]`);
        return new Promise((resolve, reject) => {
            let attempts = 0;
            const fatalErrorStreakByPod = new Map();
            const check = async () => {
                // wait for the pod to be available with the given status and labels
                try {
                    const response = await this.kubeClient.listNamespacedPod({
                        namespace: namespace.name,
                        labelSelector,
                        timeoutSeconds: listTimeoutSeconds(),
                    });
                    this.logger.debug(`[attempt: ${attempts}/${maxAttempts}] ${response.items?.length} pod(s) found [labelSelector: ${labelSelector}, namespace:${namespace.name}]`);
                    if (response.items?.length > 0) {
                        // Sort pods by creation timestamp descending (newest first)
                        // eslint-disable-next-line unicorn/no-array-sort
                        const sortedItems = [...response.items].sort((a, b) => creationTimeMillis(b) - creationTimeMillis(a));
                        // When a createdAfter cutoff is provided, skip pods that existed before the
                        // cutoff (e.g. a terminating predecessor from a recreate migration).
                        const createdAfterEligibleItems = createdAfter
                            ? sortedItems.filter((pod) => creationTimeMillis(pod) > createdAfter.getTime())
                            : sortedItems;
                        const eligibleItems = excludeMarkedForDeletion
                            ? createdAfterEligibleItems.filter((pod) => !pod.metadata?.deletionTimestamp)
                            : createdAfterEligibleItems;
                        // Allow transient startup states to recover; only fail after repeated fatal detections.
                        for (const item of eligibleItems) {
                            const fatalError = detectFatalContainerError(item);
                            const podName = item.metadata?.name ?? '<unknown>';
                            if (fatalError) {
                                const previous = fatalErrorStreakByPod.get(podName);
                                // A different error message restarts the streak for that pod.
                                const nextCount = previous?.error === fatalError ? previous.count + 1 : 1;
                                fatalErrorStreakByPod.set(podName, { count: nextCount, error: fatalError });
                                if (nextCount >= FATAL_ERROR_RETRY_THRESHOLD) {
                                    return reject(new SoloError(fatalError));
                                }
                                this.logger.info(`Detected fatal pod state for "${podName}" (${nextCount}/${FATAL_ERROR_RETRY_THRESHOLD}); retrying`);
                            } else {
                                fatalErrorStreakByPod.delete(podName);
                            }
                        }
                        if (eligibleItems.length > 0) {
                            // Only check the newest eligible pod
                            const newestItem = eligibleItems[0];
                            const pod = K8ClientPod.fromV1Pod(newestItem, this, this.kubeClient, this.kubeConfig, this.kubectlInstallationDirectory);
                            if (phases.includes(newestItem.status?.phase) && (!podItemPredicate || podItemPredicate(pod))) {
                                return resolve([pod]);
                            }
                        }
                    }
                } catch (error) {
                    this.logger.info('Error occurred while waiting for pods, retrying', error);
                }
                if (++attempts < maxAttempts) {
                    setTimeout(runCheck, delay);
                } else {
                    return reject(new SoloError(`Expected at least 1 pod not found for labels: ${labelSelector}, phases: ${phases.join(',')} [attempts = ${attempts}/${maxAttempts}]`));
                }
            };
            // Route any unexpected failure of a poll into the returned promise instead of leaving it unhandled.
            const runCheck = () => {
                check().catch(reject);
            };
            runCheck();
        });
    }

    /**
     * List pods across all namespaces.
     *
     * @param [labels] - label selector terms, joined with ','
     * @returns one K8ClientPod handle per pod returned by the API
     */
    async listForAllNamespaces(labels) {
        const labelSelector = labels ? labels.join(',') : undefined;
        const pods = [];
        try {
            const response = await this.kubeClient.listPodForAllNamespaces({ labelSelector });
            if (response?.items?.length > 0) {
                for (const item of response.items) {
                    pods.push(new K8ClientPod(PodReference.of(NamespaceName.of(item.metadata?.namespace), PodName.of(item.metadata?.name)), this, this.kubeClient, this.kubeConfig, this.kubectlInstallationDirectory));
                }
            }
        } catch (error) {
            KubeApiResponse.throwError(error, ResourceOperation.LIST, ResourceType.POD, undefined, '');
        }
        return pods;
    }

    /**
     * Create a single-container pod with an exec startup probe.
     *
     * @param podReference - namespace and name of the pod to create
     * @param labels - labels to put on the pod metadata
     * @param containerName - container reference; its `name` property is used
     * @param containerImage - image for the container
     * @param containerCommand - command for the container
     * @param startupProbeCommand - command executed by the startup probe
     * @returns a K8ClientPod handle for the created pod
     * @throws SoloError when the API call yields no result
     */
    async create(podReference, labels, containerName, containerImage, containerCommand, startupProbeCommand) {
        const v1Metadata = new V1ObjectMeta();
        v1Metadata.name = podReference.name.toString();
        v1Metadata.namespace = podReference.namespace.toString();
        v1Metadata.labels = labels;

        const v1ExecAction = new V1ExecAction();
        v1ExecAction.command = startupProbeCommand;

        const v1Probe = new V1Probe();
        v1Probe.exec = v1ExecAction;

        const v1Container = new V1Container();
        v1Container.name = containerName.name;
        v1Container.image = containerImage;
        v1Container.command = containerCommand;
        v1Container.startupProbe = v1Probe;

        const v1Spec = new V1PodSpec();
        v1Spec.containers = [v1Container];

        const v1Pod = new V1Pod();
        v1Pod.metadata = v1Metadata;
        v1Pod.spec = v1Spec;

        let result;
        try {
            result = await this.kubeClient.createNamespacedPod({ namespace: podReference.namespace.toString(), body: v1Pod });
        } catch (error) {
            if (error instanceof SoloError) {
                throw error;
            }
            KubeApiResponse.throwError(error, ResourceOperation.CREATE, ResourceType.POD, podReference.namespace, podReference.name.toString());
        }
        if (result) {
            return new K8ClientPod(podReference, this, this.kubeClient, this.kubeConfig, this.kubectlInstallationDirectory);
        } else {
            throw new SoloError('Error creating pod', result);
        }
    }

    /**
     * Delete the referenced pod; API failures are passed to KubeApiResponse.throwError.
     */
    async delete(podReference) {
        try {
            await this.kubeClient.deleteNamespacedPod({
                namespace: podReference.namespace.toString(),
                name: podReference.name.toString(),
            });
        } catch (error) {
            KubeApiResponse.throwError(error, ResourceOperation.DELETE, ResourceType.POD, podReference.namespace, podReference.name.toString());
        }
    }

    /**
     * Read the logs of every init, regular and ephemeral container of a pod.
     *
     * Each container's output is prefixed with a `===== Container: <name> =====` header. A
     * failure to read one container's log is recorded in the output instead of aborting.
     *
     * @param podReference - namespace and name of the pod
     * @param [timestamps] - whether to request timestamps on log lines (default true)
     * @returns the combined log text
     */
    async readLogs(podReference, timestamps = true) {
        const namespace = podReference.namespace.toString();
        const name = podReference.name.toString();
        const pod = await this.kubeClient.readNamespacedPod({ name, namespace });
        const containerNames = [
            ...(pod.spec?.initContainers?.map((container) => container.name) ?? []),
            ...(pod.spec?.containers?.map((container) => container.name) ?? []),
            ...(pod.spec?.ephemeralContainers?.map((container) => container.name) ?? []),
        ].filter(Boolean);
        if (containerNames.length === 0) {
            // No container names available: request the log without naming a container.
            const log = await this.kubeClient.readNamespacedPodLog({
                name,
                namespace,
                timestamps,
            });
            return log ?? '';
        }
        const containerLogs = [];
        for (const containerName of containerNames) {
            try {
                const containerLog = await this.kubeClient.readNamespacedPodLog({
                    name,
                    namespace,
                    container: containerName,
                    timestamps,
                });
                containerLogs.push(`===== Container: ${containerName} =====\n${containerLog ?? ''}`.trimEnd());
            } catch (error) {
                containerLogs.push(`===== Container: ${containerName} =====\nFailed to read logs: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        return containerLogs.join('\n\n');
    }

    /**
     * Produce a YAML document holding the pod object and its events, oldest event first.
     *
     * @param podReference - namespace and name of the pod
     * @returns YAML text with `pod` and `events` keys
     */
    async readDescribe(podReference) {
        const namespace = podReference.namespace.toString();
        const name = podReference.name.toString();
        const pod = await this.kubeClient.readNamespacedPod({ name, namespace });
        const events = await this.kubeClient.listNamespacedEvent({
            namespace,
            fieldSelector: `involvedObject.name=${name},involvedObject.namespace=${namespace}`,
        });
        // Use the most specific timestamp an event carries, falling back towards its creation time.
        const eventTimeMillis = (event) => new Date(event.lastTimestamp ??
            event.eventTime ??
            event.firstTimestamp ??
            event.metadata?.creationTimestamp ??
            0).getTime();
        // eslint-disable-next-line unicorn/no-array-sort
        const sortedEvents = [...(events?.items ?? [])].sort((left, right) => eventTimeMillis(left) - eventTimeMillis(right));
        const describeData = {
            pod,
            events: sortedEvents,
        };
        return yaml.stringify(describeData);
    }

    /**
     * Report CPU and memory usage per pod, summed over the pod's containers.
     *
     * @param [namespace] - restrict to this namespace; all namespaces when omitted
     * @param [labelSelector] - when given, only pods matching this selector are reported
     * @returns entries of `{ namespace, podName, cpuInMillicores, memoryInMebibytes }`
     */
    async topPods(namespace, labelSelector) {
        const metrics = new Metrics(this.kubeConfig);
        const podMetricsList = await metrics.getPodMetrics(namespace?.name);
        let allowedPodKeys;
        if (labelSelector) {
            // The selector is applied by listing the matching pods and filtering the metrics by key.
            const podList = namespace
                ? await this.kubeClient.listNamespacedPod({
                      namespace: namespace.name,
                      labelSelector,
                      timeoutSeconds: listTimeoutSeconds(),
                  })
                : await this.kubeClient.listPodForAllNamespaces({ labelSelector });
            allowedPodKeys = new Set(podList.items.map((p) => `${p.metadata?.namespace ?? ''}/${p.metadata?.name ?? ''}`));
        }
        return podMetricsList.items
            .filter((podMetric) => {
                if (!allowedPodKeys) {
                    return true;
                }
                return allowedPodKeys.has(`${podMetric.metadata.namespace}/${podMetric.metadata.name}`);
            })
            .map((podMetric) => {
                let cpuInMillicores = 0;
                let memoryInMebibytes = 0;
                // Tolerate entries without container or usage data instead of throwing.
                for (const c of podMetric.containers ?? []) {
                    cpuInMillicores += K8ClientPods.parseMillicores(c.usage?.cpu);
                    memoryInMebibytes += K8ClientPods.parseMebibytes(c.usage?.memory);
                }
                return {
                    namespace: NamespaceName.of(podMetric.metadata.namespace),
                    podName: PodName.of(podMetric.metadata.name),
                    cpuInMillicores,
                    memoryInMebibytes,
                };
            });
    }

    /**
     * Parse a Kubernetes CPU quantity string into millicores.
     * Examples: "100m" -> 100, "1" -> 1000, "0.5" -> 500, "100000n" -> 0 (rounded)
     *
     * @param quantity - CPU quantity text; suffixes n, u and m are recognised
     * @returns whole millicores; 0 for an empty or unparsable quantity
     */
    static parseMillicores(quantity) {
        if (!quantity) {
            return 0;
        }
        // Without a suffix the quantity is in whole cores.
        return scaleQuantity(quantity, CPU_SUFFIX_SCALES, 1000, 1);
    }

    /**
     * Parse a Kubernetes memory quantity string into mebibytes (MiB).
     * Examples: "50Mi" -> 50, "1Gi" -> 1024, "1.5Gi" -> 1536, "52428800" -> 50,
     * "512Ki" -> 1 (0.5 rounds up)
     *
     * @param quantity - memory quantity text; binary (Ki..Ei), decimal (k..E) and milli (m)
     *   suffixes are recognised
     * @returns whole mebibytes; 0 for an empty or unparsable quantity
     */
    static parseMebibytes(quantity) {
        if (!quantity) {
            return 0;
        }
        // Without a suffix the quantity is a plain number of bytes.
        return scaleQuantity(quantity, MEMORY_SUFFIX_SCALES, 1, BYTES_PER_MEBIBYTE);
    }
}
//# sourceMappingURL=k8-client-pods.js.map