kubernetes-monitoring-collector

Kubernetes monitoring: collects data, logs, and statistics and forwards them to a monitoring server.

585 lines (518 loc) 17.6 kB
/* Copyright (c) 2021 Lean Cloud Services GmbH
   This work is licensed under Creative Commons Attribution-NoDerivatives 4.0
   International License. http://creativecommons.org/licenses/by-nd/4.0/ */

const cfg    = require( 'config' )
const log    = require( 'npmlog' )
const k8s    = require( '@kubernetes/client-node' )
const stream = require( 'stream' )
const aStat  = require( './dta-acc-stats' )

module.exports = {
  init,
  setCfg,
  getDta,
  pushLogs,
  getErrState
}

// map workload kinds to the single-letter codes used in the outgoing data
const kindMap = {
  ReplicaSet  : 'r',
  DaemonSet   : 'd',
  StatefulSet : 's',
  Job         : 'j'
}

let k8sApi     = null
let k8sApps    = null
let k8sJobs    = null
let k8sJobB    = null
let k8sNetw    = null
let k8sLogs    = null
let k8sMetrics = null

let collCfg = []
let plan    = null

// the collector may be configured to not send out logs (compliance ...)
let collectLogs    = true
let collectAllLogs = false

let dtaSender = null

//-----------------------------------------------------------------------------

async function init( sender ) {
  try {
    dtaSender = sender
    aStat.init( sender )
    collectAllLogs = cfg.LOG_ALL_PODS

    const kc = new k8s.KubeConfig()
    if ( process.env.KUBERNETES_SERVICE_HOST ) {
      log.info( 'KubeConfig loadFromCluster...' )
      kc.loadFromCluster()
    } else if ( cfg.CTX ) {
      log.info( 'KubeConfig from cfg.CTX' )
      kc.loadFromOptions( cfg.CTX )
    } else if ( process.env.CTX_SERVER ) {
      log.info( 'KubeConfig from env, server=', process.env.CTX_SERVER )
      kc.loadFromDefault()
      kc.clusters[0].server = process.env.CTX_SERVER
    } else {
      log.info( 'KubeConfig loadFromDefault...' )
      kc.loadFromDefault()
    }
    log.verbose( 'kc', kc )

    k8sApi     = kc.makeApiClient( k8s.CoreV1Api )
    k8sApps    = kc.makeApiClient( k8s.AppsV1Api )
    k8sJobs    = kc.makeApiClient( k8s.BatchV1Api )
    k8sJobB    = kc.makeApiClient( k8s.BatchV1beta1Api )
    k8sNetw    = kc.makeApiClient( k8s.NetworkingV1Api )
    k8sMetrics = new k8s.Metrics( kc )
    k8sLogs    = new k8s.Log( kc )

    // periodically destroy and re-open all log streams
    setInterval( reSubscribeLogs, 1000 * 60 * cfg.LOG_RENEW_STREAM_MIN )

  } catch ( exc ) {
    console.error( exc )
    log.error( exc )
    process.exit( 1 )
  }
}

//-----------------------------------------------------------------------------

function setCfg( collectorCfg ) {
  log.verbose( 'setCfg', collectorCfg )
  if ( ! collectorCfg ) { return }

  if ( collectorCfg.restart === true ) {
    log.info( 'Restart requested by Monitoring Central' )
    log.info( 'Reason: ', collectorCfg.restartReason )
    process.exit( 0 )
  }

  if ( collectorCfg.plan ) { plan = collectorCfg.plan }
  if ( collectorCfg.ms )   { collCfg = collectorCfg.ms }

  if ( collectorCfg.collectLogs ) {
    if ( collectLogs != collectorCfg.collectLogs ) {
      collectLogs = collectorCfg.collectLogs
      reSubscribeLogs()
    }
  }

  if ( collectorCfg.collectAllLogs ) {
    if ( collectAllLogs != collectorCfg.collectAllLogs ) {
      collectAllLogs = collectorCfg.collectAllLogs
    }
  }

  aStat.setCfg( collectorCfg )
}

//-----------------------------------------------------------------------------

let errorState = false

function getErrState() {
  let result = errorState
  errorState = false
  return result
}
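/* Example (illustrative only): the shape of a collector config as consumed by
   setCfg() above — the field names come from the code, the values are made up:

     setCfg({
       restart        : false,                // true => log the reason and exit, so the pod restarts
       restartReason  : '',
       plan           : 'standard',           // hypothetical plan name
       ms             : [ 'default/my-api' ], // "namespace/service" entries in scope for log collection
       collectLogs    : true,                 // toggling this re-subscribes all log streams
       collectAllLogs : false                 // collect logs of every pod, not only the `ms` entries
     })
*/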
//-----------------------------------------------------------------------------
// https://nodejs.org/api/stream.html#readabledestroyerror

let logStreamMap = {}
let podLogs = []
let pushing = false

async function pushLogs() {
  if ( pushing ) { return }
  pushing = true

  let cnt = podLogs.length
  log.verbose( 'push logs', cnt )
  if ( cnt > cfg.LOG_SND_MAX_CNT ) { cnt = cfg.LOG_SND_MAX_CNT }

  if ( cnt > 0 ) try {
    let logs = {}
    while ( cnt != 0 ) {
      let l = podLogs.shift()
      if ( l ) try { // prevent problems if pushLogs() runs multiple times
        let cid = l.ns + l.po
        if ( ! logs[ cid ] ) {
          logs[ cid ] = { ns : l.ns, ms : l.ms, pod : l.po, logs : [] }
        }
        logs[ cid ].logs.push({ ts : l.dt, log : l.log })
      } catch ( exc ) {
        log.warn( 'pushLogs', l.ns, l.ms, l.po, l.c, exc.message )
      }
      cnt --
    }
    // send out the logs
    await dtaSender.sendLogs( logs )
  } catch ( err ) {
    log.warn( 'pushLogs', err.message )
  }

  pushing = false
}


function reSubscribeLogs() {
  for ( let streamId in logStreamMap ) {
    let oldStream = logStreamMap[ streamId ].logStream
    log.verbose( 'subscribePodLogs, destroy old stream', streamId )
    oldStream.destroy()
    logStreamMap[ streamId ].logStream = null
    if ( collectLogs ) {
      subscribeContainerLogs(
        logStreamMap[ streamId ].ns,
        logStreamMap[ streamId ].ms,
        logStreamMap[ streamId ].pod,
        logStreamMap[ streamId ].container
      )
    }
  }
}


async function subscribePodLogs( ns, ms, podName, pod ) {
  log.verbose( 'subscribePodLogs', ns, ms, podName )
  if ( collectLogs ) {
    for ( let containerName in pod.c ) {
      subscribeContainerLogs( ns, ms, podName, containerName )
    }
  }
}


async function subscribeContainerLogs( ns, ms, podName, containerName ) {
  try {
    let streamId  = ns +'/'+ podName +'/'+ containerName
    let tailLines = 50

    if ( ! logStreamMap[ streamId ] ) { // first time
      log.info( 'subscribeContainerLogs initial', streamId )
      logStreamMap[ streamId ] = {
        ns        : ns,
        ms        : ms,
        pod       : podName,
        container : containerName,
        logStream : null
      }
    } else {
      if ( logStreamMap[ streamId ].logStream ) {
        return // nothing to do
      } else { // stream was destroyed to resubscribe
        log.verbose( 'subscribeContainerLogs resubscribe', streamId )
        tailLines = 0
      }
    }

    const logStream = new stream.PassThrough()
    logStream.on( 'data', async ( chunk ) => {
      let logStr = chunk + ''
      podLogs.push({
        dt  : Date.now(),
        ns  : ns,
        ms  : ms,
        po  : podName,
        c   : containerName,
        log : logStr
      })
      aStat.extractStats( ns, ms, logStr )
      if ( podLogs.length >= cfg.LOG_SND_MAX_CNT ) {
        await pushLogs()
      }
    })

    logStreamMap[ streamId ].logStream = logStream

    k8sLogs.log( ns, podName, containerName, logStream,
      { follow: true, tailLines: tailLines, pretty: false, timestamps: false } )
      .catch( err => { log.error( 'k8sLogs', ns, podName, containerName, err.message ) } )
      .then( req => {} )

  } catch ( exc ) {
    log.warn( 'getLogs', exc.message )
  }
}
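/* Example (illustrative only): the shape of one batch handed to
   dtaSender.sendLogs() by pushLogs() above — entries are keyed by the plain
   concatenation of namespace and pod name:

     {
       'defaultmy-api-6d4f9c7b8-x2x9z' : {
         ns   : 'default',
         ms   : 'my-api',
         pod  : 'my-api-6d4f9c7b8-x2x9z',
         logs : [ { ts : 1630000000000, log : 'listening on :8080\n' } ]
       }
     }
*/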
//-----------------------------------------------------------------------------
// https://kubernetes-client.github.io/javascript/modules.html

async function getDta() {
  log.verbose( 'gather data ...' )
  let cluster = {}
  try {
    cluster.node      = await getNode()
    cluster.namespace = await getNamespaceArr()
    for ( let ns in cluster.namespace ) {
      let pods = await getPods( ns, cluster.node )
      for ( let p in pods ) {
        let pod = pods[ p ]
        cluster.namespace[ ns ][ p ] = pod
      }
    }
    log.verbose( 'cluster', cluster.node )
    log.verbose( 'cluster', cluster )
    await aStat.sendStats()
  } catch ( exc ) {
    log.error( 'getDta', exc.message )
    errorState = true
    return null
  }
  return cluster
}

//-----------------------------------------------------------------------------
// https://kubernetes-client.github.io/javascript/classes/corev1api.corev1api-1.html

async function getNamespaceArr() {
  let nsMap = {}
  let ns = await k8sApi.listNamespace()
  if ( ns.body && ns.body.items ) {
    for ( let aNS of ns.body.items ) {
      nsMap[ aNS.metadata.name ] = {}
    }
  }
  return nsMap
}

//-----------------------------------------------------------------------------

async function loadMS( ns ) {
  let obj = {
    'ReplicaSet'    : {},
    'DaemonSet'     : {},
    'StatefulSet'   : {},
    'Job'           : {},
    'MinionIngress' : {},
    'Ingress'       : {},
    '_ing'          : {},
    '_issue'        : {}
  }

  try {
    let lst = await k8sNetw.listNamespacedIngress( ns )
    for ( let d of lst.body.items ) {
      log.verbose( d.metadata.name, JSON.stringify( d, null, ' ' ) )
      obj[ '_ing' ][ d.metadata.name ] = { a : d.metadata.annotations, r : d.spec.rules }
    }
  } catch ( e ) { log.warn( 'list Ingress', ns, e.message ); errorState = true }

  try {
    let lst = await k8sApps.listNamespacedDeployment( ns )
    for ( let d of lst.body.items ) {
      obj['ReplicaSet'][ d.metadata.name ] = d.spec.selector
      chkStatus( obj, d, 'Deployment' )
    }
  } catch ( e ) { log.warn( 'list Deployment', ns, e.message ); errorState = true }

  try {
    let lst = await k8sApps.listNamespacedDaemonSet( ns )
    for ( let d of lst.body.items ) {
      log.verbose( d.metadata.name, d.spec.selector )
      obj['DaemonSet'][ d.metadata.name ] = d.spec.selector
      chkStatus( obj, d, 'DaemonSet' )
    }
  } catch ( e ) { log.warn( 'list DaemonSet', ns, e.message ); errorState = true }

  try {
    let lst = await k8sApps.listNamespacedStatefulSet( ns )
    for ( let d of lst.body.items ) {
      log.verbose( d.metadata.name, d.spec.selector )
      obj['StatefulSet'][ d.metadata.name ] = d.spec.selector
      chkStatus( obj, d, 'StatefulSet' )
    }
  } catch ( e ) { log.warn( 'list StatefulSet', ns, e.message ); errorState = true }

  try {
    let lst = await k8sJobs.listNamespacedCronJob( ns )
    for ( let d of lst.body.items ) {
      obj['Job'][ d.metadata.name ] = {}
      chkStatus( obj, d, 'CronJob' )
    }
  } catch ( e ) { log.verbose( 'list CronJob', ns, e.message ) }

  try { // also query batch/v1beta1 CronJobs for older clusters
    let lst = await k8sJobB.listNamespacedCronJob( ns )
    for ( let d of lst.body.items ) {
      obj['Job'][ d.metadata.name ] = {}
      chkStatus( obj, d, 'CronJob' )
    }
  } catch ( e ) { log.warn( 'list CronJob.b', ns, e.message ); errorState = true }

  return obj
}


function chkStatus( obj, d, type ) {
  let st = d.status
  if ( st && st.lastScheduleTime ) {
    // this is a CronJob
  } else if ( st && st.replicas ) { // Deployment, StatefulSet
    if ( st.readyReplicas != st.replicas ) {
      obj['_issue'][ d.metadata.name ] = st
      obj['_issue'][ d.metadata.name ].type = type
    }
  } else if ( st && st.desiredNumberScheduled ) { // DaemonSet
    if ( st.numberReady != st.desiredNumberScheduled ) {
      obj['_issue'][ d.metadata.name ] = st
      obj['_issue'][ d.metadata.name ].type = type
    }
  }
}
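/* Example (illustrative only): chkStatus() above records workloads whose
   ready count lags the desired count, so a Deployment with 3 desired but
   only 2 ready replicas ends up (with its full status object) under

     obj['_issue']['my-api'] = { replicas : 3, readyReplicas : 2, ..., type : 'Deployment' }
*/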
//-----------------------------------------------------------------------------

function getMsName( aPod, obj ) {
  try {
    if ( aPod.labels && aPod.labels['app.kubernetes.io/name'] ) {
      // https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/
      return { msName : aPod.labels['app.kubernetes.io/name'] }
    }

    let mgr    = obj[ aPod.metadata.ownerReferences[0].kind ]
    let labels = aPod.metadata.labels
    log.verbose( aPod.metadata.name, labels )

    if ( aPod.metadata.ownerReferences[0].kind == 'Job' ) {
      // job pod names start with the job name
      for ( let x in mgr ) {
        if ( aPod.metadata.name.indexOf( x ) == 0 ) {
          return { msName : x }
        }
      }
    } else if ( mgr ) {
      // find the workload whose matchLabels selector matches the pod labels
      for ( let x in mgr ) {
        let sel = mgr[ x ].matchLabels
        let match = true
        for ( let s in sel ) {
          if ( labels[ s ] && labels[ s ] == sel[ s ] ) {
            // this label matches the selector
          } else {
            match = false
          }
        }
        if ( match ) { return { msName : x } }
      }
    }
  } catch ( exc ) {
    log.warn( 'getMsName', exc.message )
  }

  if ( aPod.labels && aPod.labels.app ) {
    return { msName : aPod.labels.app }
  }
  return { msName : aPod.metadata.name } // better than nothing
}


function getMemMB( mem ) {
  let memStr = mem + ''
  // drop the last six digits of the raw byte count to get MB
  let memMB = Number.parseInt( memStr.substring( 0, memStr.length - 6 ), 10 )
  if ( isNaN( memMB ) ) { memMB = 0 }
  return memMB
}


async function getPodMetrics( ns ) {
  let podMetrics = {}
  if ( ! process.env.SKIP_METRICS ) try {
    let topPods = await k8s.topPods( k8sApi, k8sMetrics, ns )
    for ( let pod of topPods ) try {
      podMetrics[ pod.Pod.metadata.name ] = {
        cpu  : pod.CPU.CurrentUsage / 10, // TODO: investigate why we need that ??
        cpuL : pod.CPU.LimitTotal,
        mem  : getMemMB( pod.Memory.CurrentUsage ),
        memL : getMemMB( pod.Memory.LimitTotal )
      }
    } catch ( exc ) {
      log.warn( 'getPodMetrics', ns, pod.Pod.metadata.name, exc.message )
    }
  } catch ( e ) {
    log.warn( 'getPodMetrics', ns, e.message )
    errorState = true
  }
  return podMetrics
}
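/* Example (illustrative only): getMemMB() above truncates the last six
   digits of the byte count reported by the metrics API, i.e. it rounds
   down to megabytes:

     getMemMB( 268435456n )  // -> 268  (256 MiB expressed as ~268 MB)
     getMemMB( '52428800' )  // -> 52
     getMemMB( undefined )   // -> 0   (parse failure falls back to 0)
*/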
//-----------------------------------------------------------------------------

async function getPods( ns, nodes ) {
  let pods = {}
  let po  = await k8sApi.listNamespacedPod( ns )
  let obj = await loadMS( ns )
  pods[ '_ing' ]   = obj[ '_ing' ]
  pods[ '_issue' ] = obj[ '_issue' ]

  let podMetrics = await getPodMetrics( ns )

  if ( po.body && po.body.items ) {
    for ( let aPod of po.body.items ) {
      try {
        let svc     = getMsName( aPod, obj )
        let msName  = svc.msName
        let podName = aPod.metadata.name
        let kind = 'r'
        try {
          kind = aPod.metadata.ownerReferences[0].kind
        } catch ( e ) {
          log.verbose( 'getPods', podName, e.message, aPod.metadata )
        }

        let pod = {}
        if ( needCollectLogs( ns, msName ) ) {
          pod = getPodWithAllDetails( pod, aPod )
          if ( podMetrics[ podName ] ) {
            pod.cpu  = podMetrics[ podName ].cpu
            pod.mem  = podMetrics[ podName ].mem
            pod.cpuL = podMetrics[ podName ].cpuL
            pod.memL = podMetrics[ podName ].memL
          }
          subscribePodLogs( ns, msName, podName, pod )
        }

        if ( podMetrics[ podName ] ) { // accumulate pod usage on its node
          nodes[ aPod.spec.nodeName ].cpu += podMetrics[ podName ].cpu
          nodes[ aPod.spec.nodeName ].mem += podMetrics[ podName ].mem
        }

        pod.n = nodes[ aPod.spec.nodeName ].no
        pod.k = ( kindMap[ kind ] ? kindMap[ kind ] : kind )
        pod.s = aPod.status.phase

        if ( ! pods[ msName ] ) { pods[ msName ] = {} }
        pods[ msName ][ podName ] = pod

      } catch ( exc ) {
        log.warn( 'getPods', exc.message )
        errorState = true
      }
    }
  }
  return pods
}


function needCollectLogs( ns, ms ) {
  if ( collectAllLogs ) { return true }
  if ( collCfg.indexOf( ns +'/'+ ms ) >= 0 ) { return true } // pod is in scope
  return false
}

//-----------------------------------------------------------------------------

function getPodWithAllDetails( pod, aPod ) {
  try {
    pod = {
      ct : ( new Date( aPod.metadata.creationTimestamp ) ).getTime(),
      st : ( new Date( aPod.status.startTime ) ).getTime(),
      c  : {},
      ip : aPod.status.podIP,
      rc : 0,
      lt : Date.now()
    }
    if ( aPod.status.containerStatuses ) {
      for ( let c of aPod.status.containerStatuses ) {
        pod.c[ c.name ] = {
          s  : ( c.started ? 'running' : 'terminated' ),
          sr : c.reason,
          rc : c.restartCount,
          ci : c.image,
          lt : Date.now()
        }
        // remember the highest restart count of the pod's containers
        if ( c.restartCount > pod.rc ) { pod.rc = c.restartCount }
      }
    }
  } catch ( e ) {
    log.warn( 'getPodWithAllDetails', e.message, aPod )
    errorState = true
  }
  return pod
}

//-----------------------------------------------------------------------------

async function getNode() {
  let nodeMap = {}
  let nodeNo  = 0
  let no = await k8sApi.listNode()
  if ( no.body && no.body.items ) {
    for ( let aNode of no.body.items ) {
      nodeMap[ aNode.metadata.name ] = { no : nodeNo, lastSeen : Date.now() }
      nodeNo ++
    }
  }
  try {
    let top = await k8s.topNodes( k8sApi )
    for ( let topNo of top ) {
      let nodeName = topNo.Node.metadata.name
      nodeMap[ nodeName ].cpu     = 0
      nodeMap[ nodeName ].cpuCapa = topNo.CPU.Capacity
      nodeMap[ nodeName ].cpuReq  = topNo.CPU.RequestTotal
      nodeMap[ nodeName ].cpuLim  = topNo.CPU.LimitTotal
      nodeMap[ nodeName ].mem     = 0
      nodeMap[ nodeName ].memCap  = getMemMB( topNo.Memory.Capacity )
      nodeMap[ nodeName ].memReq  = getMemMB( topNo.Memory.RequestTotal )
      nodeMap[ nodeName ].memLim  = getMemMB( topNo.Memory.LimitTotal )
    }
  } catch ( e ) {
    log.warn( 'getNode topNodes', e.message )
  }
  log.verbose( 'top', nodeMap )
  return nodeMap
}
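/* Usage sketch (illustrative only, not part of the module): how the exported
   API is meant to be driven. `sender` and postToMonitoringServer() are
   hypothetical — the real sender must provide sendLogs() plus whatever
   ./dta-acc-stats expects — and the require path is an assumption:

     const collector = require( './collector' )

     const sender = {
       sendLogs : async ( logs ) => { await postToMonitoringServer( logs ) }
     }

     await collector.init( sender )
     setInterval( async () => {
       const cluster = await collector.getDta() // nodes, namespaces, pods, issues
       if ( ! cluster ) { console.warn( 'collect failed', collector.getErrState() ) }
       await collector.pushLogs()               // flush any buffered pod logs
     }, 60 * 1000 )
*/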