newrelic
Version:
New Relic agent
261 lines (229 loc) • 8.35 kB
JavaScript
/*
* Copyright 2024 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
'use strict'
const fs = require('node:fs')
const crypto = require('node:crypto')
const path = require('node:path')
const { fileURLToPath } = require('node:url')
const defaultLogger = require('./logger').child({ component: 'HealthReporter' })
/**
 * Lookup table of every recognized health status code, keyed by the code
 * and mapping to the human readable message that accompanies it in the
 * written status file.
 */
const VALID_CODES = new Map(
  Object.entries({
    'NR-APM-000': 'Healthy.',
    'NR-APM-001': 'Invalid license key.',
    'NR-APM-002': 'License key missing.',
    'NR-APM-003': 'Forced disconnect received from New Relic.',
    'NR-APM-004': 'HTTP error communicating with New Relic.',
    'NR-APM-005': 'Missing application name in agent configuration.',
    'NR-APM-006': 'The maximum number of configured app names is exceeded.',
    'NR-APM-007': 'HTTP proxy is misconfigured.',
    'NR-APM-008': 'Agent is disabled via configuration.',
    'NR-APM-009': 'Failed to connect to the New Relic data collector.',
    'NR-APM-010': 'Agent config could not be parsed.',
    'NR-APM-099': 'Agent has shutdown.',
    // Codes 300 through 399 are reserved for the Node.js Agent.
    'NR-APM-300': 'An unexpected error occurred.'
  })
)
/**
 * Get the current time as nanoseconds since the Unix epoch.
 *
 * `process.hrtime.bigint` is not relative to the epoch, so it cannot be used
 * here. Instead, the millisecond clock is scaled up to the nanosecond unit
 * the spec insists on; the result therefore only has millisecond precision.
 *
 * @returns {number} Epoch time in nanoseconds (millisecond resolution).
 */
function getTime() {
  const nanosPerMilli = 1_000_000
  const nowMillis = Date.now()
  return nowMillis * nanosPerMilli
}
/**
 * Serialize a health status record as a small YAML document and write it
 * asynchronously to `file`, replacing any previous content.
 *
 * @param {object} [params] Status details.
 * @param {string} params.file Path of the status file to write.
 * @param {boolean} [params.healthy=true] Value for the `healthy` field.
 * @param {string} params.code Value for the `last_error` field.
 * @param {string} params.msg Text for the (single quoted) `status` field.
 * @param {number} params.startTime Value for `start_time_unix_nano`.
 * @param {Function} params.callback Passed straight to `fs.writeFile`; it
 * receives the write error, if any.
 */
function writeStatus({ file, healthy = true, code, msg, startTime, callback } = {}) {
  // The report time is sampled at the moment the document is assembled.
  const fields = [
    ['healthy', healthy],
    ['status', `'${msg}'`],
    ['last_error', code],
    ['start_time_unix_nano', startTime],
    ['status_time_unix_nano', getTime()]
  ]
  const document = fields.map(([key, value]) => `${key}: ${value}`).join('\n')
  fs.writeFile(file, document, { encoding: 'utf8' }, callback)
}
/**
 * Synchronously check that the current process can both read from and write
 * to `dest`.
 *
 * @param {string} dest Path to probe.
 * @returns {{available: boolean, error?: Error}} `available` is `true` when
 * the access check passes. Otherwise it is `false` and `error` holds the
 * error thrown by the access check.
 */
function directoryAvailable(dest) {
  const outcome = { available: true }
  const requiredAccess = fs.constants.R_OK | fs.constants.W_OK
  try {
    fs.accessSync(dest, requiredAccess)
  } catch (error) {
    outcome.available = false
    outcome.error = error
  }
  return outcome
}
/**
 * HealthReporter implements the "super agent" (New Relic Control) health
 * check spec. An instance of the reporter will continually write out the
 * current status, as set by `reporter.setStatus`, on the interval defined
 * by the environment.
 */
class HealthReporter {
  #enabled = false
  #status = HealthReporter.STATUS_HEALTHY
  #interval
  #destFile
  #logger
  #startTime

  static STATUS_HEALTHY = 'NR-APM-000'
  static STATUS_INVALID_LICENSE_KEY = 'NR-APM-001'
  static STATUS_LICENSE_KEY_MISSING = 'NR-APM-002'
  static STATUS_FORCED_DISCONNECT = 'NR-APM-003'
  static STATUS_BACKEND_ERROR = 'NR-APM-004'
  static STATUS_MISSING_APP_NAME = 'NR-APM-005'
  static STATUS_MAXIMUM_APP_NAMES_EXCEEDED = 'NR-APM-006'
  static STATUS_HTTP_PROXY_MISCONFIGURED = 'NR-APM-007'
  static STATUS_AGENT_DISABLED = 'NR-APM-008'
  static STATUS_CONNECT_ERROR = 'NR-APM-009'
  static STATUS_CONFIG_PARSE_FAILURE = 'NR-APM-010'
  static STATUS_AGENT_SHUTDOWN = 'NR-APM-099'

  // STATUS_INTERNAL errors are the Node.js Agent specific error codes.
  static STATUS_INTERNAL_UNEXPECTED_ERROR = 'NR-APM-300'

  /**
   * @typedef {object} AgentControlConfig
   * @property {boolean} [enabled=false] Whether or not the agent control
   * feature should be enabled.
   * @property {object} health Configuration for the health reporting component
   * of agent control.
   * @property {string} health.delivery_location Path to the directory where
   * status files will be written. May be a file URI.
   * @property {number} health.frequency The time, in seconds, that will be
   * used as the update interval for writing out health status.
   */

  /**
   * Build a new health reporter instance.
   *
   * Important: the shape of `agentConfig` will not be validated. It is expected
   * that this module is used in the context of the agent. Therefore, a
   * properly shaped configuration object should always be available and passed
   * in. Unusable `health` values do not throw: a missing or inaccessible
   * delivery location (or an invalid file URI) leaves the reporter disabled,
   * and an unusable frequency falls back to a default interval.
   *
   * @param {object} [params] Construction parameters.
   * @param {object} params.agentConfig A standard `newrelic` configuration
   * object that has an `agent_control` property which is an instance of
   * {@link AgentControlConfig}.
   * @param {object} [params.logger] A standard logger instance.
   * @param {Function} [params.setInterval] A function to use as `setInterval`.
   * Must return an interval object that supports the `unref()` method.
   */
  constructor({
    agentConfig,
    logger = defaultLogger,
    setInterval = global.setInterval
  } = {}) {
    // NOTE(review): 5 seconds is believed to match the documented default of
    // `agent_control.health.frequency` -- confirm against the agent config.
    const fallbackIntervalMs = 5_000
    const enabled = agentConfig?.agent_control?.enabled
    const healthConfig = agentConfig?.agent_control?.health
    let outDir = healthConfig?.delivery_location

    this.#logger = logger

    if (enabled !== true) {
      this.#logger.info('new relic agent control disabled, skipping health reporting')
      return
    }

    // Only strings can be URIs. Any other value (e.g. a missing setting) is
    // handed to the directory check below, which reports it as inaccessible
    // instead of letting a TypeError escape the constructor.
    if (typeof outDir === 'string' && outDir.includes('://') === true) {
      try {
        outDir = fileURLToPath(outDir)
      } catch (error) {
        // `fileURLToPath` throws for malformed or non-`file:` URLs.
        this.#logger.error(
          'health check output directory is not a valid file URI, skipping health reporting',
          { error }
        )
        return
      }
    }

    const dirCheck = directoryAvailable(outDir)
    if (dirCheck.available === false) {
      this.#logger.error('health check output directory not accessible, skipping health reporting', { error: dirCheck.error })
      return
    }

    // A missing or non-numeric frequency yields NaN, and NaN (like any value
    // below 1) is coerced by Node's timers to a 1 millisecond delay. Guard
    // against hammering the disk by falling back to a sane interval.
    let checkInterval = parseInt(healthConfig?.frequency, 10) * 1_000
    if (Number.isFinite(checkInterval) === false || checkInterval <= 0) {
      this.#logger.warn(
        `invalid health check frequency provided, defaulting to ${fallbackIntervalMs} milliseconds`
      )
      checkInterval = fallbackIntervalMs
    }

    this.#startTime = getTime()

    // Each reporter instance writes to its own uniquely named file.
    const uuid = crypto.randomUUID().replaceAll('-', '')
    this.#destFile = path.join(outDir, `health-${uuid}.yaml`)

    this.#logger.info(
      `new relic agent control is present, writing health on interval ${checkInterval} milliseconds to ${
        this.#destFile
      }`
    )
    this.#interval = setInterval(this.#healthCheck.bind(this), checkInterval)
    // Do not let the reporting timer keep the process alive on its own.
    this.#interval.unref()

    this.#enabled = true
    this.#logger.info('health reporter initialized')
  }

  /**
   * Interval handler: writes the currently known status to the status file.
   * Write failures are logged and otherwise ignored.
   */
  #healthCheck() {
    const healthy = this.#status === HealthReporter.STATUS_HEALTHY
    writeStatus({
      file: this.#destFile,
      healthy,
      startTime: this.#startTime,
      code: this.#status,
      msg: VALID_CODES.get(this.#status),
      callback: (error) => {
        if (error) {
          this.#logger.error(`error when writing out health status: ${error.message}`)
        }
      }
    })
  }

  /**
   * @returns {boolean} `true` when the reporter was successfully initialized
   * and is writing status updates.
   */
  get enabled() {
    return this.#enabled
  }

  /**
   * @returns {string|undefined} Full path of the status file; `undefined`
   * when the reporter is not enabled.
   */
  get destFile() {
    return this.#destFile
  }

  /**
   * Update the known health status. This status will be written to the health
   * file on the next interval. If the provided status is not a recognized
   * status, a log will be written and the status will not be updated.
   *
   * @param {string} status Utilize one of the static status fields.
   */
  setStatus(status) {
    if (this.#enabled === false) {
      return
    }

    if (VALID_CODES.has(status) === false) {
      this.#logger.warn(`invalid health reporter status provided: ${status}`)
      return
    }

    // The shutdown status must never mask a previously recorded problem.
    if (
      status === HealthReporter.STATUS_AGENT_SHUTDOWN &&
      this.#status !== HealthReporter.STATUS_HEALTHY
    ) {
      this.#logger.info(
        `not setting shutdown health status due to current status code: ${this.#status}`
      )
      return
    }

    this.#status = status
  }

  /**
   * This should be invoked on agent shutdown after setting the status
   * to the shutdown status. It will stop the ongoing update interval,
   * initiate an immediate write of the status file, and then invoke the
   * provided callback.
   *
   * @param {Function} done Callback to be invoked after the status file has
   * been updated.
   */
  stop(done) {
    if (this.#enabled === false) {
      done && done()
      return
    }

    clearInterval(this.#interval)

    // NOTE(review): when `setStatus(STATUS_AGENT_SHUTDOWN)` has already been
    // applied, `#status` is no longer the healthy code, so the final file is
    // written with `healthy: false`; when it has not, the file is written with
    // `healthy: true` and the shutdown code. Confirm which one the spec wants.
    const healthy = this.#status === HealthReporter.STATUS_HEALTHY
    let code = this.#status
    let msg = VALID_CODES.get(code)
    if (healthy === true) {
      // We only update the status on shutdown when the last known state is
      // the healthy state. Otherwise, we need to leave the current code in
      // place, and just update the report time.
      code = HealthReporter.STATUS_AGENT_SHUTDOWN
      msg = VALID_CODES.get(code)
    }

    writeStatus({
      file: this.#destFile,
      startTime: this.#startTime,
      healthy,
      code,
      msg,
      callback: (error) => {
        if (error) {
          this.#logger.error(
            `error when writing out health status during shutdown: ${error.message}`
          )
        }
        done && done()
      }
    })
  }
}
// The HealthReporter class is this module's only export.
module.exports = HealthReporter