newrelic
Version:
New Relic agent
671 lines (578 loc) • 21.4 kB
JavaScript
/*
* Copyright 2020 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
'use strict'
const CollectorResponse = require('./response')
const facts = require('./facts')
const logger = require('../logger').child({ component: 'collector_api' })
const RemoteMethod = require('./remote-method')
const HealthReporter = require('../health-reporter')
const NAMES = require('../metrics/names')
// Port used for a redirected collector endpoint when the redirect host
// returned by preconnect does not include one.
const DEFAULT_PORT = 443
// Multiplier converting the backoff intervals below (seconds) into the
// milliseconds expected by setTimeout.
const TO_MILLIS = 1e3
// Connection retry schedule, taken directly from Python agent's
// newrelic.core.application. `interval` is the wait in seconds before the next
// login attempt; `warn` marks the attempts after which a "no connection"
// warning is logged. Attempts past the end of the list reuse the last entry.
const BACKOFFS = [
{ interval: 15, warn: false },
{ interval: 15, warn: false },
{ interval: 30, warn: false },
{ interval: 60, warn: true },
{ interval: 120, warn: false },
{ interval: 300, warn: false }
]
// Expected collector response codes
const SUCCESS = new Set([200, 202])
// Codes answered with a reconnect response by _handleResponseCode.
const RESTART = new Set([
401, // Authentication failed.
409 // NR says to reconnect for some reason.
])
// Codes for which the harvested data is retained for the next harvest.
const FAILURE_SAVE_DATA = new Set([
408, // Data took too long to reach NR.
429, // Too many requests being received by NR, rate limited.
500, // NR server went boom.
503 // NR server is not available.
])
// Codes for which the harvested data is discarded.
const FAILURE_DISCARD_DATA = new Set([
400, // Format of the request is incorrect.
403, // Not entitled to perform the action.
404, // Sending to wrong destination.
405, // Using the wrong HTTP method (e.g. PUT instead of POST).
407, // Proxy authentication misconfigured.
411, // No Content-Length header provided, or value is incorrect.
413, // Payload is too large.
414, // URI exceeds allowed length.
415, // Content-type or Content-encoding values are incorrect.
417, // NR cannot meet the expectation of the request.
431 // Request headers exceed size limit.
])
// Local alias for the agent-run behavior values exposed by CollectorResponse.
const AGENT_RUN_BEHAVIOR = CollectorResponse.AGENT_RUN_BEHAVIOR
/**
 * Trace-logs every error in the list, followed by any errors attached to it
 * as `laterErrors`, numbering all of them consecutively starting at 1.
 *
 * @param {Array} errors list of errors
 * @param {string} name collector endpoint name
 */
function dumpErrors(errors, name) {
  let counter = 0
  const report = (err) => {
    counter += 1
    logger.trace(err, 'Error %s during %s:', counter, name)
  }

  for (const primary of errors) {
    report(primary)
    for (const followUp of primary.laterErrors || []) {
      report(followUp)
    }
  }
}
/**
 * Creates the collector API for an agent and one reusable RemoteMethod per
 * collector endpoint, all initially pointing at the configured host and port.
 *
 * @param {Agent} agent New Relic agent
 */
function CollectorAPI(agent) {
  this._agent = agent
  this._reqHeadersMap = null

  // Every remote method starts out with the same configured endpoint object.
  const configuredEndpoint = {
    host: agent.config.host,
    port: agent.config.port
  }

  const endpointNames = [
    'preconnect',
    'connect',
    'agent_settings',
    'error_data',
    'metric_data',
    'transaction_sample_data',
    'shutdown',
    'analytic_event_data',
    'custom_event_data',
    'sql_trace_data',
    'error_event_data',
    'span_event_data',
    'log_event_data'
  ]

  // RemoteMethods carry little per-object state, so they are built once here
  // and reused for every call.
  this._methods = {}
  endpointNames.forEach((endpointName) => {
    this._methods[endpointName] = new RemoteMethod(endpointName, agent, configuredEndpoint)
  })
}
/**
 * Handles sending data to the relevant collector method.
 * It first checks that a callback, data and a known method name were
 * provided before sending.
 *
 * @param {string} method collector method to send the data see this._methods
 * @param {Array} data payload that will be sent to remote method
 * @param {Function} callback function to invoke after sending data; receives a
 * TypeError as its first argument when `data` is missing or `method` is not a
 * registered collector method
 * @throws {TypeError} when no callback is provided
 */
CollectorAPI.prototype.send = function send(method, data, callback) {
  if (!callback) {
    this._throwCallbackError()
  }

  if (!data) {
    callback(new TypeError(`must pass data for ${method} to send`))
    return
  }

  // An unregistered name would otherwise surface later as an opaque
  // "cannot read properties of undefined" error. The own-property check also
  // rejects names inherited from Object.prototype such as `toString`.
  if (!Object.prototype.hasOwnProperty.call(this._methods, method)) {
    callback(new TypeError(`unknown collector method ${method}`))
    return
  }

  this._sendData(this._methods[method], data, callback)
}
/**
 * Shared guard used by the public methods that require a callback.
 *
 * @throws {TypeError} always
 */
CollectorAPI.prototype._throwCallbackError = function _throwCallbackError() {
  const missingCallback = new TypeError('callback is required')
  throw missingCallback
}
/**
 * Points every remote method except preconnect at a new endpoint, such as the
 * one derived from the `redirect_host` returned by preconnect. Preconnect
 * always keeps using the configured options.
 *
 * @param {object} endpoint new endpoint passed to each method's updateEndpoint
 */
CollectorAPI.prototype._updateEndpoints = function _updateEndpoints(endpoint) {
  logger.trace('Updating endpoints to: ', endpoint)

  Object.entries(this._methods)
    .filter(([methodName]) => methodName !== 'preconnect')
    .forEach(([, remoteMethod]) => {
      remoteMethod.updateEndpoint(endpoint)
    })
}
/**
 * Connect to the data collector.
 *
 * Marks the agent as connecting, clears any stored request headers and starts
 * the login sequence; every login result is routed through `_retry` together
 * with a fresh retry context.
 *
 * @param {Function} callback A typical error first callback to be invoked
 * upon successful or unsuccessful connection. The second parameter will be
 * an instance of {@link CollectorResponse}.
 *
 * @fires Agent#connected By way of the full connection process. This event
 * is not fired directly in this method.
 * @fires Agent#connecting
 */
CollectorAPI.prototype.connect = function connect(callback) {
  if (!callback) {
    this._throwCallbackError()
  }

  logger.trace('Starting collector.')
  this._agent.setState('connecting')

  // Headers stored from an earlier connect must not leak into this one.
  if (this._reqHeadersMap) {
    this._reqHeadersMap = null
  }

  const retryState = {
    attempts: 1,
    errors: [],
    max: BACKOFFS.length,
    callback
  }
  const onLoginResult = this._retry.bind(this, retryState)
  this._login(onLoginResult)
}
/**
 * Checks if proxy is configured to connect via `proxy_host` and `proxy_port`
 * and if error code is EPROTO or ECONNRESET. This is an indication their proxy
 * server only accepts HTTP connections, and we should provide an actionable warning to
 * fix the misconfiguration by setting `proxy` to a fully qualified URL
 *
 * @param {Error} error response error
 * @returns {boolean} `true` when the error and configuration match the
 * pattern described above, `false` otherwise (including when there is no error)
 */
CollectorAPI.prototype._isProxyMisconfigured = function _isProxyMisconfigured(error) {
  const config = this._agent.config
  // Coerce explicitly: the bare `&&` chain would leak `undefined`, `null` or
  // an empty config string instead of the documented boolean.
  return Boolean(
    error &&
      ['EPROTO', 'ECONNRESET'].includes(error.code) &&
      config.proxy_host &&
      config.proxy_port &&
      !config.proxy
  )
}
/**
 * Continuation for each login attempt made while connecting. Counts the
 * attempt, then either completes the connection, gives up on a disconnect
 * instruction, or logs the failure and schedules another login after a
 * backoff delay.
 *
 * @param {object} ctx context object to pass from parent and between recursions.
 * @param {Function} ctx.callback invoked with the final success or fatal response
 * @param {number} ctx.max number of entries in the backoff schedule
 * @param {Error[]} ctx.errors errors collected across the attempts so far
 * @param {number} ctx.attempts 1-based number of the current attempt
 * @param {Error} error response error
 * @param {http.ServerResponse} response response from collector; may instead be
 * a `CollectorResponse`, or missing entirely
 * @returns {void}
 */
CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
const api = this
// Every invocation counts as one registration attempt.
const metric = this._agent.metrics.getOrCreateMetric(
NAMES.SUPPORTABILITY.REGISTRATION + '/Attempts'
)
metric.incrementCallCount()
if (error) {
// Keep the error; collected errors are trace-logged once a later attempt succeeds.
ctx.errors.push(error)
} else if (response && SUCCESS.has(response.status)) {
dumpErrors(ctx.errors, 'connect')
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HEALTHY)
ctx.callback(null, CollectorResponse.success(response.payload))
return
}
// With no response at all, substitute a retry response so the checks below
// have something to inspect.
if (!response) {
response = CollectorResponse.retry()
}
// Retry everything except for an explicit Disconnect response code.
if (response.status === 410 || response.agentRun === AGENT_RUN_BEHAVIOR.SHUTDOWN) {
logger.error('The New Relic collector rejected this agent.')
this._agent.healthReporter.setStatus(HealthReporter.STATUS_FORCED_DISCONNECT)
return ctx.callback(null, CollectorResponse.fatal(response.payload))
} else if (response.status === 401) {
logger.warn(
error,
'Your license key appears to be invalid. Reattempting connection to New' +
' Relic. If the problem persists, please contact support@newrelic.com.' +
' (status code %s)',
response.status
)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_INVALID_LICENSE_KEY)
} else if (this._isProxyMisconfigured(error)) {
logger.warn(
error,
'Your proxy server appears to be configured to accept connections over http. ' +
'When setting `proxy_host` and `proxy_port` New Relic attempts to connect over ' +
'SSL(https). If your proxy is configured to accept connections over http, try ' +
'setting `proxy` to a fully qualified URL(e.g http://proxy-host:8080).'
)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HTTP_PROXY_MISCONFIGURED)
} else {
// Sometimes we get a `CollectorResponse` instance instead of an
// `http.ServerResponse`. In such cases, we do not have access to the
// status code.
let msg = 'Unexpected error communicating with New Relic backend.'
if (response.status) {
msg = `Received error status code from New Relic backend: ${response.status}.`
}
logger.warn(error, msg)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_BACKEND_ERROR)
}
// Attempts beyond the end of the schedule keep using its last entry.
const backoff = BACKOFFS[Math.min(ctx.attempts, ctx.max) - 1]
if (backoff.warn) {
logger.warn('No connection has been established to New Relic after %d attempts.', ctx.attempts)
}
logger.debug(
error,
'Failed to connect to New Relic after attempt %d, waiting %ds to retry.',
ctx.attempts,
backoff.interval
)
++ctx.attempts
// Log in again after the backoff interval, feeding the result back into
// this method with the same context.
const timeout = setTimeout(function again() {
api._login(api._retry.bind(api, ctx))
}, backoff.interval * TO_MILLIS)
// Do not let the pending retry timer keep the process alive on its own.
timeout.unref()
}
/**
 * Starts the login sequence by invoking the preconnect method. The payload
 * carries the `high_security` setting and, when one is configured, the
 * security policies token; the result is handed to `_onPreConnect`.
 *
 * @param {Function} callback passed through to `_onPreConnect` as its first argument
 */
CollectorAPI.prototype._login = function _login(callback) {
  const { config } = this._agent
  const policiesToken = config.security_policies_token

  const preconnectData = policiesToken
    ? { high_security: config.high_security, security_policies_token: policiesToken }
    : { high_security: config.high_security }

  const onPreConnect = this._onPreConnect.bind(this, callback)
  this._methods.preconnect.invoke([preconnectData], onPreConnect)
}
/**
 * Handles the preconnect result. Failures are handed straight back to the
 * callback; on success the redirect host is processed, the security policies
 * are applied and, unless they require a shutdown, the connect step is
 * started by gathering the environment facts.
 *
 * @param {Function} callback function to run after processing response
 * @param {Error} error response error
 * @param {http.ServerResponse} response collector response from pre connect
 * @returns {void}
 */
CollectorAPI.prototype._onPreConnect = function _onPreConnect(callback, error, response) {
  // A missing response is treated like any other failure and left to the
  // callback to handle, instead of crashing on `response.status`.
  if (error || !response || !SUCCESS.has(response.status)) {
    callback(error, response)
    return
  }

  const agent = this._agent
  const res = response.payload || Object.create(null)

  this._handlePreConnectResponse(res)

  const policies = res.security_policies || Object.create(null)
  const laspResponse = agent.config.applyLasp(agent, policies)
  if (laspResponse.shouldShutdownRun()) {
    callback(null, laspResponse)
    return
  }

  this._getFacts(laspResponse.payload, callback)
}
/**
 * Inspects the `redirect_host` of the preconnect payload. A value of the form
 * `host` or `host:port` reconfigures every endpoint except preconnect, using
 * the default port when none is given. A missing or malformed value is logged
 * and the current endpoints are left untouched.
 *
 * @param {object} res payload of the preconnect response
 */
CollectorAPI.prototype._handlePreConnectResponse = function _handlePreConnectResponse(res) {
  const configuredHost = this._agent.config.host
  const redirectHost = res.redirect_host

  if (!redirectHost) {
    logger.error(
      "Requesting this account's collector from %s failed; trying default.",
      configuredHost
    )
    return
  }

  const segments = redirectHost.split(':')
  // More than one colon cannot be read as host:port.
  if (segments.length > 2) {
    logger.error(
      "Requesting collector from %s returned bogus result '%s'; trying default.",
      configuredHost,
      redirectHost
    )
    return
  }

  logger.debug(
    "Requesting this account's collector from %s returned %s; reconfiguring.",
    configuredHost,
    redirectHost
  )
  this._updateEndpoints({
    host: segments[0],
    port: segments[1] || DEFAULT_PORT
  })
}
/**
 * Gathers the agent's environment facts, attaches the security policies when
 * there are any, and starts the connect call with the result.
 *
 * @param {object} lasp security policies to include in the connect payload, if any
 * @param {Function} callback passed through to `_connect`
 */
CollectorAPI.prototype._getFacts = function _getFacts(lasp, callback) {
  facts(this._agent, (environmentDict) => {
    if (lasp) {
      environmentDict.security_policies = lasp
    }

    // The collector insists on receiving the environment wrapped in an array.
    this._connect([environmentDict], callback)
  })
}
/**
 * Invokes the connect method with the environment payload and routes the
 * result to `_onConnect`.
 *
 * @param {Array} env connect payload
 * @param {Function} callback passed through to `_onConnect` as its first argument
 */
CollectorAPI.prototype._connect = function _connect(env, callback) {
  const onConnect = this._onConnect.bind(this, callback)
  this._methods.connect.invoke(env, onConnect)
}
/**
 * Handles the response to the connect call. Failures are handed back to the
 * callback unchanged. On success the agent is marked as connected, any
 * messages in the payload are logged, the request headers map is stored for
 * later collector requests and the agent is reconfigured with the payload.
 *
 * @param {Function} callback function to run after processing response
 * @param {Error} error collector response error
 * @param {object} res collector response; its `status` and `payload` are read
 *
 * @fires Agent#connected
 */
CollectorAPI.prototype._onConnect = function _onConnect(callback, error, res) {
  // A missing response is treated like any other failure and left to the
  // callback to handle, instead of crashing on `res.status`.
  if (error || !res || !SUCCESS.has(res.status)) {
    callback(error, res)
    return
  }

  const agent = this._agent
  const methods = this._methods
  const config = res.payload
  if (!config || !config.agent_run_id) {
    callback(new Error('No agent run ID received from handshake.'), res)
    return
  }

  agent.setState('connected')

  logger.info(
    'Connected to %s:%d with agent run ID %s.',
    methods.connect.endpoint.host,
    methods.connect.endpoint.port,
    config.agent_run_id
  )

  // Log "Reporting to..." message from connect response.
  if (config.messages) {
    for (const element of config.messages) {
      logger.info(element.message)
    }
  }

  // Store request headers for future collector requests if they're present
  this._reqHeadersMap = config.request_headers_map

  // pass configuration data from the API so automatic reconnect works
  agent.reconfigure(config)

  callback(null, res)
}
/**
 * Send current public agent settings to collector. This should always be
 * invoked after a successful connect response with server-side settings, but
 * will also be invoked on any other config changes.
 *
 * An error is trace-logged before the optional callback runs.
 *
 * @param {Function} [callback] The continuation / error handler.
 */
CollectorAPI.prototype.reportSettings = function reportSettings(callback) {
  const payload = [this._agent.config.publicSettings()]

  const onReportSettings = (error, response) => {
    if (error) {
      dumpErrors([error], 'agent_settings')
    }

    if (callback) {
      callback(error, response)
    }
  }

  this._methods.agent_settings.invoke(payload, this._reqHeadersMap, onReportSettings)
}
/**
 * Sends no data aside from the message itself. Clears the run ID, which
 * effectively disconnects the agent from the collector. The run ID is cleared
 * and the agent marked as disconnected even when the shutdown call fails.
 *
 * @param {Function} callback Runs after the run ID has been cleared; receives
 * the shutdown error, if any, and a fatal {@link CollectorResponse}.
 *
 * @fires Agent#disconnected
 */
CollectorAPI.prototype.shutdown = function shutdown(callback) {
  if (!callback) {
    this._throwCallbackError()
  }

  logger.info('Shutting down collector.')

  const { _agent: agent } = this

  const onShutdown = (error, response) => {
    if (error) {
      dumpErrors([error], 'shutdown')
    }

    agent.setState('disconnected')
    logger.info('Disconnected from New Relic; clearing run ID %s.', agent.config.run_id)
    agent.config.run_id = undefined

    callback(error, CollectorResponse.fatal(response && response.payload))
  }

  this._methods.shutdown.invoke(null, this._reqHeadersMap, onShutdown)
}
/**
 * Restarts the agent run: stops the harvester, shuts down the current run,
 * connects again and then hands over to the agent's `onConnect` without
 * requesting an immediate harvest.
 *
 * @param {Function} callback passed to the agent's `onConnect`
 */
CollectorAPI.prototype.restart = function restart(callback) {
  logger.info('Restarting collector.')

  this._agent.harvester.stop()

  this.shutdown(() => {
    this.connect(() => {
      const shouldImmediatelyHarvest = false
      this._agent.onConnect(shouldImmediatelyHarvest, callback)
    })
  })
}
/**
 * Invokes a remote method while the agent is connected and routes a
 * successful HTTP exchange through `_handleResponseCode`.
 *
 * When the agent is not connected, nothing is sent; the callback receives an
 * error on a later tick via `setImmediate`.
 *
 * @param {RemoteMethod} method remote method to invoke
 * @param {Array} body payload passed to the remote method
 * @param {Function} callback error-first callback; on success it is invoked by
 * `_handleResponseCode`
 * @returns {Immediate|undefined} the scheduled immediate when not connected
 */
CollectorAPI.prototype._runLifecycle = function _runLifecycle(method, body, callback) {
  if (!this.isConnected()) {
    // The format string needs a placeholder for the method name it is given.
    logger.warn('Not connected to New Relic. Not calling %s.', method.name)
    const error = new Error('Not connected to collector.')
    return setImmediate(callback, error)
  }

  const api = this
  method.invoke(body, this._reqHeadersMap, function standardHandler(error, response) {
    if (error) {
      callback(error)
      return
    }

    return api._handleResponseCode(response, method.name, callback)
  })
}
/**
 * Runs a data-carrying remote method and translates the outcome into a
 * `{ retainData }` result that tells the caller whether to keep the payload
 * for the next harvest.
 *
 * - Errors retain the data and are passed on, except a payload-size error,
 *   which drops the data and is not reported as an error.
 * - A missing response invokes the callback without arguments.
 * - A response requesting a restart restarts the agent run first, then
 *   reports the response's `retainData`.
 * - Any other response reports its `retainData` directly.
 *
 * @param {RemoteMethod} method remote method to invoke
 * @param {Array} data payload to send
 * @param {Function} callback error-first callback receiving `{ retainData }`
 */
CollectorAPI.prototype._sendData = function _sendData(method, data, callback) {
  const onLifecycleDone = (error, response) => {
    // Any runtime errors should preserve the agent run.
    if (error) {
      if (error.code === 'NR_REMOTE_METHOD_MAX_PAYLOAD_SIZE_EXCEEDED') {
        // The payload is the serialized result of a whole harvest. Once it
        // exceeds the allowed size there is no way to shrink it by requeueing
        // individual events, so all of it is dropped and the sender is _not_
        // told about an error.
        //
        // Ideally the sender would estimate the payload size up front, move
        // events back to the queue until the payload fits, and drop a single
        // event only when it is too large on its own.
        return callback(null, { retainData: false })
      }

      callback(error, { retainData: true })
      return
    }

    if (!response) {
      callback()
      return
    }

    // The response's retainData is read only at the moment of reporting.
    const reportOutcome = () => {
      callback(null, { retainData: response.retainData })
    }

    // NOTE: a shutdown instruction is not handled here; for now it is dealt
    // with in _handleResponseCode for consistency.
    // TODO: log the payload if exists?
    if (response.agentRun !== AGENT_RUN_BEHAVIOR.RESTART) {
      reportOutcome()
      return
    }

    // TODO: almost seems better to let the aggregator finish (cb) then restart?
    // TODO: ensure harvesting stopped for all other endpoints. same for shutdown.
    this.restart((connectError) => {
      if (connectError) {
        // TODO: What if preconnect/connect respond with shutdown here?
        // TODO: maybe indicate which endpoint triggered
        // other behaviors on failure?
        logger.warn('Failed to restart agent run.')
      } else {
        logger.trace('Restart succeeded.')
      }

      // TODO: keep object or enum of actions? retain / split / other?
      reportOutcome()
    })
  }

  this._runLifecycle(method, data, onLifecycleDone)
}
/**
 * Reports whether the agent currently holds a run ID.
 *
 * @returns {boolean} true when `config.run_id` is set to a truthy value
 */
CollectorAPI.prototype.isConnected = function isConnected() {
  const { run_id: runId } = this._agent.config
  return Boolean(runId)
}
CollectorAPI.prototype._handleResponseCode = _handleResponseCode

/**
 * Maps the collector's HTTP status code to the matching CollectorResponse and
 * hands it to the callback. Every outcome except 410 is delivered on a later
 * tick via `setImmediate`; for 410 the agent is stopped first and the callback
 * runs from the stop continuation.
 *
 * Must be called with a CollectorAPI instance as `this`.
 *
 * @param {http.ServerResponse} response response from collector
 * @param {number} response.status - Status code from collector response
 * @param {object} response.payload - Parsed response body, if any
 * @param {string} endpoint - Collector endpoint name
 * @param {Function} cb - CollectorAPI method invocation callback
 */
function _handleResponseCode(response, endpoint, cb) {
  const { status } = response
  const deliverLater = (collectorResponse) => {
    setImmediate(cb, null, collectorResponse)
  }

  if (SUCCESS.has(status)) {
    // The request was a success!
    deliverLater(CollectorResponse.success(response.payload))
    return
  }

  if (RESTART.has(status)) {
    // The agent needs to disconnect and restart.
    logFailure(endpoint, status, 'Restarting')
    deliverLater(CollectorResponse.reconnect(0, null))
    return
  }

  if (FAILURE_DISCARD_DATA.has(status)) {
    // Something was wrong with our payload so we must delete our data.
    logFailure(endpoint, status, 'Discarding harvest data')
    deliverLater(CollectorResponse.discard(null))
    return
  }

  if (FAILURE_SAVE_DATA.has(status)) {
    // Something was wrong with the request, but it wasn't our fault. We'll try again.
    logFailure(endpoint, status, 'Retaining data for next harvest')
    deliverLater(CollectorResponse.error(response.payload))
    return
  }

  if (status === 410) {
    // New Relic doesn't like us and we shouldn't try to talk to them any more.
    logFailure(endpoint, status, 'Disconnecting from New Relic')
    this._agent.stop(() => {
      cb(null, CollectorResponse.fatal(response.payload))
    })
    return
  }

  // We're not sure what New Relic is trying to tell us. Let's get rid of our
  // data just in case it is our fault.
  logger.error('Agent endpoint %s returned unexpected status %s.', endpoint, status)
  deliverLater(CollectorResponse.discard(null))
}
/**
 * Logs, at error level, that an endpoint answered with a failure status and
 * which action is taken in response.
 *
 * @param {string} endpoint called endpoint
 * @param {number} code http status code
 * @param {string} action describes collector action
 */
function logFailure(endpoint, code, action) {
  const template = 'Agent endpoint %s returned %s status. %s.'
  logger.error(template, endpoint, code, action)
}
module.exports = CollectorAPI