UNPKG

newrelic

Version:
381 lines (321 loc) 12.2 kB
/*
 * Copyright 2020 New Relic Corporation. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

'use strict'

const protoLoader = require('@grpc/proto-loader')
const grpc = require('../proxy/grpc')
const logger = require('../logger').child({component: 'grpc_connection'})
const EventEmitter = require('events')
const NAMES = require('../metrics/names')
const util = require('util')

// Maps a gRPC metadata key to the environment variable that supplies its value.
// Only used to simulate connectivity issues in tests (see _setTestMetadata).
const GRPC_TEST_META = {
  flaky: 'NEWRELIC_GRPCCONNECTION_METADATA_FLAKY',
  delay: 'NEWRELIC_GRPCCONNECTION_METADATA_DELAY',
  flaky_code: 'NEWRELIC_GRPCCONNECTION_METADATA_FLAKY_CODE',
  success_delay_ms: 'NEWRELIC_GRPCCONNECTION_METADATA_SUCCESS_DELAY_MS'
}

const connectionStates = require('./connection/states')

const PROTO_OPTIONS = {
  keepCase: true,
  longs: String,
  enums: String,
  defaults: true,
  oneofs: true
}

// NOTE(review): __dirname has no trailing separator, so this concatenation fuses
// the last directory name with the leading '..'. It presumably only resolves
// because the proto loader normalizes the path before reading it -- confirm
// before touching this value.
const PROTO_DEFINITION_PATH = __dirname +
  '../../../lib/grpc/endpoints/infinite-tracing/v1.proto'

const DEFAULT_RECONNECT_DELAY_MS = 15 * 1000

/**
 * Class for managing the GRPC connection
 *
 * Both @grpc/grpc-js and grpc will manage the http2 connections
 * for us -- this class manages the _stream_ connection logic.
 *
 * Will emit events based on the connectionStates (see above)
 */
class GrpcConnection extends EventEmitter {
  /**
   * GrpcConnection constructor
   *
   * Standard property setting/initialization, and sets an initial
   * connection state of disconnected
   *
   * @param {Object} traceObserverConfig config item config.infinite_tracing.trace_observer
   * @param {MetricAggregator} metrics metric aggregator, for supportability metrics
   * @param {Number} [reconnectDelayMs=15000] number of milliseconds to wait before reconnecting
   * for error states that require a reconnect delay.
   */
  constructor(traceObserverConfig, metrics, reconnectDelayMs) {
    super()

    // A falsy delay (including 0) falls back to the default.
    this._reconnectDelayMs = reconnectDelayMs || DEFAULT_RECONNECT_DELAY_MS

    this._metrics = metrics
    this._setState(connectionStates.disconnected)

    // Connection details are supplied later through setConnectionDetails().
    this._licenseKey = null
    this._runId = null
    this._requestHeadersMap = null
    this._rootCerts = null

    this._endpoint = this.getTraceObserverEndpoint(traceObserverConfig)

    this._client = null
    this.stream = null
  }

  /**
   * Sets connection details
   *
   * Allows setting of connection details _after_ object constructions
   * but before the actual connection.
   *
   * @param {string} license_key the agent license key
   * @param {string} run_id the current agent run id (also called agent run token)
   * @param {object} requestHeadersMap request headers map received from server connect.
   * @param {string} [rootCerts] string of root (ca) certificates to attach to the connection.
   * @returns {GrpcConnection} this instance, to allow chaining
   */
  setConnectionDetails(license_key, run_id, requestHeadersMap, rootCerts) {
    this._licenseKey = license_key
    this._runId = run_id
    this._requestHeadersMap = requestHeadersMap
    this._rootCerts = rootCerts

    return this
  }

  /**
   * Builds the "host:port" endpoint string for the trace observer.
   *
   * @param {Object} traceObserverConfig object exposing `host` and `port`
   * @returns {string} endpoint in the form `host:port`
   */
  getTraceObserverEndpoint(traceObserverConfig) {
    return `${traceObserverConfig.host}:${traceObserverConfig.port}`
  }

  /**
   * Sets the connection state
   *
   * Used to indicate a transition from one connection state to
   * the next. Also responsible for emitting the connect state event
   *
   * @param {int} state The connection state (See connectionStates above)
   * @param {ClientDuplexStreamImpl} [stream=null] The GRPC stream, when defined
   */
  _setState(state, stream = null) {
    this._state = state
    // The event name is looked up from connectionStates using the state value.
    this.emit(connectionStates[state], stream)
  }

  /**
   * Start the Connection
   *
   * Public Entry point -- initiates a connection. Does nothing unless the
   * current state is disconnected.
   */
  connectSpans() {
    if (this._state !== connectionStates.disconnected) {
      return
    }

    this._setState(connectionStates.connecting)
    logger.trace('Connecting to gRPC endpoint.')

    try {
      this.stream = this._connectSpans()

      // May not actually be "connected" at this point but we can write to the stream
      // immediately.
      this._setState(connectionStates.connected, this.stream)
    } catch (err) {
      logger.warn(
        err,
        'Unexpected error establishing gRPC stream, will not attempt reconnect.'
      )

      this._disconnect()
    }
  }

  /**
   * End the current stream and set state to disconnected.
   *
   * No more data can be sent until connected again.
   */
  disconnect() {
    if (this._state === connectionStates.disconnected) {
      return
    }

    this._disconnect()
  }

  /**
   * Method returns GRPC metadata for initial connection
   *
   * @param {string} license_key the agent license key
   * @param {string} run_id the current agent run id
   * @param {object} [requestHeadersMap] extra headers to add; keys are lowercased
   * @param {object} env environment variable map, consulted for test metadata
   * @returns {Metadata} populated gRPC metadata
   */
  _getMetadata(license_key, run_id, requestHeadersMap, env) {
    const metadata = new grpc.Metadata()
    metadata.add('license_key', license_key)
    metadata.add('agent_run_token', run_id)

    // p17 spec: If request_headers_map is empty or absent,
    // the agent SHOULD NOT apply anything to its requests.
    if (requestHeadersMap) {
      for (const [key, value] of Object.entries(requestHeadersMap)) {
        // keys MUST be lowercase for Infinite Tracing
        metadata.add(key.toLowerCase(), value)
      }
    }

    this._setTestMetadata(metadata, env)

    return metadata
  }

  /**
   * Adds test metadata used to simulate connectivity issues
   * when appropriate env vars are set
   *
   * @param {Metadata} metadata metadata object to add the test entries to
   * @param {object} env environment variable map to read the values from
   */
  _setTestMetadata(metadata, env) {
    for (const [key, envVar] of Object.entries(GRPC_TEST_META)) {
      const value = parseInt(env[envVar], 10)

      // Unset, non-numeric (NaN) and zero values are all skipped.
      if (value) {
        logger.trace('Adding %s metadata: %s', key, value)
        metadata.add(key, value)
      }
    }
  }

  /**
   * Disconnects from gRPC endpoint and schedules establishing a new connection.
   *
   * @param {number} reconnectDelayMs number of milliseconds to wait before reconnecting.
   */
  _reconnect(reconnectDelayMs = 0) {
    this._disconnect()

    // The message reports seconds, so convert from the millisecond argument.
    logger.trace('Reconnecting to gRPC endpoint in [%s] seconds', reconnectDelayMs / 1000)

    setTimeout(
      this.connectSpans.bind(this),
      reconnectDelayMs
    )
  }

  /**
   * Ends the current stream, if any, and moves to the disconnected state.
   *
   * The existing listeners are swapped for minimal ones so that a late
   * status or error from the old stream is only logged.
   */
  _disconnect() {
    logger.trace('Disconnecting from gRPC endpoint.')

    if (this.stream) {
      this.stream.removeAllListeners()

      const oldStream = this.stream
      this.stream.on('status', function endStreamStatusHandler(grpcStatus) {
        logger.trace('End stream status received [%s]: %s', grpcStatus.code, grpcStatus.details)

        // Cleanup the final end stream listeners.
        oldStream.removeAllListeners()
      })

      // Listen to any final errors to prevent throwing.
      // This is unlikely but if the server closes post
      // removing listeners and prior to response it could
      // happen. We noticed this via tests on Node 14.
      this.stream.on('error', function endStreamErrorHandler(err) {
        logger.trace('End stream error received. Code: [%s]: %s', err.code, err.details)
      })

      // Indicates to server we are done.
      // Server officially closes the stream.
      this.stream.end()

      this.stream = null
    }

    this._setState(connectionStates.disconnected)
  }

  /**
   * Central location to setup stream observers
   *
   * Events from the GRPC stream (a ClientDuplexStreamImpl) are the main way
   * we communicate with the GRPC server.
   *
   * @param {ClientDuplexStreamImpl} stream
   */
  _setupSpanStreamObservers(stream) {
    // Node streams require all data sent by the server to be read before the end
    // (or status in this case) event gets fired. As such, we have to subscribe even
    // if we are not going to use the data.
    stream.on('data', function data(response) {
      if (logger.traceEnabled()) {
        logger.trace("gRPC span response stream: %s", JSON.stringify(response))
      }
    })

    // listen for status that indicate stream has ended,
    // and we need to disconnect
    stream.on('status', (grpcStatus) => {
      logger.trace('gRPC Status Received [%s]: %s', grpcStatus.code, grpcStatus.details)

      // Translate the numeric code into its name; unrecognized codes become 'UNKNOWN'.
      const grpcStatusName = grpc.status[grpcStatus.code] || 'UNKNOWN'

      if (grpc.status[grpc.status.UNIMPLEMENTED] === grpcStatusName) {
        this._metrics.getOrCreateMetric(
          NAMES.INFINITE_TRACING.SPAN_RESPONSE_GRPC_UNIMPLEMENTED
        ).incrementCallCount()

        // per the spec, An UNIMPLEMENTED status code from gRPC indicates
        // that the versioned Trace Observer is no longer available. Agents
        // MUST NOT attempt to reconnect in this case
        logger.info(
          '[UNIMPLEMENTED]: Trace Observer is no longer available. Shutting down connection.'
        )

        this._disconnect()
      } else if (grpc.status[grpc.status.OK] === grpcStatusName) {
        // Clean end of stream: reconnect without delay.
        this._reconnect()
      } else {
        this._metrics.getOrCreateMetric(
          util.format(NAMES.INFINITE_TRACING.SPAN_RESPONSE_GRPC_STATUS, grpcStatusName)
        ).incrementCallCount()

        this._reconnect(this._reconnectDelayMs)
      }
    })

    // if we don't listen for the errors they'll bubble
    // up and crash the application
    stream.on('error', (err) => {
      this._metrics.getOrCreateMetric(NAMES.INFINITE_TRACING.SPAN_RESPONSE_ERROR)
        .incrementCallCount()

      // For errors, the status will either result in a disconnect or a reconnect
      // delay that should prevent too frequent spamming. Unless the app is idle
      // and regularly getting Status 13 reconnects from the server, in which case
      // this will be almost the only logging.
      logger.warn('Span stream error. Code: [%s]: %s', err.code, err.details)
    })
  }

  /**
   * Creates the GRPC credentials needed
   *
   * @param {object} grpcApi gRPC module exposing `credentials.createSsl`
   * @returns {object} whatever `grpcApi.credentials.createSsl` returns
   */
  _generateCredentials(grpcApi) {
    let certBuffer = null

    // Current settable value for testing. If allowed to be overridden via
    // configuration, this should be removed in place of setting
    // this._rootCerts from config via normal configuration precedence.
    const envTestCerts = process.env.NEWRELIC_GRPCCONNECTION_CA
    const rootCerts = this._rootCerts || envTestCerts

    if (rootCerts) {
      logger.debug('Infinite tracing root certificates found to attach to requests.')

      try {
        certBuffer = Buffer.from(rootCerts, 'utf-8')
      } catch (err) {
        // Error-first, matching the other logger.warn call that reports an error.
        logger.warn(err, 'Failed to create buffer from rootCerts, proceeding without.')
      }
    }

    // null/undefined ca treated same as calling createSsl()
    return grpcApi.credentials.createSsl(certBuffer)
  }

  /**
   * Internal/private method for connection
   *
   * Contains the actual logic that connects to the GRPC service.
   * "Connection" can be a somewhat misleading term here. This method
   * invokes the "recordSpan" remote procedure call. Behind the scenes
   * this makes an http2 request with the metadata, and then returns
   * a stream for further writing.
   *
   * @returns {ClientDuplexStreamImpl} the stream returned by recordSpan
   */
  _connectSpans() {
    if (!this._client) {
      // Only create once to avoid potential memory leak.
      // We create here (currently) for consistent error handling.
      this._client = this._createClient(this._endpoint)
    }

    const metadata = this._getMetadata(
      this._licenseKey,
      this._runId,
      this._requestHeadersMap,
      process.env
    )

    const stream = this._client.recordSpan(metadata)
    this._setupSpanStreamObservers(stream)

    return stream
  }

  /**
   * Creates gRPC service client to use for establishing gRPC streams.
   *
   * WARNING: creating a client more than once can result in a memory leak.
   * ChannelImplementation and related objects will stay in memory even after
   * the stream is closed and we do not have a handle to the client. Currently
   * impacting grpc-js@1.2.11 and several earlier versions.
   *
   * @param {string} endpoint the GRPC server's endpoint
   * @returns {object} an IngestService client for the endpoint
   */
  _createClient(endpoint) {
    logger.trace('Creating gRPC client for: %s', endpoint)

    const packageDefinition = protoLoader.loadSync(PROTO_DEFINITION_PATH, PROTO_OPTIONS)

    const protoDescriptor = grpc.loadPackageDefinition(
      packageDefinition
    )

    const traceApi = protoDescriptor.com.newrelic.trace.v1

    const credentials = this._generateCredentials(grpc)

    const client = new traceApi.IngestService(
      endpoint,
      credentials
    )

    return client
  }
}

module.exports = GrpcConnection