@instana/aws-lambda
Instana tracing and monitoring for Node.js based AWS Lambdas
/*
* (c) Copyright IBM Corp. 2021
* (c) Copyright Instana Inc. and contributors 2019
*/
'use strict';
const instanaCore = require('@instana/core');
const { backendConnector, consoleLogger: serverlessLogger, environment } = require('@instana/serverless');
const arnParser = require('./arn');
const identityProvider = require('./identity_provider');
const metrics = require('./metrics');
const ssm = require('./ssm');
const triggers = require('./triggers');
const processResult = require('./process_result');
const captureHeaders = require('./capture_headers');
const { tracing, coreConfig } = instanaCore;
const { tracingHeaders, constants, spanBuffer } = tracing;
const lambdaConfigDefaults = {
tracing: { forceTransmissionStartingAt: 25, transmissionDelay: 100, initialTransmissionDelay: 100 }
};
const logger = serverlessLogger.init();
coreConfig.init(logger);
let config = coreConfig.normalize({}, lambdaConfigDefaults);
let coldStart = true;
// Initialize instrumentations early to allow for require statements after our
// package has been required but before the actual instana.wrap(...) call.
instanaCore.preInit(config);
/**
* Wraps an AWS Lambda handler so that metrics and traces are reported to Instana. This function figures out whether the
* Lambda handler uses the callback style or the promise/async function style by inspecting the number of arguments
* the handler declares.
*/
exports.wrap = function wrap(_config, originalHandler) {
/* eslint-disable no-unused-vars */
if (arguments.length === 1) {
originalHandler = _config;
_config = null;
}
// Apparently the AWS Lambda Node.js runtime does not inspect the handler's signature for the number of arguments it
// accepts. But to be extra safe, we strive to return a function with the same number of arguments anyway.
switch (originalHandler.length) {
case 0:
return function handler0() {
return shimmedHandler(originalHandler, this, arguments, _config);
};
case 1:
return function handler1(event) {
return shimmedHandler(originalHandler, this, arguments, _config);
};
case 2:
return function handler2(event, context) {
return shimmedHandler(originalHandler, this, arguments, _config);
};
default:
return function handler3(event, context, callback) {
return shimmedHandler(originalHandler, this, arguments, _config);
};
}
};
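// A minimal usage sketch (hypothetical handler file, not part of this module): the module path,
// handler body and return value are assumptions for illustration only.
//
//   // handler.js
//   const instana = require('@instana/aws-lambda');
//
//   exports.handler = instana.wrap(async (event, context) => {
//     return { statusCode: 200, body: JSON.stringify({ ok: true }) };
//   });
//
// Callback-style handlers work the same way: instana.wrap((event, context, callback) => { ... }).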
function shimmedHandler(originalHandler, originalThis, originalArgs, _config) {
const event = originalArgs[0];
const context = originalArgs[1];
const lambdaCallback = originalArgs[2];
const arnInfo = arnParser(context);
const tracingEnabled = init(event, arnInfo, _config);
if (!tracingEnabled) {
return originalHandler.apply(originalThis, originalArgs);
}
// The AWS Lambda runtime does not seem to inspect the number of arguments the handler function expects. Instead, it
// always calls the handler with three arguments (event, context, callback), no matter whether the handler uses the
// callback or not. If the handler returns a promise, the runtime uses the value that promise resolves to as the
// result. If the handler calls the callback, that value is used as the result. If the handler does both (returns a
// promise that resolves _and_ calls the callback), it depends on the timing: whichever happens first dictates the
// result of the Lambda invocation, the later result is ignored. To match this behaviour, we always wrap the given
// callback _and_ return an instrumented promise.
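// For illustration, a hypothetical handler that does both (sketch only, values assumed): the
// synchronous callback call completes before the returned promise resolves, so its value wins.
//
//   exports.handler = instana.wrap((event, context, callback) => {
//     callback(null, 'from callback');         // happens first, dictates the result
//     return Promise.resolve('from promise');  // resolves later, ignored
//   });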
let handlerHasFinished = false;
return tracing.getCls().ns.runPromiseOrRunAndReturn(() => {
const traceCorrelationData = triggers.readTraceCorrelationData(event, context);
const tracingSuppressed = traceCorrelationData.level === '0';
const w3cTraceContext = traceCorrelationData.w3cTraceContext;
let entrySpan;
if (w3cTraceContext) {
// Usually we commit the W3C trace context to CLS in startSpan, but in some cases (e.g. when tracing is suppressed),
// we don't call startSpan, so we write to CLS here unconditionally. If we also write an updated trace context
// later, the one written here will be overwritten.
tracing.getCls().setW3cTraceContext(w3cTraceContext);
}
if (tracingSuppressed) {
tracing.getCls().setTracingLevel('0');
if (w3cTraceContext) {
w3cTraceContext.disableSampling();
}
} else {
entrySpan = tracing.getCls().startSpan({
spanName: 'aws.lambda.entry',
kind: constants.ENTRY,
traceId: traceCorrelationData.traceId,
parentSpanId: traceCorrelationData.parentId,
w3cTraceContext: w3cTraceContext
});
tracingHeaders.setSpanAttributes(entrySpan, traceCorrelationData);
const { arn, alias } = arnInfo;
entrySpan.data.lambda = {
arn,
alias,
runtime: 'nodejs',
functionName: context.functionName,
functionVersion: context.functionVersion,
reqId: context.awsRequestId
};
if (coldStart) {
entrySpan.data.lambda.coldStart = true;
coldStart = false;
}
triggers.enrichSpanWithTriggerData(event, context, entrySpan);
}
originalArgs[2] = function wrapper(originalError, originalResult) {
if (handlerHasFinished) {
lambdaCallback(originalError, originalResult);
return;
}
handlerHasFinished = true;
postHandler(entrySpan, originalError, originalResult, () => {
lambdaCallback(originalError, originalResult);
});
};
// The functions context.done, context.succeed, and context.fail constitute a deprecated legacy Lambda API dating
// from the very first incarnation of the Node.js Lambda execution environment, ca. 2016. Although it is no longer
// documented in the AWS Lambda docs, it still works (and is also used by some customers). See
// eslint-disable-next-line max-len
// https://web.archive.org/web/20161216092320/https://docs.aws.amazon.com/lambda/latest/dg/nodejs-prog-model-using-old-runtime.html
// for information about it.
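// For illustration, a hypothetical handler using this legacy API (sketch only, the event shape is assumed):
//
//   exports.handler = instana.wrap((event, context) => {
//     if (event.shouldFail) {
//       context.fail(new Error('failed'));
//     } else {
//       context.succeed({ statusCode: 200 });
//     }
//   });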
const originalDone = context.done;
context.done = (originalError, originalResult) => {
if (handlerHasFinished) {
originalDone(originalError, originalResult);
return;
}
handlerHasFinished = true;
postHandler(entrySpan, originalError, originalResult, () => {
originalDone(originalError, originalResult);
});
};
const originalSucceed = context.succeed;
context.succeed = originalResult => {
if (handlerHasFinished) {
originalSucceed(originalResult);
return;
}
handlerHasFinished = true;
postHandler(entrySpan, undefined, originalResult, () => {
originalSucceed(originalResult);
});
};
const originalFail = context.fail;
context.fail = originalError => {
if (handlerHasFinished) {
originalFail(originalError);
return;
}
handlerHasFinished = true;
postHandler(entrySpan, originalError, undefined, () => {
originalFail(originalError);
});
};
/**
* We offer customers the option to enable timeout detection,
* but it is not recommended for production use, only for debugging purposes.
* See https://github.com/instana/nodejs/pull/668.
*/
if (
process.env.INSTANA_ENABLE_LAMBDA_TIMEOUT_DETECTION &&
process.env.INSTANA_ENABLE_LAMBDA_TIMEOUT_DETECTION === 'true'
) {
logger.debug('Heuristical timeout detection enabled. Please only use for debugging purposes.');
registerTimeoutDetection(context, entrySpan);
}
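// For illustration, enabling the heuristic via the Lambda's environment configuration
// (values assumed, debugging only):
//
//   INSTANA_ENABLE_LAMBDA_TIMEOUT_DETECTION=true
//   INSTANA_MINIMUM_LAMBDA_TIMEOUT_FOR_TIMEOUT_DETECTION_IN_MS=3000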
let handlerPromise;
try {
handlerPromise = originalHandler.apply(originalThis, originalArgs);
if (handlerPromise && typeof handlerPromise.then === 'function') {
return handlerPromise.then(
value => {
if (handlerHasFinished) {
return Promise.resolve(value);
}
handlerHasFinished = true;
return postPromise(entrySpan, null, value);
},
error => {
if (handlerHasFinished) {
return Promise.reject(error);
}
handlerHasFinished = true;
return postPromise(entrySpan, error);
}
);
} else {
return handlerPromise;
}
} catch (e) {
// A synchronous exception occurred in the original handler.
handlerHasFinished = true;
// eslint-disable-next-line no-console
console.error(
// eslint-disable-next-line max-len
'Your Lambda handler threw a synchronous exception. To report this call (including the error) to Instana, we need to convert this synchronous failure into an asynchronous failure.',
e
);
postHandler(entrySpan, e, undefined, () => {
// rethrow original exception
throw e;
});
return handlerPromise;
}
});
}
/**
* Initialize the wrapper.
*/
function init(event, arnInfo, _config) {
const userConfig = _config || {};
// CASE: customer provides a custom logger or custom level
if (userConfig.logger || userConfig.level) {
serverlessLogger.init(userConfig);
}
// NOTE: We renormalize here because of:
// - the in-code _config object
// - late env variables (less likely)
// - a custom logger
// We always renormalize unconditionally to ensure safety.
config = coreConfig.normalize(userConfig, lambdaConfigDefaults);
if (!config.tracing.enabled) {
return false;
}
const useLambdaExtension = shouldUseLambdaExtension();
if (useLambdaExtension) {
logger.info('@instana/aws-lambda will use the Instana Lambda extension to send data to the Instana back end.');
} else {
logger.info(
'@instana/aws-lambda will not use the Instana Lambda extension, but instead send data to the Instana back end ' +
'directly.'
);
}
identityProvider.init(arnInfo);
triggers.init(config);
backendConnector.init({
config,
identityProvider,
defaultTimeout: 500,
useLambdaExtension,
isLambdaRequest: true,
// NOTE: We only retry when using the extension, because if the extension is not used, transmitting
// the data directly to the serverless acceptor takes too long.
retries: !!useLambdaExtension
});
instanaCore.init(config, backendConnector, identityProvider);
// Initialized after core init, because the ssm module requires '@aws-sdk/client-ssm', which triggers
// the requireHook + shimmer. Any module that requires another external module has to be
// initialized after the core.
ssm.init(config);
spanBuffer.setIsFaaS(true);
captureHeaders.init(config);
metrics.init(config);
metrics.activate();
tracing.activate();
return true;
}
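// For illustration, a hypothetical in-code configuration passed to wrap (sketch only; `logger` and
// `level` are picked up by init above, `tracing.enabled` is what disables tracing entirely):
//
//   exports.handler = instana.wrap(
//     { level: 'debug', tracing: { enabled: true } },
//     async event => ({ statusCode: 200 })
//   );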
function registerTimeoutDetection(context, entrySpan) {
// We register the timeout detection right at the start, so getRemainingTimeInMillis basically gives us the
// configured timeout for this Lambda function, minus the roughly 50-100 ms spent in bootstrapping.
const initialRemainingMillis = getRemainingTimeInMillis(context);
if (typeof initialRemainingMillis !== 'number') {
return;
}
const minimumTimeoutInMs = process.env.INSTANA_MINIMUM_LAMBDA_TIMEOUT_FOR_TIMEOUT_DETECTION_IN_MS
? Number(process.env.INSTANA_MINIMUM_LAMBDA_TIMEOUT_FOR_TIMEOUT_DETECTION_IN_MS)
: 2000;
if (initialRemainingMillis <= minimumTimeoutInMs) {
logger.debug(
'Heuristical timeout detection will be disabled for Lambda functions with a short timeout ' +
'(2 seconds and smaller).'
);
return;
}
let triggerTimeoutHandlingAfter;
if (initialRemainingMillis <= 4000) {
// For Lambdas configured with a timeout of 3 or 4 seconds we heuristically assume a timeout when only
// 10% of time is remaining.
triggerTimeoutHandlingAfter = initialRemainingMillis * 0.9;
} else {
// For Lambdas configured with a timeout of 5 seconds or more we heuristically assume a timeout when only 400 ms of
// time are remaining.
triggerTimeoutHandlingAfter = initialRemainingMillis - 400;
}
logger.debug(
`Registering heuristical timeout detection to be triggered in ${triggerTimeoutHandlingAfter} milliseconds.`
);
setTimeout(() => {
postHandlerForTimeout(entrySpan, getRemainingTimeInMillis(context));
}, triggerTimeoutHandlingAfter).unref();
}
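// Worked example of the heuristic above (numbers assumed for illustration): with a 3 s timeout and
// roughly 2950 ms remaining at registration, timeout handling is scheduled after ~2655 ms (90%);
// with a 30 s timeout and roughly 29950 ms remaining, it is scheduled after ~29550 ms (400 ms
// before the deadline).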
function getRemainingTimeInMillis(context) {
if (context && typeof context.getRemainingTimeInMillis === 'function') {
return context.getRemainingTimeInMillis();
} else {
logger.warn('context.getRemainingTimeInMillis() is not available, timeout detection will be disabled.');
return null;
}
}
// NOTE: This function only "guesses" whether the Lambda extension should be used or not.
// TODO: Figure out how we can reliably determine whether the Lambda extension should be
// used or not e.g. by checking the lambda handler name if that is possible.
function shouldUseLambdaExtension() {
if (process.env.INSTANA_DISABLE_LAMBDA_EXTENSION) {
logger.info('INSTANA_DISABLE_LAMBDA_EXTENSION is set, not using the Lambda extension.');
return false;
} else {
// Note: We could also use context.memoryLimitInMB here instead of the env var AWS_LAMBDA_FUNCTION_MEMORY_SIZE (both
// should always yield the same value), but this behaviour needs to be in sync with what the Lambda extension does.
// The context object is not available to the extension, so we prefer the env var over the value from the context.
const memorySetting = process.env.AWS_LAMBDA_FUNCTION_MEMORY_SIZE;
if (!memorySetting) {
logger.debug(
'The environment variable AWS_LAMBDA_FUNCTION_MEMORY_SIZE is not present, cannot determine memory settings.'
);
return true;
}
const memorySize = parseInt(memorySetting, 10);
if (isNaN(memorySize)) {
logger.debug(
`Could not parse the value of the environment variable AWS_LAMBDA_FUNCTION_MEMORY_SIZE: "${memorySetting}", ` +
'cannot determine memory settings, not using the Lambda extension.'
);
return false;
}
if (memorySize < 256) {
let logFn = logger.debug;
// CASE: We try to determine whether the customer has the extension installed. We need to log a warning
// because the extension is **not** working and might block the Lambda execution when it is
// not used correctly, e.g. slow startup of the extension or waiting for invokes or incoming spans
// from the tracer.
if (process.env._HANDLER?.includes('instana-aws-lambda-auto-wrap')) {
logFn = logger.warn;
}
logFn(
'The Lambda function is configured with less than 256 MB of memory according to the value of ' +
`AWS_LAMBDA_FUNCTION_MEMORY_SIZE: ${memorySetting}. The Lambda extension does ` +
'not work reliably with low memory settings. ' +
'As the extension is already running, it might ' +
'block the lambda execution which can result in larger execution times. Please configure at least ' +
'256 MB of memory for your Lambda function.'
);
return false;
}
return true;
}
}
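// Decision summary for the heuristic above, for illustration:
//   INSTANA_DISABLE_LAMBDA_EXTENSION is set          -> do not use the extension
//   AWS_LAMBDA_FUNCTION_MEMORY_SIZE is not present   -> use the extension
//   AWS_LAMBDA_FUNCTION_MEMORY_SIZE is not a number  -> do not use the extension
//   AWS_LAMBDA_FUNCTION_MEMORY_SIZE < 256            -> do not use the extension
//   otherwise                                        -> use the extension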
/**
* A wrapper around postHandler for promise-based Lambdas (including async-style Lambdas), to be executed after the
* promise returned by the original handler has settled.
*/
function postPromise(entrySpan, error, value) {
return new Promise((resolve, reject) => {
postHandler(entrySpan, error, value, () => {
if (error) {
reject(error);
} else {
resolve(value);
}
});
});
}
function sendToBackend({ spans, metricsPayload, finalLambdaRequest, callback }) {
const runBackendConnector = () => {
return backendConnector.sendBundle({ spans, metrics: metricsPayload }, finalLambdaRequest, callback);
};
// CASE: Customer uses process.env.INSTANA_AGENT_KEY
if (!ssm.isUsed()) {
return runBackendConnector();
}
return ssm.waitAndGetInstanaKey((err, value) => {
if (err) {
logger.debug(err);
return callback();
}
environment.setInstanaAgentKey(value);
return runBackendConnector();
});
}
/**
* When the original handler has completed, postHandler finishes the entry span that represents the Lambda
* invocation and makes sure the final batch of data (including the Lambda entry span) is sent to the back end before
* letting the Lambda finish (that is, before letting the AWS Lambda runtime process the next invocation or freeze the
* current process).
*/
function postHandler(entrySpan, error, result, postHandlerDone) {
// entrySpan is null when tracing is suppressed due to X-Instana-L
if (entrySpan) {
if (entrySpan.transmitted) {
// The only possible reason for the entry span to already have been transmitted is when the timeout detection
// kicked in and finished the entry span prematurely. If that happened, we also have already triggered sending
// spans to the back end. We do not need to keep the Lambda waiting for another transmission, so we immediately
// let it finish.
postHandlerDone();
return;
}
if (error) {
entrySpan.ec = 1;
if (error.message) {
if (typeof error.message === 'string') {
entrySpan.data.lambda.error = error.message;
} else {
entrySpan.data.lambda.error = JSON.stringify(error.message);
}
} else {
entrySpan.data.lambda.error = error.toString();
}
}
processResult(result, entrySpan);
entrySpan.d = Date.now() - entrySpan.ts;
entrySpan.transmit();
}
const spans = spanBuffer.getAndResetSpans();
// We want all upcoming spans to be sent to the back end immediately.
// Span collection happens all the time, but for AWS Lambda, sending spans early via the spanBuffer
// is disabled because we cannot use `setTimeout` on AWS Lambda.
// When the Lambda handler finishes, we send all spans via `sendBundle`.
// If any spans are collected afterwards (async operations), we send them out
// directly, which is why we set `setTransmitImmediate` to true.
// We need to rework the default behavior via https://jsw.ibm.com/browse/INSTA-13498
spanBuffer.setTransmitImmediate(true);
const metricsData = metrics.gatherData();
const metricsPayload = {
plugins: [{ name: 'com.instana.plugin.aws.lambda', entityId: identityProvider.getEntityId(), data: metricsData }]
};
sendToBackend({
spans,
metricsPayload,
finalLambdaRequest: true,
callback: () => {
// We don't process or care if there is an error returned from the backend connector right now.
postHandlerDone();
}
});
}
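// For illustration, backendConnector.sendBundle (invoked via sendToBackend above) receives roughly this
// payload when the handler finishes (concrete values are assumptions):
//
//   {
//     spans: [ /* buffered spans, including the aws.lambda.entry span */ ],
//     metrics: {
//       plugins: [
//         { name: 'com.instana.plugin.aws.lambda', entityId: /* from identityProvider */, data: { /* gathered metrics */ } }
//       ]
//     }
//   }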
/**
* When the timeout heuristic detects an imminent timeout, we finish the entry span prematurely and send it to the
* back end.
*/
function postHandlerForTimeout(entrySpan, remainingMillis) {
/**
* context.getRemainingTimeInMillis() can return negative values.
* That just means that the Lambda has already been closed.
* `setTimeout` is not 100% reliable.
*/
if (remainingMillis < 200) {
logger.debug('Skipping heuristical timeout detection because lambda timeout exceeded already.');
return;
}
if (entrySpan) {
// CASE: Timeout handling not needed, we already sent the data to the backend successfully.
if (entrySpan.transmitted) {
logger.debug('Skipping heuristical timeout detection because BE data was sent already.');
return;
}
entrySpan.ec = 1;
entrySpan.data.lambda.msleft = remainingMillis;
entrySpan.data.lambda.error = `Possible Lambda timeout with only ${remainingMillis} ms left.`;
entrySpan.d = Date.now() - entrySpan.ts;
entrySpan.transmit();
}
logger.debug(`Heuristical timeout detection was triggered with ${remainingMillis} milliseconds left.`);
// deliberately not gathering metrics but only sending spans.
const spans = spanBuffer.getAndResetSpans();
sendToBackend({
spans,
metricsPayload: {},
finalLambdaRequest: true,
callback: () => {}
});
}
exports.currentSpan = function getHandleForCurrentSpan() {
return tracing.getHandleForCurrentSpan();
};
exports.sdk = tracing.sdk;
exports.setLogger = function setLogger(_logger) {
serverlessLogger.init({ logger: _logger });
};
exports.opentracing = tracing.opentracing;
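// Usage sketch for the exports above (hypothetical handler code, for illustration only;
// `myCustomLogger` is an assumption):
//
//   const instana = require('@instana/aws-lambda');
//   instana.setLogger(myCustomLogger);
//   exports.handler = instana.wrap(async event => {
//     const spanHandle = instana.currentSpan(); // handle for the currently active span
//     return { statusCode: 200 };
//   });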