autotel
Version:
Write Once, Observe Anywhere
320 lines (290 loc) • 9.73 kB
text/typescript
import { NodeSDK } from '@opentelemetry/sdk-node';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http';
import { BatchLogRecordProcessor } from '@opentelemetry/sdk-logs';
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
import { TailSamplingSpanProcessor } from './tail-sampling-processor';
import { getLogger } from './init';
import {
resourceFromAttributes,
detectResources,
processDetector,
hostDetector,
type Resource,
type ResourceDetector,
} from '@opentelemetry/resources';
import {
ATTR_SERVICE_NAME,
ATTR_SERVICE_VERSION,
} from '@opentelemetry/semantic-conventions/incubating';
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http';
import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics';
import { requireModule } from './node-require';
/**
 * Parse an OTLP headers string into the object form expected by the OTLP exporters.
 *
 * Pairs are separated by `,`; within a pair, everything before the first `=`
 * is the header name and everything after it is the value (so values such as
 * `Basic dXNlcjpwYXNz==` keep their `=` characters). Names and values are
 * trimmed. Pairs without `=` and pairs whose name is empty after trimming are
 * skipped, because an empty header name is not a valid HTTP header.
 *
 * @param headersString - Headers as "key1=value1,key2=value2" or "Authorization=Basic ..."
 * @returns Headers object for OTLP exporters (empty when the input is missing or empty)
 */
function parseOtlpHeaders(headersString?: string): Record<string, string> {
  const headers: Record<string, string> = {};
  if (!headersString) return headers;

  for (const pair of headersString.split(',')) {
    const [rawName, ...valueParts] = pair.split('=');
    // Trim BEFORE validating so a whitespace-only name is rejected too.
    const name = rawName?.trim();
    if (!name || valueParts.length === 0) continue;
    // Re-join so '=' characters inside the value survive the split above.
    headers[name] = valueParts.join('=').trim();
  }
  return headers;
}
/**
 * Parse a resource attributes string into an attributes object.
 *
 * Entries are separated by `,`; the first `=` in an entry splits key from
 * value, so values may themselves contain `=`. Keys and values are trimmed.
 * Entries without `=` and entries whose key is empty after trimming are
 * skipped, so the result never contains an attribute with an empty key.
 *
 * @param attributesString - Attributes as "key1=value1,key2=value2"
 * @returns Resource attributes object (empty when the input is missing or empty)
 */
function parseResourceAttributes(
  attributesString?: string,
): Record<string, string> {
  const attributes: Record<string, string> = {};
  if (!attributesString) return attributes;

  for (const entry of attributesString.split(',')) {
    const separatorIndex = entry.indexOf('=');
    if (separatorIndex === -1) continue; // no "=": not a key/value entry

    // Validate the trimmed key so " =value" does not create an "" attribute.
    const key = entry.slice(0, separatorIndex).trim();
    if (key === '') continue;

    attributes[key] = entry.slice(separatorIndex + 1).trim();
  }
  return attributes;
}
/**
* Options accepted by `initInstrumentation`.
*/
export interface InstrumentationConfig {
/** Service name, reported as the `ATTR_SERVICE_NAME` resource attribute. */
serviceName: string;
/**
* Service version, reported as the `ATTR_SERVICE_VERSION` resource attribute.
* `initInstrumentation` falls back to '1.0.0' when this is unset or empty.
*/
serviceVersion?: string;
/**
* Reported as the `deployment.environment` resource attribute.
* `initInstrumentation` falls back to 'development' when this is unset or empty.
*/
deploymentEnvironment?: string;
/**
* Base URL of the OTLP/HTTP endpoint. The per-signal paths `/v1/traces`,
* `/v1/metrics` and `/v1/logs` are appended to it.
* `initInstrumentation` falls back to 'http://localhost:4318' when unset or empty.
*/
otlpEndpoint?: string;
/**
* Headers for authentication (e.g., Grafana Cloud, Honeycomb), as
* comma-separated key=value pairs. The same headers are sent with the trace,
* metric and log exporters.
*/
headers?: string;
/**
* Resource attributes as comma-separated key=value pairs.
* They are spread after the service name/version/environment attributes, so a
* custom attribute with the same key replaces the built-in value.
*/
resourceAttributes?: string;
/**
* Enable async resource detection for process/host info (default: false).
* When enabled, the AWS, GCP and container detectors are also used if their
* packages can be imported.
*/
detectResources?: boolean;
/**
* Use selective instrumentation instead of full auto-instrumentation
* **Default: true** (performance-first)
*
* When true, auto-instrumentation is disabled. You can manually add
* specific instrumentations via the `instrumentations` field.
* This reduces overhead from ~81% to near-zero based on Platformatic benchmarks.
*
* Set to false to enable full auto-instrumentation (not recommended for production).
* Only an explicit `false` enables it; leaving the field unset keeps selective mode.
*
* @see https://blogger.platformatic.dev/the-hidden-cost-of-context
*/
selectiveInstrumentation?: boolean;
/**
* Custom instrumentations to use. They are applied unless
* `selectiveInstrumentation` is explicitly `false`, in which case they are
* replaced by the full auto-instrumentation set.
* @example
* ```typescript
* import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'
*
* initInstrumentation({
*   serviceName: 'api',
*   selectiveInstrumentation: true,
*   instrumentations: [new HttpInstrumentation()]
* })
* ```
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
instrumentations?: any[];
}
/**
* Initialize OpenTelemetry instrumentation with OTLP exporters
*
* This sets up:
* - Traces (OTLP HTTP)
* - Metrics (OTLP HTTP)
* - Logs (OTLP HTTP)
* - Auto-instrumentation for common Node.js libraries
*   (only when `selectiveInstrumentation` is set to `false`)
*
* @example
* // Call this at the very start of your application
* import { initInstrumentation } from '@your-org/otel-decorators'
*
* initInstrumentation({
*   serviceName: 'my-service',
*   serviceVersion: '1.0.0',
*   deploymentEnvironment: 'production',
*   otlpEndpoint: 'http://localhost:4318'
* })
*
* // Or with async resource detection (top-level await required)
* await initInstrumentation({
*   serviceName: 'my-service',
*   detectResources: true
* })
*/
// Module-level state shared by initInstrumentation and shutdownInstrumentation.
//
// currentSDK: the SDK most recently started by initInstrumentation, or null
// when none is tracked (never initialized, or already shut down).
// Enables graceful shutdown and prevents SDK leaks on hot-reload
let currentSDK: NodeSDK | null = null;
// Set to true once the SIGTERM/SIGINT handlers have been registered, so that
// repeated initInstrumentation calls do not register them again.
let shutdownHandlerRegistered = false;
/**
 * Gracefully stop an OpenTelemetry SDK.
 * Intended to be called before the process exits or as part of a hot-reload.
 *
 * @param sdk - SDK to stop; when omitted, the SDK tracked in `currentSDK` is used.
 *   If neither exists, a warning is logged and the call returns without error.
 * @returns Resolves once the SDK has shut down.
 * @throws Rethrows whatever `shutdown()` rejected with, after logging it.
 */
export async function shutdownInstrumentation(sdk?: NodeSDK): Promise<void> {
  const target = sdk ? sdk : currentSDK;

  if (target) {
    try {
      await target.shutdown();
      getLogger().info({}, 'OpenTelemetry terminated successfully');
      // Only forget the tracked SDK when it is the one that was just stopped.
      if (currentSDK === target) {
        currentSDK = null;
      }
    } catch (failure) {
      let err: Error | undefined;
      if (failure instanceof Error) {
        err = failure;
      }
      getLogger().error({ err }, 'Error terminating OpenTelemetry');
      throw failure;
    }
    return;
  }

  getLogger().warn({}, 'No SDK to shutdown');
}
/**
 * Start the OpenTelemetry Node SDK with OTLP/HTTP exporters for traces,
 * metrics and logs.
 *
 * Steps, in order:
 * 1. If an SDK started by an earlier call is still tracked, shut it down first
 *    (prevents resource leaks on hot-reload or repeated init calls).
 * 2. Build the resource from the service name/version/environment plus any
 *    custom `resourceAttributes`. When `detectResources` is true, the detected
 *    resource is merged with it.
 * 3. Pick the instrumentations: `config.instrumentations` by default, or the
 *    full auto-instrumentation set when `selectiveInstrumentation` is `false`.
 * 4. Create the exporters, start the SDK and track it as the current SDK.
 * 5. The first time a call gets this far, register SIGTERM/SIGINT handlers
 *    that shut down the current SDK and then exit the process (exit code 0,
 *    or 1 if the shutdown failed).
 *
 * @param config - Service identity, OTLP endpoint/headers and instrumentation options.
 * @returns The started SDK instance.
 * @throws Rethrows errors from shutting down the previous SDK and from `sdk.start()`.
 */
export async function initInstrumentation(
  config: InstrumentationConfig,
): Promise<NodeSDK> {
  // Prevents resource leaks on hot-reload or multiple init calls
  if (currentSDK) {
    getLogger().info(
      {},
      'Shutting down existing OpenTelemetry SDK before reinitializing...',
    );
    await shutdownInstrumentation(currentSDK);
  }

  // Parse headers and resource attributes
  const otlpHeaders = parseOtlpHeaders(config.headers);
  const customResourceAttributes = parseResourceAttributes(
    config.resourceAttributes,
  );

  // Attributes that are always present; custom attributes are spread last so
  // they can replace the built-in values.
  const configuredResource = resourceFromAttributes({
    [ATTR_SERVICE_NAME]: config.serviceName,
    [ATTR_SERVICE_VERSION]: config.serviceVersion || '1.0.0',
    'deployment.environment': config.deploymentEnvironment || 'development',
    ...customResourceAttributes,
  });

  let resource: Resource = configuredResource;
  if (config.detectResources) {
    // The optional detector packages are only imported when detection is
    // actually requested, so the default path pays nothing for them.
    const detectors = await collectResourceDetectors();
    const detectedResource = await detectResources({ detectors });
    resource = detectedResource.merge(configuredResource);
  }

  // Default to selective (near-zero overhead) vs full auto (~81% overhead)
  // Lazy-load to avoid importing ~40+ packages at module evaluation time
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let instrumentations: any[] = config.instrumentations || [];
  if (config.selectiveInstrumentation === false) {
    const mod = requireModule<{
      getNodeAutoInstrumentations: () => unknown[];
    }>('@opentelemetry/auto-instrumentations-node');
    instrumentations = [mod.getNodeAutoInstrumentations()];
  }

  // Strip trailing slashes so "http://host:4318/" does not yield "//v1/traces".
  const otlpBaseUrl = (config.otlpEndpoint || 'http://localhost:4318').replace(
    /\/+$/,
    '',
  );

  const traceExporter = new OTLPTraceExporter({
    url: `${otlpBaseUrl}/v1/traces`,
    headers: otlpHeaders,
  });

  // Enables tail sampling via autotel.sampling.tail.keep attribute
  const spanProcessor = new TailSamplingSpanProcessor(
    new BatchSpanProcessor(traceExporter),
  );

  const sdk = new NodeSDK({
    resource,
    spanProcessor, // Use our wrapped processor instead of traceExporter directly
    metricReader: new PeriodicExportingMetricReader({
      exporter: new OTLPMetricExporter({
        url: `${otlpBaseUrl}/v1/metrics`,
        headers: otlpHeaders,
      }),
    }),
    logRecordProcessors: [
      new BatchLogRecordProcessor(
        new OTLPLogExporter({
          url: `${otlpBaseUrl}/v1/logs`,
          headers: otlpHeaders,
        }),
      ),
    ],
    instrumentations,
  });

  try {
    await sdk.start();
    getLogger().info({}, 'OpenTelemetry instrumentation started successfully');
  } catch (error) {
    getLogger().error(
      {
        err: error instanceof Error ? error : undefined,
      },
      'Failed to start OpenTelemetry SDK',
    );
    throw error;
  }

  // Track current SDK for shutdown handler
  currentSDK = sdk;

  // Register the signal handlers once; they always act on whichever SDK is
  // current at signal time, so re-initialization does not need new handlers.
  if (!shutdownHandlerRegistered) {
    shutdownHandlerRegistered = true;
    const shutdownHandler = () => {
      shutdownInstrumentation()
        .then(() => {
          // eslint-disable-next-line unicorn/no-process-exit
          process.exit(0);
        })
        .catch((error) => {
          getLogger().error(
            {
              err: error instanceof Error ? error : undefined,
            },
            'Shutdown error',
          );
          // eslint-disable-next-line unicorn/no-process-exit
          process.exit(1);
        });
    };
    process.on('SIGTERM', shutdownHandler);
    process.on('SIGINT', shutdownHandler);
  }

  return sdk;
}

/**
 * Collect the resource detectors to run: the process and host detectors plus
 * the AWS, GCP and container detectors whose packages can be imported.
 * These packages are optional, so a failed import is deliberately ignored and
 * that detector is simply left out.
 */
async function collectResourceDetectors(): Promise<ResourceDetector[]> {
  const detectors: ResourceDetector[] = [processDetector, hostDetector];
  try {
    const awsDetectors = await import('@opentelemetry/resource-detector-aws');
    detectors.push(
      awsDetectors.awsEc2Detector,
      awsDetectors.awsEcsDetector,
      awsDetectors.awsEksDetector,
    );
  } catch {
    // optional package not available
  }
  try {
    const gcpDetectors = await import('@opentelemetry/resource-detector-gcp');
    detectors.push(gcpDetectors.gcpDetector);
  } catch {
    // optional package not available
  }
  try {
    const containerDetectors =
      await import('@opentelemetry/resource-detector-container');
    detectors.push(containerDetectors.containerDetector);
  } catch {
    // optional package not available
  }
  return detectors;
}