UNPKG

@zendesk/retrace

Version:

define and capture Product Operation Traces along with computed metrics with an optional friendly React beacon API

711 lines (643 loc) 23.2 kB
/* eslint-disable no-continue */
import { INHERIT_FROM_PARENT } from './constants'
import {
  ensureMatcherFn,
  ensureMatcherFnOrSpecialToken,
} from './ensureMatcherFn'
import { findMatchingSpan } from './matchSpan'
import type { SpanAndAnnotation } from './spanAnnotationTypes'
import {
  type ActiveTraceInput,
  type DraftTraceInput,
  type ErrorLike,
  PARENT_SPAN,
} from './spanTypes'
import type { FinalTransition } from './Trace'
import type {
  RecordedSpanAndAnnotation,
  TraceRecording,
} from './traceRecordingTypes'
import type {
  PromoteSpanAttributesDefinition,
  RelationSchemasBase,
  SpecialEndToken,
  SpecialStartToken,
  TraceContext,
} from './types'

/**
 * ### Deriving SLIs and other metrics from a trace
 *
 * ℹ️ It is our recommendation that the primary way of creating duration metrics would be to derive them from data in the trace.
 *
 * Instead of the traditional approach of capturing isolated metrics imperatively in the code,
 * the **trace** model allows us the flexibility to define and compute any number of metrics from the **trace recording**.
 *
 * We can distinguish the following types of metrics:
 *
 * 1. **Duration of a Computed Span** — the time between any two **spans** that appeared in the **trace**. For example:
 *    1. _time between the user’s click on a ticket_ and _everything in the ticket page has fully rendered with content_ (duration of the entire operation)
 *    2. _time between the user’s click on a ticket_ and _the moment the first piece of the ticket UI was displayed_ (duration of a segment of the operation)
 *
 * 2. **Computed Values** — any numerical value derived from the **spans** or their attributes. For example:
 *    1. _The total number of times the log component re-rendered while loading the ticket_
 *    2. _The total number of requests made while loading the ticket_
 *    3. _The total number of iframe apps that were initialized while loading the ticket_
 *
 * Evaluates every entry of `context.definition.computedValueDefinitions`:
 * each matcher of a definition gets its own list of matching (non-ghost)
 * recorded items, and those lists are passed positionally to the definition's
 * `computeValueFromMatches`.
 *
 * @param context the trace context providing the definition and the recorded items
 * @returns a map of computed value name to value; definitions whose
 *   computation returns `undefined` are omitted
 */
export function getComputedValues<
  SelectedRelationNameT extends keyof RelationSchemasT,
  RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
  const VariantsT extends string,
>(
  context: TraceContext<SelectedRelationNameT, RelationSchemasT, VariantsT>,
): TraceRecording<SelectedRelationNameT, RelationSchemasT>['computedValues'] {
  const computedValues: TraceRecording<
    SelectedRelationNameT,
    RelationSchemasT
  >['computedValues'] = {}

  for (const [name, computedValueDefinition] of Object.entries(
    context.definition.computedValueDefinitions,
  )) {
    const { matches, computeValueFromMatches } = computedValueDefinition

    // One bucket of matching entries per matcher, in matcher order
    const matchingEntriesByMatcher: SpanAndAnnotation<RelationSchemasT>[][] =
      Array.from({ length: matches.length }, () => [])

    // Single pass through recordedItems
    for (const item of context.recordedItems.values()) {
      // ghost entries never contribute to computed values
      if (item.annotation.isGhost) continue
      // TODO: refactor findMatchingSpan to be a generator function
      // that returns multiple matches and use it here
      matches.forEach((doesSpanMatch, index) => {
        if (doesSpanMatch(item, context)) {
          matchingEntriesByMatcher[index]!.push(item)
        }
      })
    }

    const value = computeValueFromMatches(...matchingEntriesByMatcher)
    if (value !== undefined) {
      computedValues[name] = value
    }
  }

  return computedValues
}

/**
 * Evaluates every entry of `context.definition.computedSpanDefinitions` into a
 * `{ duration, startOffset }` pair.
 *
 * - The start is either the matched start span's start time, or the trace's
 *   own start time when the definition uses the `'operation-start'` token.
 * - The end is the matched end span's start time plus its duration. The
 *   `'operation-end'` and `'interactive'` tokens resolve to
 *   `finalState.completeSpanAndAnnotation` and
 *   `finalState.cpuIdleSpanAndAnnotation` respectively.
 *
 * A computed span is only emitted when both ends could be resolved.
 * Ghost entries are excluded from matching.
 *
 * @param context the trace context providing the definition, input and recorded items
 * @param finalState the spans that the special end tokens resolve to, when available
 * @returns a map of computed span name to its duration and start offset
 */
export function getComputedSpans<
  SelectedRelationNameT extends keyof RelationSchemasT,
  RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
  const VariantsT extends string,
>(
  context: TraceContext<SelectedRelationNameT, RelationSchemasT, VariantsT>,
  finalState?: {
    completeSpanAndAnnotation?: SpanAndAnnotation<RelationSchemasT>
    cpuIdleSpanAndAnnotation?: SpanAndAnnotation<RelationSchemasT>
  },
): TraceRecording<SelectedRelationNameT, RelationSchemasT>['computedSpans'] {
  const computedSpans: TraceRecording<
    SelectedRelationNameT,
    RelationSchemasT
  >['computedSpans'] = {}

  const recordedItemsArray = [...context.recordedItems.values()].filter(
    (item) => !item.annotation.isGhost,
  )

  for (const [name, computedSpanDefinition] of Object.entries(
    context.definition.computedSpanDefinitions,
  )) {
    // Create matchers from the span definitions
    const startSpanMatcher = ensureMatcherFnOrSpecialToken<
      SelectedRelationNameT,
      RelationSchemasT,
      VariantsT,
      SpecialStartToken
    >(computedSpanDefinition.startSpan)
    const endSpanMatcher = ensureMatcherFnOrSpecialToken<
      SelectedRelationNameT,
      RelationSchemasT,
      VariantsT,
      SpecialEndToken
    >(computedSpanDefinition.endSpan)

    // Find matching start entry
    let matchingStartEntry:
      | SpanAndAnnotation<RelationSchemasT>
      | 'operation-start'
      | undefined =
      startSpanMatcher === 'operation-start' ? 'operation-start' : undefined
    if (typeof startSpanMatcher === 'function') {
      matchingStartEntry = findMatchingSpan(
        startSpanMatcher,
        recordedItemsArray,
        context,
      )
    }

    // Find matching end entry
    let matchingEndEntry: SpanAndAnnotation<RelationSchemasT> | undefined
    if (typeof endSpanMatcher === 'function') {
      matchingEndEntry = findMatchingSpan(
        endSpanMatcher,
        recordedItemsArray,
        context,
      )
    } else if (endSpanMatcher === 'operation-end') {
      matchingEndEntry = finalState?.completeSpanAndAnnotation
    } else if (endSpanMatcher === 'interactive') {
      matchingEndEntry = finalState?.cpuIdleSpanAndAnnotation
    }

    // Calculate timing values
    const matchingStartTime =
      matchingStartEntry === 'operation-start'
        ? context.input.startTime.now
        : matchingStartEntry?.span.startTime.now
    const matchingEndTime = matchingEndEntry
      ? matchingEndEntry.span.startTime.now + matchingEndEntry.span.duration
      : undefined

    // Create computed span if both start and end times are found
    if (
      typeof matchingStartTime === 'number' &&
      typeof matchingEndTime === 'number'
    ) {
      computedSpans[name] = {
        duration: matchingEndTime - matchingStartTime,
        startOffset: matchingStartTime - context.input.startTime.now,
        // DECISION: After considering which events happen first and which one is defined as the start
        // the start offset is always going to be anchored to the start span.
        // cases:
        // -----S------E (computed val is positive)
        // -----E------S (computed val is negative)
        // this way the `endOffset` can be derived as follows:
        // endOffset = computedSpan.startOffset + computedSpan.duration
      }
    }
  }

  return computedSpans
}

/**
 * Summarises the `component-render` / `component-render-start` spans of the
 * trace per beacon (span name).
 *
 * Only non-ghost spans are considered, and only those whose `relatedTo` does
 * not conflict with the trace input's `relatedTo` (a key missing on the span
 * is not a conflict).
 *
 * Beacons that never produced a `component-render` span with
 * `renderedOutput === 'content'` are omitted from the result.
 *
 * @param recordedItems the recorded entries to summarise
 * @param input the active trace input, providing `relatedTo` and the trace start time
 * @returns a map of beacon name to its render summary
 */
function getComputedRenderBeaconSpans<
  SelectedRelationNameT extends keyof RelationSchemasT,
  RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
  const VariantsT extends string,
>(
  recordedItems: readonly SpanAndAnnotation<RelationSchemasT>[],
  input: ActiveTraceInput<RelationSchemasT[SelectedRelationNameT], VariantsT>,
): TraceRecording<
  SelectedRelationNameT,
  RelationSchemasT
>['computedRenderBeaconSpans'] {
  const renderSpansByBeacon = new Map<
    string,
    {
      firstStart: number
      firstContentfulRenderEnd: number | undefined
      firstLoadingEnd: number | undefined
      firstContentStart: number | undefined
      renderCount: number
      sumOfDurations: number
      attributes: Record<string, unknown>
    }
  >()

  const relatedToKey = Object.keys(input.relatedTo)

  // Group render spans by beacon and accumulate the per-beacon summary
  for (const entry of recordedItems) {
    if (
      entry.annotation.isGhost ||
      (entry.span.type !== 'component-render' &&
        entry.span.type !== 'component-render-start')
    ) {
      continue
    }
    const {
      name,
      startTime,
      duration,
      relatedTo: r,
      renderedOutput,
    } = entry.span

    const relatedTo = r as Record<string, unknown> | undefined
    const inputRelatedTo: Record<string, unknown> = input.relatedTo

    // a span is relevant unless one of its relation values contradicts the trace's
    const relationMatch = relatedToKey.every(
      (key) =>
        relatedTo?.[key] === undefined ||
        inputRelatedTo[key] === relatedTo[key],
    )
    if (!relationMatch) continue

    const start = startTime.now
    const isCompletedRender = entry.span.type === 'component-render'
    const contentfulRenderEnd =
      isCompletedRender && renderedOutput === 'content'
        ? start + duration
        : undefined
    const loadingRenderEnd =
      isCompletedRender && renderedOutput === 'loading'
        ? start + duration
        : undefined

    const spanTimes = renderSpansByBeacon.get(name)

    if (!spanTimes) {
      renderSpansByBeacon.set(name, {
        firstStart: start,
        firstContentfulRenderEnd: contentfulRenderEnd,
        renderCount: isCompletedRender ? 1 : 0,
        // only completed renders contribute to the sum, matching how
        // subsequent spans of the same beacon are accumulated below
        sumOfDurations: isCompletedRender ? duration : 0,
        firstContentStart: renderedOutput === 'content' ? start : undefined,
        firstLoadingEnd: loadingRenderEnd,
        attributes: entry.span.attributes ?? {},
      })
      continue
    }

    // merge attributes (later spans win):
    spanTimes.attributes = {
      ...spanTimes.attributes,
      ...entry.span.attributes,
    }
    spanTimes.firstStart = Math.min(spanTimes.firstStart, start)

    // explicit undefined checks so that a timestamp of 0 is not treated as missing
    if (contentfulRenderEnd !== undefined) {
      spanTimes.firstContentfulRenderEnd =
        spanTimes.firstContentfulRenderEnd === undefined
          ? contentfulRenderEnd
          : Math.min(spanTimes.firstContentfulRenderEnd, contentfulRenderEnd)
    }
    if (isCompletedRender) {
      spanTimes.renderCount += 1
      spanTimes.sumOfDurations += duration
    }
    if (
      spanTimes.firstContentStart === undefined &&
      renderedOutput === 'content'
    ) {
      spanTimes.firstContentStart = start
    }
    if (
      spanTimes.firstLoadingEnd === undefined &&
      loadingRenderEnd !== undefined
    ) {
      spanTimes.firstLoadingEnd = loadingRenderEnd
    }
  }

  const computedRenderBeaconSpans: TraceRecording<
    SelectedRelationNameT,
    RelationSchemasT
  >['computedRenderBeaconSpans'] = {}

  // Calculate duration and startOffset for each beacon
  for (const [beaconName, renderSummary] of renderSpansByBeacon) {
    // beacons that never rendered content are not reported
    if (renderSummary.firstContentfulRenderEnd === undefined) continue
    computedRenderBeaconSpans[beaconName] = {
      startOffset: renderSummary.firstStart - input.startTime.now,
      firstRenderTillContent:
        renderSummary.firstContentfulRenderEnd - renderSummary.firstStart,
      firstRenderTillLoading:
        renderSummary.firstLoadingEnd === undefined
          ? 0
          : renderSummary.firstLoadingEnd - renderSummary.firstStart,
      firstRenderTillData:
        renderSummary.firstContentStart === undefined
          ? 0
          : renderSummary.firstContentStart - renderSummary.firstStart,
      renderCount: renderSummary.renderCount,
      sumOfRenderDurations: renderSummary.sumOfDurations,
      // TODO: potentially expose attributes; though this might duplicate the span attributes
      // ...(Object.keys(renderSummary.attributes).length > 0
      //   ? {
      //       attributes: renderSummary.attributes,
      //     }
      //   : {}),
    }
  }

  return computedRenderBeaconSpans
}

/**
 * Find and promote span attributes to trace attributes per promoteSpanAttributes definition.
 *
 * For each rule, the listed attribute keys are copied from the matching
 * span(s) into the result:
 * - when the rule's matcher has no `nthMatch`, every matching span contributes
 *   and the last one wins,
 * - otherwise only the single span returned by `findMatchingSpan` contributes.
 *
 * Rules are applied in order, so a later rule overwrites a key set by an earlier one.
 *
 * @returns the promoted attributes, or an empty object when no rules are defined
 */
function promoteSpanAttributesForTrace<
  SelectedRelationNameT extends keyof RelationSchemasT,
  RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
  VariantsT extends string,
>(
  definition: {
    promoteSpanAttributes?: PromoteSpanAttributesDefinition<
      SelectedRelationNameT,
      RelationSchemasT,
      VariantsT
    >[]
  },
  recordedItemsArray: SpanAndAnnotation<RelationSchemasT>[],
  context: TraceContext<SelectedRelationNameT, RelationSchemasT, VariantsT>,
): Record<string, unknown> {
  if (!definition.promoteSpanAttributes) return {}
  const promoted: Record<string, unknown> = {}

  for (const rule of definition.promoteSpanAttributes) {
    const matcher = ensureMatcherFn<
      SelectedRelationNameT,
      RelationSchemasT,
      VariantsT
    >(rule.span)

    let sources: readonly SpanAndAnnotation<RelationSchemasT>[]
    if (matcher.nthMatch === undefined) {
      // if no specific index is provided, we accumulate attributes from all matches
      // last one wins
      sources = recordedItemsArray.filter((spanAnn) =>
        matcher(spanAnn, context),
      )
    } else {
      const matchingSpan = findMatchingSpan(
        matcher,
        recordedItemsArray,
        context,
      )
      sources = matchingSpan ? [matchingSpan] : []
    }

    for (const { span } of sources) {
      const attrs = span.attributes
      if (!attrs) continue
      for (const key of rule.attributes) {
        if (key in attrs) promoted[key] = attrs[key]
      }
    }
  }

  return promoted
}

/**
 * Type guard distinguishing an active trace input from a draft one:
 * the input is considered active when its `relatedTo` is truthy.
 */
function isActiveTraceInput<
  SelectedRelationNameT extends keyof RelationSchemasT,
  RelationSchemasT,
  const VariantsT extends string,
>(
  input:
    | DraftTraceInput<RelationSchemasT[SelectedRelationNameT], VariantsT>
    | ActiveTraceInput<RelationSchemasT[SelectedRelationNameT], VariantsT>,
): input is ActiveTraceInput<
  RelationSchemasT[SelectedRelationNameT],
  VariantsT
> {
  return Boolean(input.relatedTo)
}

type ChildrenMap = Map<string, string[]>
type SpanMap<RelationSchemasT extends RelationSchemasBase<RelationSchemasT>> =
  ReadonlyMap<string, SpanAndAnnotation<RelationSchemasT>>

/**
 * Indexes the spans by their parent, using each span's `PARENT_SPAN` reference.
 * Spans without a parent do not appear as children; a parent id is recorded
 * even when that parent itself is not present in `spanMap`.
 *
 * @returns Map<parentId, childIds[]>
 */
function buildChildrenMap<
  const RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
>(spanMap: SpanMap<RelationSchemasT>): ChildrenMap {
  const kids = new Map<string, string[]>()
  for (const { span } of spanMap.values()) {
    const parent = span[PARENT_SPAN]
    if (!parent) continue
    const childrenIds = kids.get(parent.id) ?? []
    childrenIds.push(span.id)
    kids.set(parent.id, childrenIds)
  }
  return kids // O(n) time, O(n) memory
}

interface PropagationConfig<
  RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
> {
  /** attribute keys that flow *downward* unless child overrides */
  heritableSpanAttributes?: readonly string[]
  /** stops errors bubbling *upward* if true */
  shouldSuppressErrorStatusPropagation: (
    spanAndAnnotation: SpanAndAnnotation<RelationSchemasT>,
  ) => boolean
}

/**
 * Walks the span tree and mutates the spans in place:
 *
 * 1. (optional, when `cfg.heritableSpanAttributes` is set) copies the selected
 *    attributes from parents down to their descendants; a child's own value
 *    wins unless it is nullish or the `INHERIT_FROM_PARENT` placeholder,
 * 2. marks a span as `'error'` when one of its direct children is errored,
 *    processing children before parents so errors travel up the whole chain.
 *
 * Spans whose parent is missing from `idToSpanAndAnnotationMap` are treated as roots.
 * Ids referenced by `children` but absent from the span map are skipped.
 *
 * @param idToSpanAndAnnotationMap all spans, keyed by span id
 * @param children parent id → child ids, e.g. as produced by `buildChildrenMap`
 * @param cfg which attributes are heritable and which spans must not receive a bubbled error
 */
export function propagateStatusAndAttributes<
  const RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
>(
  idToSpanAndAnnotationMap: SpanMap<RelationSchemasT>,
  children: ChildrenMap,
  cfg: PropagationConfig<RelationSchemasT>,
): void {
  // 1. build parent-before-child topological order
  const roots: string[] = []
  for (const { span } of idToSpanAndAnnotationMap.values()) {
    if (
      !span[PARENT_SPAN] ||
      !idToSpanAndAnnotationMap.has(span[PARENT_SPAN].id)
    )
      roots.push(span.id)
  }

  const topo: string[] = []
  // DFS stack build
  const stack: string[] = [...roots]
  while (stack.length > 0) {
    const id = stack.pop()!
    topo.push(id)
    const kids = children.get(id)
    if (kids) {
      for (const cid of kids) stack.push(cid)
    }
  }

  // note: this currently happens in span post-processing:
  if (cfg.heritableSpanAttributes) {
    // 2. push selected attributes downward (pre-order)
    const inherited = new Map<string, Record<string, unknown>>() // id → merged bag

    for (const id of topo) {
      const node = idToSpanAndAnnotationMap.get(id)
      if (!node) continue

      const parentHeritableAttributes = node.span[PARENT_SPAN]
        ? inherited.get(node.span[PARENT_SPAN].id)
        : undefined

      if (!parentHeritableAttributes && !node.span.attributes) {
        // no parent and no attributes, nothing to inherit
        continue
      }

      const heritableAttributes: Record<string, unknown> = {}
      for (const key of cfg.heritableSpanAttributes) {
        // child attribute wins over parent if defined,
        // unless it is literally the INHERIT_FROM_PARENT placeholder
        const childValue = node.span.attributes?.[key]
        const parentValue = parentHeritableAttributes?.[key]
        const value =
          childValue === INHERIT_FROM_PARENT
            ? parentValue
            : childValue ?? parentValue
        if (value !== undefined) {
          heritableAttributes[key] = value
        }
      }

      inherited.set(id, heritableAttributes)
      if (Object.keys(heritableAttributes).length > 0) {
        // The resolved bag is spread LAST: it already contains the child's own
        // value wherever the child defines one, and the parent's value where
        // the child holds the INHERIT_FROM_PARENT placeholder or a nullish
        // value. Spreading the raw span attributes last would write the
        // placeholder back over the value that was just inherited.
        node.span.attributes = {
          ...node.span.attributes,
          ...heritableAttributes,
        }
      }
    }
  }

  // 3. bubble errors upward (post-order): reversing the parent-before-child
  // order guarantees every child is finalized before its parent is inspected
  const childrenBeforeParents = [...topo].reverse()
  for (const id of childrenBeforeParents) {
    const node = idToSpanAndAnnotationMap.get(id)
    // `children` may reference ids that are not in the span map
    if (!node) continue

    // NOTE(review): the predicate is evaluated on the span that would RECEIVE
    // the error status (the parent), not on the errored child — confirm that
    // this is the intended meaning of "suppress propagation".
    if (cfg.shouldSuppressErrorStatusPropagation(node)) {
      // skip this node, it should not propagate (bubble up) errors
      continue
    }

    const ownError = node.span.error ?? node.span.status === 'error'
    if (ownError) {
      // already errored on its own, nothing to inherit from children
      continue
    }

    // the first errored direct child determines the bubbled error
    let childError: boolean | ErrorLike = false
    for (const childId of children.get(id) ?? []) {
      const child = idToSpanAndAnnotationMap.get(childId)
      if (!child) continue
      childError = child.span.error ?? child.span.status === 'error'
      if (childError) {
        break
      }
    }

    if (childError) {
      node.span.status = 'error'
      // carry the child's error object over when there is one
      if (!node.span.error && typeof childError === 'object')
        node.span.error = childError
    }
  }
}

/**
 * Builds the final `TraceRecording` for a trace that has reached a final transition.
 *
 * Steps, in order:
 * 1. bubble error statuses from child spans up to their parents (mutates the recorded spans),
 * 2. keep only the entries that ended no later than the end-of-operation span
 *    (all entries when no such span is known),
 * 3. compute spans, values and render beacon summaries — skipped for interrupted traces,
 * 4. derive the trace status/error from the first non-ghost, non-suppressed errored entry,
 * 5. merge promoted span attributes with the input attributes (input wins),
 * 6. drop `internalUse` spans (unless errored) and bake `parentSpanId` into each span.
 *
 * @param context the trace context (definition, input and recorded items)
 * @param transition the final transition of the trace
 * @returns the trace recording
 */
export function createTraceRecording<
  const SelectedRelationNameT extends keyof RelationSchemasT,
  const RelationSchemasT extends RelationSchemasBase<RelationSchemasT>,
  const VariantsT extends string,
>(
  context: TraceContext<SelectedRelationNameT, RelationSchemasT, VariantsT>,
  transition: FinalTransition<RelationSchemasT>,
): TraceRecording<SelectedRelationNameT, RelationSchemasT> {
  const { definition, recordedItems, input } = context
  const { id, relatedTo, variant, parentTraceId } = input
  const { name } = definition
  // the defaults make the optional members destructurable for every transition shape
  const {
    transitionToState,
    interruption,
    cpuIdleSpanAndAnnotation,
    completeSpanAndAnnotation,
    lastRequiredSpanAndAnnotation,
    lastRelevantSpanAndAnnotation,
  } = {
    cpuIdleSpanAndAnnotation: undefined,
    completeSpanAndAnnotation: undefined,
    lastRequiredSpanAndAnnotation: undefined,
    ...transition,
  }

  const endOfOperationSpan =
    (transitionToState === 'complete' &&
      (cpuIdleSpanAndAnnotation ?? completeSpanAndAnnotation)) ||
    lastRelevantSpanAndAnnotation

  const childrenMap = buildChildrenMap(recordedItems)

  const shouldSuppressErrorStatusPropagation = (
    spanAndAnnotation: SpanAndAnnotation<RelationSchemasT>,
  ) =>
    definition.suppressErrorStatusPropagationOnSpans?.some((doesSpanMatch) =>
      doesSpanMatch(spanAndAnnotation, context),
    ) ?? false

  // errors should bubble up to the parent (unless suppressed)
  propagateStatusAndAttributes(recordedItems, childrenMap, {
    // selected attributes (like `team`) should propagate to every child (unless set by the child)
    // however this currently happens in span post-processing, not here:
    // heritableSpanAttributes: definition.heritableSpanAttributes,
    shouldSuppressErrorStatusPropagation,
  })

  // only keep spans captured until the endOfOperationSpan
  // (which falls back to the lastRelevantSpan); keep everything when neither is available
  const recordedItemsArray: SpanAndAnnotation<RelationSchemasT>[] = [
    ...recordedItems.values(),
  ].filter(
    (item) =>
      !endOfOperationSpan ||
      item.annotation.operationRelativeEndTime <=
        endOfOperationSpan.annotation.operationRelativeEndTime,
  )

  // CODE CLEAN UP TODO: let's get this information (wasInterrupted) from up top (in FinalState)
  const isIncompleteTrace = transitionToState === 'interrupted'
  const computedSpans = !isIncompleteTrace
    ? getComputedSpans(context, {
        completeSpanAndAnnotation,
        cpuIdleSpanAndAnnotation,
      })
    : {}
  const computedValues = !isIncompleteTrace ? getComputedValues(context) : {}
  const computedRenderBeaconSpans =
    !isIncompleteTrace && isActiveTraceInput(input)
      ? getComputedRenderBeaconSpans(recordedItemsArray, input)
      : {}

  // first error found will be used
  const firstErroredEntry = recordedItemsArray.find(
    (spanAndAnnotation) =>
      !spanAndAnnotation.annotation.isGhost &&
      spanAndAnnotation.span.status === 'error' &&
      !shouldSuppressErrorStatusPropagation(spanAndAnnotation),
  )
  const markTraceAsErrored = firstErroredEntry !== undefined
  const error: ErrorLike | undefined = firstErroredEntry?.span.error

  // promote span attributes to trace attributes per configuration
  const promotedAttributes = promoteSpanAttributesForTrace(
    definition,
    recordedItemsArray,
    context,
  )
  // attributes provided on the input take precedence over promoted ones
  const traceAttributes = { ...promotedAttributes, ...input.attributes }

  const duration =
    completeSpanAndAnnotation?.annotation.operationRelativeEndTime ?? null
  const startTillInteractive =
    cpuIdleSpanAndAnnotation?.annotation.operationRelativeEndTime ?? null
  const startTillRequirementsMet =
    lastRequiredSpanAndAnnotation?.annotation.operationRelativeEndTime ?? null

  const filteredRecordedItemsArray = recordedItemsArray.flatMap<
    RecordedSpanAndAnnotation<RelationSchemasT>
  >(
    // remove getParentSpan function
    ({ span, ...rest }) => {
      // exclude internalUse spans from the final trace recording
      // unless they are errored
      if (!span.internalUse || span.status === 'error') {
        return [
          {
            ...rest,
            span: {
              // remove any internal properties (symbols)
              ...span,
              // bake-in parentSpanId
              parentSpanId: span[PARENT_SPAN]?.id,
            },
          },
        ]
      }
      return []
    },
  )

  return {
    id,
    parentTraceId,
    name,
    startTime: input.startTime,
    relatedTo,
    type: 'operation',
    duration,
    variant,
    additionalDurations: {
      startTillRequirementsMet,
      startTillInteractive,
      // last entry until the tti?
      // explicit null checks: a value of 0 is a valid measurement, not "missing"
      completeTillInteractive:
        startTillInteractive !== null && duration !== null
          ? startTillInteractive - duration
          : null,
    },
    // ?: If we have any error entries then should we mark the status as 'error'
    status: isIncompleteTrace
      ? 'interrupted'
      : markTraceAsErrored
      ? 'error'
      : 'ok',
    error,
    computedSpans,
    computedRenderBeaconSpans,
    computedValues,
    attributes: traceAttributes,
    interruption,
    entries: filteredRecordedItemsArray,
  }
}