@posthog/ai

Version:

PostHog Node.js AI integrations

1 lines • 78.3 kB

Source Map (JSON)

{"version":3,"file":"index.mjs","sources":["../../src/sanitization/media_type_context.ts","../../src/utils.ts","../../src/openai-agents/processor.ts","../../src/openai-agents/index.ts"],"sourcesContent":["const MIME_HINT_KEYS = ['mediaType', 'media_type', 'mimeType', 'mime_type'] as const\n\nconst STRONG_CONTEXT_KEYS = new Set([\n 'data',\n 'file_data',\n 'fileData',\n 'image_url',\n 'imageUrl',\n 'video_url',\n 'videoUrl',\n 'audio',\n 'audio_data',\n 'audioData',\n 'inline_data',\n 'inlineData',\n 'source',\n 'result',\n])\n\nconst STRONG_CONTEXT_TYPES = new Set([\n 'image',\n 'image_url',\n 'input_image',\n 'audio',\n 'input_audio',\n 'video',\n 'video_url',\n 'file',\n 'input_file',\n 'document',\n 'media',\n 'file-data',\n])\n\nconst FILE_FAMILY_TYPES = new Set(['file', 'input_file', 'document', 'media', 'file-data'])\n\nconst KNOWN_AUDIO_FORMATS = new Set(['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'webm'])\n\nexport class MediaTypeContext {\n static readonly EMPTY = new MediaTypeContext(undefined, undefined)\n\n constructor(\n private readonly parent: Record<string, unknown> | undefined,\n private readonly key: string | undefined\n ) {}\n\n inferMediaType(): string | undefined {\n return (\n this.inferFromSiblingMime() ?? this.inferFromSiblingFormat() ?? this.inferFromParentType() ?? this.inferFromKey()\n )\n }\n\n inferFromSiblingMime(): string | undefined {\n if (!this.parent) return undefined\n for (const hint of MIME_HINT_KEYS) {\n const v = this.parent[hint]\n if (typeof v === 'string') return v\n }\n return undefined\n }\n\n inferFromSiblingFormat(): string | undefined {\n if (!this.parent) return undefined\n const fmt = this.parent.format\n if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) {\n return `audio/${fmt.toLowerCase()}`\n }\n return undefined\n }\n\n inferFromParentType(): string | undefined {\n if (!this.parent) return undefined\n const t = this.parent.type\n if (typeof t !== 'string') return undefined\n if (t === 'image' || t === 'image_url' || t === 'input_image') return 'image'\n if (t === 'audio' || t === 'input_audio') return 'audio'\n if (t === 'video' || t === 'video_url') return 'video'\n if (FILE_FAMILY_TYPES.has(t)) return 'application/octet-stream'\n return undefined\n }\n\n inferFromKey(): string | undefined {\n if (!this.key) return undefined\n const key = this.key.toLowerCase()\n if (key.includes('audio')) return 'audio'\n if (key.includes('video')) return 'video'\n if (key.includes('image')) return 'image'\n if (key.includes('file') || key.includes('document')) return 'application/octet-stream'\n return undefined\n }\n\n signalsBinary(): boolean {\n if (this.parent) {\n for (const hint of MIME_HINT_KEYS) {\n if (typeof this.parent[hint] === 'string') return true\n }\n const fmt = this.parent.format\n if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) return true\n const t = this.parent.type\n if (typeof t === 'string' && STRONG_CONTEXT_TYPES.has(t)) return true\n }\n if (this.key && STRONG_CONTEXT_KEYS.has(this.key)) return true\n return false\n }\n}\n","import { PostHog } from 'posthog-node'\nimport OpenAIOrignal from 'openai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { ResponseCreateParamsWithTools } from 'openai/lib/ResponsesParser'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport type {\n FormattedMessage,\n FormattedContent,\n FormattedAudioContent,\n FormattedImageContent,\n FormattedDocumentContent,\n} from './types'\nimport { v4 as uuidv4 } from 'uuid'\nimport { isString } from './typeGuards'\nimport { redactBase64DataUrl } from './sanitization'\n\ntype ChatCompletionCreateParamsBase = OpenAIOrignal.Chat.Completions.ChatCompletionCreateParams\ntype MessageCreateParams = AnthropicOriginal.Messages.MessageCreateParams\ntype ResponseCreateParams = OpenAIOrignal.Responses.ResponseCreateParams\ntype EmbeddingCreateParams = OpenAIOrignal.EmbeddingCreateParams\ntype TranscriptionCreateParams = OpenAIOrignal.Audio.Transcriptions.TranscriptionCreateParams\ntype AnthropicTool = AnthropicOriginal.Tool\n\nconst TOKEN_PROPERTY_KEYS = new Set([\n '$ai_input_tokens',\n '$ai_output_tokens',\n '$ai_cache_read_input_tokens',\n '$ai_cache_creation_input_tokens',\n '$ai_total_tokens',\n '$ai_reasoning_tokens',\n])\n\nexport function getTokensSource(posthogProperties?: Record<string, unknown>): string {\n if (posthogProperties && Object.keys(posthogProperties).some((key) => TOKEN_PROPERTY_KEYS.has(key))) {\n return 'passthrough'\n }\n return 'sdk'\n}\n\n// limit large outputs by truncating to 200kb (approx 200k bytes)\nexport const MAX_OUTPUT_SIZE = 200000\nconst STRING_FORMAT = 'utf8'\n\n// Reused across calls to avoid per-invocation allocation; truncate() runs\n// hundreds of times for prompts with many parts.\nconst sharedTextEncoder = new TextEncoder()\nconst sharedTextDecoder = new TextDecoder(STRING_FORMAT, { fatal: false })\n\nexport const utf8ByteLength = (str: string): number => sharedTextEncoder.encode(str).byteLength\n\n/**\n * Safely converts content to a string, preserving structure for objects/arrays.\n * - If content is already a string, returns it as-is\n * - If content is an object or array, stringifies it with JSON.stringify to preserve structure\n * - Otherwise, converts to string with String()\n *\n * This prevents the \"[object Object]\" bug when objects are naively converted to strings.\n *\n * @param content - The content to convert to a string\n * @returns A string representation that preserves structure for complex types\n */\nexport function toContentString(content: unknown): string {\n if (typeof content === 'string') {\n return content\n }\n if (content !== undefined && content !== null && typeof content === 'object') {\n try {\n return JSON.stringify(content)\n } catch {\n // Fallback for circular refs, BigInt, or objects with throwing toJSON\n return String(content)\n }\n }\n return String(content)\n}\n\nexport interface MonitoringEventPropertiesWithDefaults {\n distinctId?: string\n traceId: string\n properties?: Record<string, any>\n privacyMode: boolean\n groups?: Record<string, any>\n modelOverride?: string\n providerOverride?: string\n costOverride?: CostOverride\n captureImmediate?: boolean\n}\n\nexport type MonitoringEventProperties = Partial<MonitoringEventPropertiesWithDefaults>\n\nexport type MonitoringParams = {\n [K in keyof MonitoringEventProperties as `posthog${Capitalize<string & K>}`]: MonitoringEventProperties[K]\n}\n\nexport interface CostOverride {\n inputCost: number\n outputCost: number\n}\n\nexport const getModelParams = (\n params:\n | ((\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams)\n | null\n): Record<string, any> => {\n if (!params) {\n return {}\n }\n const modelParams: Record<string, any> = {}\n const paramKeys = [\n 'temperature',\n 'max_tokens',\n 'max_completion_tokens',\n 'top_p',\n 'frequency_penalty',\n 'presence_penalty',\n 'n',\n 'stop',\n 'stream',\n 'streaming',\n 'language',\n 'response_format',\n 'timestamp_granularities',\n ] as const\n\n for (const key of paramKeys) {\n if (key in params && (params as any)[key] !== undefined) {\n modelParams[key] = (params as any)[key]\n }\n }\n return modelParams\n}\n\n/**\n * Helper to format responses (non-streaming) for consumption\n */\nexport const formatResponse = (response: any, provider: string): FormattedMessage[] => {\n if (!response) {\n return []\n }\n if (provider === 'anthropic') {\n return formatResponseAnthropic(response)\n } else if (provider === 'openai') {\n return formatResponseOpenAI(response)\n } else if (provider === 'gemini') {\n return formatResponseGemini(response)\n }\n return []\n}\n\nexport const formatResponseAnthropic = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n const content: FormattedContent = []\n\n for (const choice of response.content ?? []) {\n if (choice?.type === 'text' && choice?.text) {\n content.push({ type: 'text', text: choice.text })\n } else if (choice?.type === 'tool_use' && choice?.name && choice?.id) {\n content.push({\n type: 'function',\n id: choice.id,\n function: {\n name: choice.name,\n arguments: choice.input || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n\n return output\n}\n\nexport const formatResponseOpenAI = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.choices) {\n for (const choice of response.choices) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n if (choice.message) {\n if (choice.message.role) {\n role = choice.message.role\n }\n\n if (choice.message.content) {\n content.push({ type: 'text', text: choice.message.content })\n }\n\n if (choice.message.tool_calls) {\n for (const toolCall of choice.message.tool_calls) {\n content.push({\n type: 'function',\n id: toolCall.id,\n function: {\n name: toolCall.function.name,\n arguments: toolCall.function.arguments,\n },\n })\n }\n }\n\n // Handle audio output (gpt-4o-audio-preview)\n if (choice.message.audio) {\n content.push({\n type: 'audio',\n ...choice.message.audio,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n }\n\n // Handle Responses API format\n if (response.output) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n for (const item of response.output) {\n if (item.type === 'message') {\n role = item.role\n\n if (item.content && Array.isArray(item.content)) {\n for (const contentItem of item.content) {\n if (contentItem.type === 'output_text' && contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.type === 'input_image' && contentItem.image_url) {\n content.push({\n type: 'image',\n image: contentItem.image_url,\n })\n }\n }\n } else if (item.content) {\n content.push({ type: 'text', text: String(item.content) })\n }\n } else if (item.type === 'function_call') {\n content.push({\n type: 'function',\n id: item.call_id || item.id || '',\n function: {\n name: item.name,\n arguments: item.arguments || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n\n return output\n}\n\nexport const buildInlineDataBlock = (\n mimeType: string,\n data: string\n): FormattedAudioContent | FormattedImageContent | FormattedDocumentContent => {\n if (mimeType.startsWith('audio/')) {\n return { type: 'audio', mime_type: mimeType, data }\n }\n if (mimeType.startsWith('image/')) {\n return { type: 'image', inline_data: { mime_type: mimeType, data } }\n }\n return { type: 'document', inline_data: { mime_type: mimeType, data } }\n}\n\nexport const formatResponseGemini = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.candidates && Array.isArray(response.candidates)) {\n for (const candidate of response.candidates) {\n if (candidate.content && candidate.content.parts) {\n const content: FormattedContent = []\n\n for (const part of candidate.content.parts) {\n if (part.text) {\n content.push({ type: 'text', text: part.text })\n } else if (part.functionCall) {\n content.push({\n type: 'function',\n function: {\n name: part.functionCall.name,\n arguments: part.functionCall.args,\n },\n })\n } else if (part.inlineData) {\n // Handle inline data (images, audio, documents)\n const mimeType = part.inlineData.mimeType || part.inlineData.mime_type || 'application/octet-stream'\n let data = part.inlineData.data\n\n // Handle binary data (Uint8Array/Buffer -> base64)\n if (data instanceof Uint8Array) {\n if (typeof Buffer !== 'undefined') {\n data = Buffer.from(data).toString('base64')\n } else {\n let binary = ''\n for (let i = 0; i < data.length; i++) {\n binary += String.fromCharCode(data[i])\n }\n data = btoa(binary)\n }\n }\n\n // Sanitize base64 data for images and other large inline data\n data = redactBase64DataUrl(data)\n\n content.push(buildInlineDataBlock(mimeType, data))\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n } else if (candidate.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: candidate.text }],\n })\n }\n }\n } else if (response.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: response.text }],\n })\n }\n\n return output\n}\n\nexport const mergeSystemPrompt = (params: MessageCreateParams & MonitoringParams, provider: string): any => {\n if (provider == 'anthropic') {\n const messages = params.messages || []\n if (!(params as any).system) {\n return messages\n }\n const systemMessage = (params as any).system\n return [{ role: 'system', content: systemMessage }, ...messages]\n }\n return params.messages\n}\n\nexport const withPrivacyMode = (client: PostHog, privacyMode: boolean, input: any): any => {\n return (client as any).privacy_mode || privacyMode ? null : input\n}\n\nfunction toSafeString(input: unknown): string {\n if (input === undefined || input === null) {\n return ''\n }\n if (typeof input === 'string') {\n return input\n }\n try {\n return JSON.stringify(input)\n } catch {\n console.warn('Failed to stringify input', input)\n return ''\n }\n}\n\nexport const truncate = (input: unknown): string => {\n const str = toSafeString(input)\n if (str === '') {\n return ''\n }\n\n // Check if we need to truncate and ensure STRING_FORMAT is respected\n const buffer = sharedTextEncoder.encode(str)\n if (buffer.length <= MAX_OUTPUT_SIZE) {\n // Ensure STRING_FORMAT is respected\n return sharedTextDecoder.decode(buffer)\n }\n\n // Truncate the buffer and ensure a valid string is returned.\n // fatal: false means we get U+FFFD at the end if truncation broke the encoding.\n const truncatedBuffer = buffer.slice(0, MAX_OUTPUT_SIZE)\n let truncatedStr = sharedTextDecoder.decode(truncatedBuffer)\n if (truncatedStr.endsWith('\\uFFFD')) {\n truncatedStr = truncatedStr.slice(0, -1)\n }\n return `${truncatedStr}... [truncated]`\n}\n\n/**\n * Calculate web search count from raw API response.\n *\n * Uses a two-tier detection strategy:\n * Priority 1 (Exact Count): Count actual web search calls when available\n * Priority 2 (Binary Detection): Return 1 if web search indicators are present, 0 otherwise\n *\n * @param result - Raw API response from any provider (OpenAI, Perplexity, OpenRouter, Gemini, etc.)\n * @returns Number of web searches performed (exact count or binary 1/0)\n */\nexport function calculateWebSearchCount(result: unknown): number {\n if (!result || typeof result !== 'object') {\n return 0\n }\n\n // Priority 1: Exact Count\n // Check for OpenAI Responses API web_search_call items\n if ('output' in result && Array.isArray(result.output)) {\n let count = 0\n\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'type' in item && item.type === 'web_search_call') {\n count++\n }\n }\n\n if (count > 0) {\n return count\n }\n }\n\n // Priority 2: Binary Detection (1 or 0)\n\n // Check for citations at root level (Perplexity)\n if ('citations' in result && Array.isArray(result.citations) && result.citations.length > 0) {\n return 1\n }\n\n // Check for search_results at root level (Perplexity via OpenRouter)\n if ('search_results' in result && Array.isArray(result.search_results) && result.search_results.length > 0) {\n return 1\n }\n\n // Check for usage.search_context_size (Perplexity via OpenRouter)\n if ('usage' in result && typeof result.usage === 'object' && result.usage !== null) {\n if ('search_context_size' in result.usage && result.usage.search_context_size) {\n return 1\n }\n }\n\n // Check for annotations with url_citation in choices[].message or choices[].delta (OpenAI/Perplexity)\n if ('choices' in result && Array.isArray(result.choices)) {\n for (const choice of result.choices) {\n if (typeof choice === 'object' && choice !== null) {\n // Check both message (non-streaming) and delta (streaming) for annotations\n const content = ('message' in choice ? choice.message : null) || ('delta' in choice ? choice.delta : null)\n\n if (typeof content === 'object' && content !== null && 'annotations' in content) {\n const annotations = content.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n\n // Check for annotations in output[].content[] (OpenAI Responses API)\n if ('output' in result && Array.isArray(result.output)) {\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'content' in item) {\n const content = item.content\n\n if (Array.isArray(content)) {\n for (const contentItem of content) {\n if (typeof contentItem === 'object' && contentItem !== null && 'annotations' in contentItem) {\n const annotations = contentItem.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n }\n }\n\n // Check for grounding_metadata (Gemini)\n if ('candidates' in result && Array.isArray(result.candidates)) {\n for (const candidate of result.candidates) {\n if (\n typeof candidate === 'object' &&\n candidate !== null &&\n 'grounding_metadata' in candidate &&\n candidate.grounding_metadata\n ) {\n return 1\n }\n }\n }\n\n return 0\n}\n\n/**\n * Extract available tool calls from the request parameters.\n * These are the tools provided to the LLM, not the tool calls in the response.\n */\nexport const extractAvailableToolCalls = (\n provider: string,\n params: any\n): ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null => {\n if (provider === 'anthropic') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'gemini') {\n if (params.config && params.config.tools) {\n return params.config.tools\n }\n\n return null\n } else if (provider === 'openai') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'vercel') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n }\n\n return null\n}\n\nexport enum AIEvent {\n Generation = '$ai_generation',\n Embedding = '$ai_embedding',\n}\n\nexport function sanitizeValues(obj: any): any {\n if (obj === undefined || obj === null) {\n return obj\n }\n const jsonSafe = JSON.parse(JSON.stringify(obj))\n if (typeof jsonSafe === 'string') {\n // Sanitize lone surrogates by round-tripping through UTF-8\n return new TextDecoder().decode(new TextEncoder().encode(jsonSafe))\n } else if (Array.isArray(jsonSafe)) {\n return jsonSafe.map(sanitizeValues)\n } else if (jsonSafe && typeof jsonSafe === 'object') {\n return Object.fromEntries(Object.entries(jsonSafe).map(([k, v]) => [k, sanitizeValues(v)]))\n }\n return jsonSafe\n}\n\nconst POSTHOG_PARAMS_MAP: Record<keyof MonitoringParams, string> = {\n posthogDistinctId: 'distinctId',\n posthogTraceId: 'traceId',\n posthogProperties: 'properties',\n posthogPrivacyMode: 'privacyMode',\n posthogGroups: 'groups',\n posthogModelOverride: 'modelOverride',\n posthogProviderOverride: 'providerOverride',\n posthogCostOverride: 'costOverride',\n posthogCaptureImmediate: 'captureImmediate',\n}\n\nexport function extractPosthogParams<T>(body: T & MonitoringParams): {\n providerParams: T\n posthogParams: MonitoringEventPropertiesWithDefaults\n} {\n const providerParams: Record<string, unknown> = {}\n const posthogParams: Record<string, unknown> = {}\n\n for (const [key, value] of Object.entries(body)) {\n if (POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]) {\n posthogParams[POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]] = value\n } else if (key.startsWith('posthog')) {\n console.warn(`Unknown Posthog parameter ${key}`)\n } else {\n providerParams[key] = value\n }\n }\n\n return {\n providerParams: providerParams as T,\n posthogParams: addDefaults(posthogParams),\n }\n}\n\nfunction addDefaults(params: MonitoringEventProperties): MonitoringEventPropertiesWithDefaults {\n return {\n ...params,\n privacyMode: params.privacyMode ?? false,\n traceId: params.traceId ?? uuidv4(),\n }\n}\n\nexport function formatOpenAIResponsesInput(input: unknown, instructions?: string | null): FormattedMessage[] {\n const messages: FormattedMessage[] = []\n\n if (instructions) {\n messages.push({\n role: 'system',\n content: instructions,\n })\n }\n\n if (Array.isArray(input)) {\n for (const item of input) {\n if (typeof item === 'string') {\n messages.push({ role: 'user', content: item })\n } else if (item && typeof item === 'object') {\n const obj = item as Record<string, unknown>\n const role = isString(obj.role) ? obj.role : 'user'\n\n // Handle content properly - preserve structure for objects/arrays\n const content = obj.content ?? obj.text ?? item\n messages.push({ role, content: toContentString(content) })\n } else {\n messages.push({ role: 'user', content: toContentString(item) })\n }\n }\n } else if (typeof input === 'string') {\n messages.push({ role: 'user', content: input })\n } else if (input) {\n messages.push({ role: 'user', content: toContentString(input) })\n }\n\n return messages\n}\n","import type { PostHog, EventMessage } from 'posthog-node'\nimport type {\n TracingProcessor,\n Trace,\n Span,\n SpanData,\n SpanError,\n AgentSpanData,\n FunctionSpanData,\n GenerationSpanData,\n ResponseSpanData,\n HandoffSpanData,\n CustomSpanData,\n GuardrailSpanData,\n TranscriptionSpanData,\n SpeechSpanData,\n SpeechGroupSpanData,\n MCPListToolsSpanData,\n} from '@openai/agents-core'\nimport { MAX_OUTPUT_SIZE, truncate, withPrivacyMode } from '../utils'\nimport { version } from '../../package.json'\n\n/**\n * Normalize OpenAI Responses API input items to include a `role` field.\n * Items like `function_call` and `function_call_result` don't have a role,\n * causing PostHog's trace viewer to default them to \"user\".\n */\nfunction normalizeInputRoles(input: unknown): unknown {\n if (!Array.isArray(input)) {\n return input\n }\n return input.map((item) => {\n if (item && typeof item === 'object' && !('role' in item) && 'type' in item) {\n if (item.type === 'function_call') {\n return { ...item, role: 'assistant' }\n }\n if (item.type === 'function_call_result') {\n return { ...item, role: 'tool' }\n }\n }\n return item\n })\n}\n\nfunction ensureSerializable(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj\n }\n try {\n JSON.stringify(obj)\n return obj\n } catch {\n return String(obj)\n }\n}\n\nfunction exceedsMaxOutputSize(value: unknown): boolean {\n if (value === null || value === undefined) {\n return false\n }\n\n try {\n const serializedValue = typeof value === 'string' ? value : JSON.stringify(value)\n return new TextEncoder().encode(serializedValue).length > MAX_OUTPUT_SIZE\n } catch {\n return false\n }\n}\n\nfunction parseIsoTimestamp(isoStr: string | null | undefined): number | null {\n if (!isoStr) {\n return null\n }\n try {\n const ts = new Date(isoStr).getTime()\n return isNaN(ts) ? null : ts / 1000\n } catch {\n return null\n }\n}\n\ninterface TraceMetadata {\n name: string\n groupId: string | null\n metadata: Record<string, any> | undefined\n distinctId: string | undefined\n startTime: number\n}\n\nexport type DistinctIdResolver = string | ((trace: Trace) => string | null | undefined)\n\nexport interface PostHogTracingProcessorOptions {\n client: PostHog\n distinctId?: DistinctIdResolver\n privacyMode?: boolean\n groups?: Record<string, any>\n properties?: Record<string, any>\n}\n\n/**\n * A tracing processor that sends OpenAI Agents SDK traces to PostHog.\n *\n * Implements the TracingProcessor interface from the OpenAI Agents SDK\n * and maps agent traces, spans, and generations to PostHog's LLM analytics events.\n *\n * @example\n * ```typescript\n * import { PostHogTracingProcessor } from '@posthog/ai/openai-agents'\n * import { addTraceProcessor } from '@openai/agents'\n *\n * const processor = new PostHogTracingProcessor({\n * client: posthog,\n * distinctId: 'user@example.com',\n * })\n * addTraceProcessor(processor)\n * ```\n */\nexport class PostHogTracingProcessor implements TracingProcessor {\n private _client: PostHog\n private _distinctId: DistinctIdResolver | undefined\n private _privacyMode: boolean\n private _groups: Record<string, any>\n private _properties: Record<string, any>\n\n private _spanStartTimes: Map<string, number> = new Map()\n private _traceMetadata: Map<string, TraceMetadata> = new Map()\n private _maxTrackedEntries = 10000\n\n constructor(options: PostHogTracingProcessorOptions) {\n this._client = options.client\n this._distinctId = options.distinctId\n this._privacyMode = options.privacyMode ?? false\n this._groups = options.groups ?? {}\n this._properties = options.properties ?? {}\n }\n\n private _getDistinctId(trace: Trace | null): string | undefined {\n if (typeof this._distinctId === 'function') {\n if (trace) {\n const result = this._distinctId(trace)\n if (result) {\n return String(result)\n }\n }\n return undefined\n } else if (this._distinctId) {\n return String(this._distinctId)\n }\n return undefined\n }\n\n private _withPrivacyMode(value: unknown): unknown {\n return withPrivacyMode(this._client, this._privacyMode, value)\n }\n\n private _prepareCapturedValue(value: unknown): unknown {\n const serializableValue = ensureSerializable(value)\n const boundedValue = exceedsMaxOutputSize(serializableValue) ? truncate(serializableValue) : serializableValue\n return this._withPrivacyMode(boundedValue)\n }\n\n private _evictStaleEntries(): void {\n if (this._spanStartTimes.size > this._maxTrackedEntries) {\n const entries = [...this._spanStartTimes.entries()].sort((a, b) => a[1] - b[1])\n const toRemove = entries.slice(0, Math.floor(entries.length / 2))\n for (const [key] of toRemove) {\n this._spanStartTimes.delete(key)\n }\n }\n\n if (this._traceMetadata.size > this._maxTrackedEntries) {\n const keys = [...this._traceMetadata.keys()]\n const toRemove = keys.slice(0, Math.floor(keys.length / 2))\n for (const key of toRemove) {\n this._traceMetadata.delete(key)\n }\n }\n }\n\n private _captureEvent(event: string, properties: Record<string, any>, distinctId?: string): void {\n try {\n if (!this._client?.capture) {\n return\n }\n\n const finalProperties = {\n ...this._properties,\n ...properties,\n }\n\n const eventMessage: EventMessage = {\n distinctId: distinctId || 'unknown',\n event,\n properties: finalProperties,\n groups: Object.keys(this._groups).length > 0 ? this._groups : undefined,\n }\n\n this._client.capture(eventMessage)\n } catch {\n // Silently ignore capture errors\n }\n }\n\n private _baseProperties(\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): Record<string, any> {\n const properties: Record<string, any> = {\n $ai_lib: 'posthog-ai',\n $ai_lib_version: version,\n $ai_trace_id: traceId,\n $ai_span_id: spanId,\n $ai_parent_id: parentId,\n $ai_provider: 'openai',\n $ai_framework: 'openai-agents',\n $ai_latency: latency,\n ...errorProperties,\n }\n if (groupId) {\n properties.$ai_group_id = groupId\n }\n return properties\n }\n\n private _getErrorProperties(error: SpanError | null): Record<string, any> {\n if (!error) {\n return {}\n }\n\n const errorMessage = error.message || String(error)\n\n let errorType = 'unknown'\n if (errorMessage.includes('ModelBehaviorError')) {\n errorType = 'model_behavior_error'\n } else if (errorMessage.includes('UserError')) {\n errorType = 'user_error'\n } else if (errorMessage.includes('InputGuardrailTripwireTriggered')) {\n errorType = 'input_guardrail_triggered'\n } else if (errorMessage.includes('OutputGuardrailTripwireTriggered')) {\n errorType = 'output_guardrail_triggered'\n } else if (errorMessage.includes('MaxTurnsExceeded')) {\n errorType = 'max_turns_exceeded'\n }\n\n return {\n $ai_is_error: true,\n $ai_error: errorMessage,\n $ai_error_type: errorType,\n }\n }\n\n // --- TracingProcessor interface ---\n\n async onTraceStart(trace: Trace): Promise<void> {\n try {\n this._evictStaleEntries()\n\n const traceId = trace.traceId\n const traceName = trace.name\n const groupId = trace.groupId ?? null\n const metadata = trace.metadata\n\n const distinctId = this._getDistinctId(trace)\n\n this._traceMetadata.set(traceId, {\n name: traceName,\n groupId,\n metadata,\n distinctId,\n startTime: Date.now() / 1000,\n })\n } catch {\n // Silently ignore errors\n }\n }\n\n async onTraceEnd(trace: Trace): Promise<void> {\n try {\n const traceId = trace.traceId\n\n const traceInfo = this._traceMetadata.get(traceId)\n this._traceMetadata.delete(traceId)\n\n const traceName = traceInfo?.name ?? trace.name\n const groupId = traceInfo?.groupId ?? trace.groupId ?? null\n const metadata = traceInfo?.metadata ?? trace.metadata\n const distinctId = traceInfo?.distinctId ?? this._getDistinctId(trace)\n\n const startTime = traceInfo?.startTime\n const latency = startTime != null ? Date.now() / 1000 - startTime : undefined\n\n const properties: Record<string, any> = {\n $ai_lib: 'posthog-ai',\n $ai_lib_version: version,\n $ai_trace_id: traceId,\n $ai_trace_name: traceName,\n $ai_provider: 'openai',\n $ai_framework: 'openai-agents',\n }\n\n if (latency != null) {\n properties.$ai_latency = latency\n }\n\n if (groupId) {\n properties.$ai_group_id = groupId\n }\n\n if (metadata && Object.keys(metadata).length > 0) {\n properties.$ai_trace_metadata = this._prepareCapturedValue(metadata)\n }\n\n if (distinctId == null) {\n properties.$process_person_profile = false\n }\n\n this._captureEvent('$ai_trace', properties, distinctId ?? traceId)\n } catch {\n // Silently ignore errors\n }\n }\n\n async onSpanStart(span: Span<SpanData>): Promise<void> {\n try {\n this._evictStaleEntries()\n this._spanStartTimes.set(span.spanId, Date.now() / 1000)\n } catch {\n // Silently ignore errors\n }\n }\n\n async onSpanEnd(span: Span<SpanData>): Promise<void> {\n try {\n const spanId = span.spanId\n const traceId = span.traceId\n const parentId = span.parentId\n const spanData = span.spanData\n\n // Calculate latency\n const startTime = this._spanStartTimes.get(spanId)\n this._spanStartTimes.delete(spanId)\n let latency: number\n if (startTime != null) {\n latency = Date.now() / 1000 - startTime\n } else {\n const started = parseIsoTimestamp(span.startedAt)\n const ended = parseIsoTimestamp(span.endedAt)\n latency = started != null && ended != null ? ended - started : 0\n }\n\n // Get distinct ID from trace metadata\n const traceInfo = this._traceMetadata.get(traceId)\n const userDistinctId = traceInfo?.distinctId ?? this._getDistinctId(null)\n\n // Get group_id from trace metadata\n const groupId = traceInfo?.groupId ?? null\n\n // Get error properties\n const errorProperties = this._getErrorProperties(span.error)\n\n // Personless mode: no user-provided distinct_id, fallback to trace_id\n if (userDistinctId == null) {\n errorProperties.$process_person_profile = false\n }\n const distinctId: string = userDistinctId ?? traceId\n\n // Dispatch based on span data type\n switch (spanData.type) {\n case 'generation':\n this._handleGenerationSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'response':\n this._handleResponseSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'function':\n this._handleFunctionSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'agent':\n this._handleAgentSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'handoff':\n this._handleHandoffSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'guardrail':\n this._handleGuardrailSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'custom':\n this._handleCustomSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'transcription':\n case 'speech':\n case 'speech_group':\n this._handleAudioSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n case 'mcp_tools':\n this._handleMcpSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n default:\n this._handleGenericSpan(spanData, traceId, spanId, parentId, latency, distinctId, groupId, errorProperties)\n break\n }\n } catch {\n // Silently ignore errors\n }\n }\n\n async shutdown(): Promise<void> {\n try {\n this._spanStartTimes.clear()\n this._traceMetadata.clear()\n\n if (typeof this._client?.flush === 'function') {\n await this._client.flush()\n }\n } catch {\n // Silently ignore errors\n }\n }\n\n async forceFlush(): Promise<void> {\n try {\n if (typeof this._client?.flush === 'function') {\n await this._client.flush()\n }\n } catch {\n // Silently ignore errors\n }\n }\n\n // --- Span handlers ---\n\n private _handleGenerationSpan(\n spanData: GenerationSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const usage = spanData.usage ?? {}\n const inputTokens = (usage.input_tokens as number) || (usage as any).prompt_tokens || 0\n const outputTokens = (usage.output_tokens as number) || (usage as any).completion_tokens || 0\n\n const modelConfig = spanData.model_config ?? {}\n const modelParams: Record<string, any> = {}\n for (const param of ['temperature', 'max_tokens', 'top_p', 'frequency_penalty', 'presence_penalty']) {\n if (param in modelConfig) {\n modelParams[param] = modelConfig[param]\n }\n }\n\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_model: spanData.model,\n $ai_model_parameters: Object.keys(modelParams).length > 0 ? modelParams : null,\n $ai_input: this._prepareCapturedValue(normalizeInputRoles(spanData.input)),\n $ai_output_choices: this._prepareCapturedValue(spanData.output),\n $ai_input_tokens: inputTokens,\n $ai_output_tokens: outputTokens,\n $ai_total_tokens: inputTokens + outputTokens,\n }\n\n if (usage.details) {\n const details = usage.details as Record<string, unknown>\n if (details.reasoning_tokens) {\n properties.$ai_reasoning_tokens = details.reasoning_tokens\n }\n if (details.cache_read_input_tokens) {\n properties.$ai_cache_read_input_tokens = details.cache_read_input_tokens\n }\n if (details.cache_creation_input_tokens) {\n properties.$ai_cache_creation_input_tokens = details.cache_creation_input_tokens\n }\n }\n\n // Also check top-level usage for reasoning/cache tokens (flexible schema)\n if ((usage as any).reasoning_tokens) {\n properties.$ai_reasoning_tokens = (usage as any).reasoning_tokens\n }\n if ((usage as any).cache_read_input_tokens) {\n properties.$ai_cache_read_input_tokens = (usage as any).cache_read_input_tokens\n }\n if ((usage as any).cache_creation_input_tokens) {\n properties.$ai_cache_creation_input_tokens = (usage as any).cache_creation_input_tokens\n }\n\n this._captureEvent('$ai_generation', properties, distinctId)\n }\n\n private _handleResponseSpan(\n spanData: ResponseSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n // The OpenAI Agents SDK exposes these underscored fields for non-OpenAI tracing providers.\n // Treat them as best-effort and avoid assuming they are always present.\n const responseSpanData = spanData as ResponseSpanData & { _input?: unknown; _response?: Record<string, any> }\n const response = responseSpanData._response\n const responseId = spanData.response_id ?? (response?.id as string | undefined)\n\n // Extract usage from response\n const usage = response?.usage ?? {}\n const inputTokens = usage?.input_tokens ?? 0\n const outputTokens = usage?.output_tokens ?? 0\n\n // Extract model from response\n const model = response?.model as string | undefined\n\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_model: model,\n $ai_response_id: responseId,\n $ai_input: this._prepareCapturedValue(normalizeInputRoles(responseSpanData._input)),\n $ai_input_tokens: inputTokens,\n $ai_output_tokens: outputTokens,\n $ai_total_tokens: inputTokens + outputTokens,\n }\n\n // Extract output from response\n if (response?.output) {\n properties.$ai_output_choices = this._prepareCapturedValue(response.output)\n }\n\n this._captureEvent('$ai_generation', properties, distinctId)\n }\n\n private _handleFunctionSpan(\n spanData: FunctionSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: spanData.name,\n $ai_span_type: 'tool',\n $ai_input_state: this._prepareCapturedValue(spanData.input),\n $ai_output_state: this._prepareCapturedValue(spanData.output),\n }\n\n if (spanData.mcp_data) {\n properties.$ai_mcp_data = this._prepareCapturedValue(spanData.mcp_data)\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleAgentSpan(\n spanData: AgentSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: spanData.name,\n $ai_span_type: 'agent',\n }\n\n if (spanData.handoffs) {\n properties.$ai_agent_handoffs = spanData.handoffs\n }\n if (spanData.tools) {\n properties.$ai_agent_tools = spanData.tools\n }\n if (spanData.output_type) {\n properties.$ai_agent_output_type = spanData.output_type\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleHandoffSpan(\n spanData: HandoffSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: `${spanData.from_agent} -> ${spanData.to_agent}`,\n $ai_span_type: 'handoff',\n $ai_handoff_from_agent: spanData.from_agent,\n $ai_handoff_to_agent: spanData.to_agent,\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleGuardrailSpan(\n spanData: GuardrailSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: spanData.name,\n $ai_span_type: 'guardrail',\n $ai_guardrail_triggered: spanData.triggered,\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleCustomSpan(\n spanData: CustomSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: spanData.name,\n $ai_span_type: 'custom',\n $ai_custom_data: this._prepareCapturedValue(spanData.data),\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleAudioSpan(\n spanData: TranscriptionSpanData | SpeechSpanData | SpeechGroupSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const spanType = spanData.type\n\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: spanType,\n $ai_span_type: spanType,\n }\n\n // Add model info if available\n if ('model' in spanData && spanData.model) {\n properties.$ai_model = spanData.model\n }\n\n // Add model config if available\n if ('model_config' in spanData && spanData.model_config) {\n properties.$ai_model_config = this._prepareCapturedValue(spanData.model_config)\n }\n\n // Add audio format info\n if (spanData.type === 'transcription') {\n const transcription = spanData as TranscriptionSpanData\n if (transcription.input?.format) {\n properties.$ai_audio_input_format = transcription.input.format\n }\n // Transcription output is text\n if (transcription.output) {\n properties.$ai_output_state = this._prepareCapturedValue(transcription.output)\n }\n } else if (spanData.type === 'speech') {\n const speech = spanData as SpeechSpanData\n if (speech.output?.format) {\n properties.$ai_audio_output_format = speech.output.format\n }\n // Text input for TTS\n if (speech.input) {\n properties.$ai_input = this._prepareCapturedValue(speech.input)\n }\n } else if (spanData.type === 'speech_group') {\n const speechGroup = spanData as SpeechGroupSpanData\n if (speechGroup.input) {\n properties.$ai_input = this._prepareCapturedValue(speechGroup.input)\n }\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleMcpSpan(\n spanData: MCPListToolsSpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: `mcp:${spanData.server}`,\n $ai_span_type: 'mcp_tools',\n $ai_mcp_server: spanData.server,\n $ai_mcp_tools: this._prepareCapturedValue(spanData.result),\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n\n private _handleGenericSpan(\n spanData: SpanData,\n traceId: string,\n spanId: string,\n parentId: string | null,\n latency: number,\n distinctId: string,\n groupId: string | null,\n errorProperties: Record<string, any>\n ): void {\n const spanType = spanData.type || 'unknown'\n\n const properties: Record<string, any> = {\n ...this._baseProperties(traceId, spanId, parentId, latency, groupId, errorProperties),\n $ai_span_name: spanType,\n $ai_span_type: spanType,\n }\n\n this._captureEvent('$ai_span', properties, distinctId)\n }\n}\n","import { PostHogTracingProcessor } from './processor'\nimport type { PostHogTracingProcessorOptions } from './processor'\n\nexport { PostHogTracingProcessor } from './processor'\nexport type { PostHogTracingProcessorOptions, DistinctIdResolver } from './processor'\n\nexport type InstrumentOptions = PostHogTracingProcessorOptions\n\n/**\n * One-liner to instrument OpenAI Agents SDK with PostHog tracing.\n *\n * This registers a PostHogTracingProcessor with the OpenAI Agents SDK,\n * automatically capturing traces, spans, and LLM generations.\n *\n * @param options - Configuration options\n * @returns The registered processor instance\n *\n * @example\n * ```typescript\n * import { instrument } from '@posthog/ai/openai-agents'\n * import PostHog from 'posthog-node'\n *\n * const phClient = new PostHog('<API_KEY>')\n *\n * // Simple setup — await before running agents\n * await instrument({ client: phClient, distinctId: 'user@example.com' })\n *\n * // With dynamic distinct ID\n * await instrument({\n * client: phClient,\n * distinctId: (trace) => trace.metadata?.userId,\n * privacyMode: true,\n * properties: { environment: 'production' },\n * })\n *\n * // Now run agents as normal - traces automatically sent to PostHog\n * import { Agent, run } from '@openai/agents'\n * const agent = new Agent({ name: 'Assistant', instructions: 'You are helpful.' })\n * const result = await run(agent, 'Hello!')\n * ```\n */\nexport async function instrument(options: InstrumentOptions): Promise<PostHogTracingProcessor> {\n const { addTraceProcessor } = await import('@openai/agents-core')\n\n const processor = new PostHogTracingProcessor({\n client: