@posthog/ai

Version:

PostHog Node.js AI integrations

1 lines • 108 kB

Source Map (JSON)

{"version":3,"file":"index.cjs","sources":["../../src/typeGuards.ts","../../src/sanitization.ts","../../src/utils.ts","../../src/otel/mappers/aiSdk.ts","../../src/otel/mappers/index.ts","../../src/otel/capture.ts","../../src/otel/processor.ts"],"sourcesContent":["// Type guards for safer type checking\n\nexport const isString = (value: unknown): value is string => {\n return typeof value === 'string'\n}\n\nexport const isObject = (value: unknown): value is Record<string, unknown> => {\n return value !== null && typeof value === 'object' && !Array.isArray(value)\n}\n","import { isString, isObject } from './typeGuards'\n\nconst REDACTED_IMAGE_PLACEHOLDER = '[base64 image redacted]'\n\n// ============================================\n// Multimodal Feature Toggle\n// ============================================\n\nconst isMultimodalEnabled = (): boolean => {\n const val = process.env._INTERNAL_LLMA_MULTIMODAL || ''\n return val.toLowerCase() === 'true' || val === '1' || val.toLowerCase() === 'yes'\n}\n\n// ============================================\n// Base64 Detection Helpers\n// ============================================\n\nconst isBase64DataUrl = (str: string): boolean => {\n return /^data:([^;]+);base64,/.test(str)\n}\n\nconst isValidUrl = (str: string): boolean => {\n try {\n new URL(str)\n return true\n } catch {\n // Not an absolute URL, check if it's a relative URL or path\n return str.startsWith('/') || str.startsWith('./') || str.startsWith('../')\n }\n}\n\nconst isRawBase64 = (str: string): boolean => {\n // Skip if it's a valid URL or path\n if (isValidUrl(str)) {\n return false\n }\n\n // Check if it's a valid base64 string\n // Base64 images are typically at least a few hundred chars, but we'll be conservative\n return str.length > 20 && /^[A-Za-z0-9+/]+=*$/.test(str)\n}\n\nexport function redactBase64DataUrl(str: string): string\nexport function redactBase64DataUrl(str: unknown): unknown\nexport function redactBase64DataUrl(str: unknown): unknown {\n if (isMultimodalEnabled()) return str\n if (!isString(str)) return str\n\n // Check for data URL format\n if (isBase64DataUrl(str)) {\n return REDACTED_IMAGE_PLACEHOLDER\n }\n\n // Check for raw base64 (Vercel sends raw base64 for inline images)\n if (isRawBase64(str)) {\n return REDACTED_IMAGE_PLACEHOLDER\n }\n\n return str\n}\n\n// ============================================\n// Common Message Processing\n// ============================================\n\ntype ContentTransformer = (item: unknown) => unknown\n\nconst processMessages = (messages: unknown, transformContent: ContentTransformer): unknown => {\n if (!messages) return messages\n\n const processContent = (content: unknown): unknown => {\n if (typeof content === 'string') return content\n\n if (!content) return content\n\n if (Array.isArray(content)) {\n return content.map(transformContent)\n }\n\n // Handle single object content\n return transformContent(content)\n }\n\n const processMessage = (msg: unknown): unknown => {\n if (!isObject(msg) || !('content' in msg)) return msg\n return { ...msg, content: processContent(msg.content) }\n }\n\n // Handle both arrays and single messages\n if (Array.isArray(messages)) {\n return messages.map(processMessage)\n }\n\n return processMessage(messages)\n}\n\n// ============================================\n// Provider-Specific Image Sanitizers\n// ============================================\n\nconst sanitizeOpenAIImage = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // Handle image_url format\n if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {\n return {\n ...item,\n image_url: {\n ...item.image_url,\n url: redactBase64DataUrl(item.image_url.url),\n },\n }\n }\n\n // Handle audio format\n if (item.type === 'audio' && 'data' in item) {\n if (isMultimodalEnabled()) return item\n return { ...item, data: REDACTED_IMAGE_PLACEHOLDER }\n }\n\n return item\n}\n\nconst sanitizeOpenAIResponseImage = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // Handle input_image format\n if (item.type === 'input_image' && 'image_url' in item) {\n return {\n ...item,\n image_url: redactBase64DataUrl(item.image_url),\n }\n }\n\n return item\n}\n\nconst sanitizeAnthropicImage = (item: unknown): unknown => {\n if (isMultimodalEnabled()) return item\n if (!isObject(item)) return item\n\n // Handle Anthropic's image and document formats (same structure, different type field)\n if (\n (item.type === 'image' || item.type === 'document') &&\n 'source' in item &&\n isObject(item.source) &&\n item.source.type === 'base64' &&\n 'data' in item.source\n ) {\n return {\n ...item,\n source: {\n ...item.source,\n data: REDACTED_IMAGE_PLACEHOLDER,\n },\n }\n }\n\n return item\n}\n\nconst sanitizeGeminiPart = (part: unknown): unknown => {\n if (isMultimodalEnabled()) return part\n if (!isObject(part)) return part\n\n // Handle Gemini's inline data format (images, audio, PDFs all use inlineData)\n if ('inlineData' in part && isObject(part.inlineData) && 'data' in part.inlineData) {\n return {\n ...part,\n inlineData: {\n ...part.inlineData,\n data: REDACTED_IMAGE_PLACEHOLDER,\n },\n }\n }\n\n return part\n}\n\nconst processGeminiItem = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // If it has parts, process them\n if ('parts' in item && item.parts) {\n const parts = Array.isArray(item.parts) ? item.parts.map(sanitizeGeminiPart) : sanitizeGeminiPart(item.parts)\n\n return { ...item, parts }\n }\n\n return item\n}\n\nconst sanitizeLangChainImage = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // OpenAI style\n if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {\n return {\n ...item,\n image_url: {\n ...item.image_url,\n url: redactBase64DataUrl(item.image_url.url),\n },\n }\n }\n\n // Direct image with data field\n if (item.type === 'image' && 'data' in item) {\n return { ...item, data: redactBase64DataUrl(item.data) }\n }\n\n // Anthropic style\n if (item.type === 'image' && 'source' in item && isObject(item.source) && 'data' in item.source) {\n if (isMultimodalEnabled()) return item\n return {\n ...item,\n source: {\n ...item.source,\n data: redactBase64DataUrl(item.source.data),\n },\n }\n }\n\n // Google style\n if (item.type === 'media' && 'data' in item) {\n return { ...item, data: redactBase64DataUrl(item.data) }\n }\n\n return item\n}\n\n// Export individual sanitizers for tree-shaking\nexport const sanitizeOpenAI = (data: unknown): unknown => {\n return processMessages(data, sanitizeOpenAIImage)\n}\n\nexport const sanitizeOpenAIResponse = (data: unknown): unknown => {\n return processMessages(data, sanitizeOpenAIResponseImage)\n}\n\nexport const sanitizeAnthropic = (data: unknown): unknown => {\n return processMessages(data, sanitizeAnthropicImage)\n}\n\nexport const sanitizeGemini = (data: unknown): unknown => {\n // Gemini has a different structure with 'parts' directly on items instead of 'content'\n // So we need custom processing instead of using processMessages\n if (!data) return data\n\n if (Array.isArray(data)) {\n return data.map(processGeminiItem)\n }\n\n return processGeminiItem(data)\n}\n\nexport const sanitizeLangChain = (data: unknown): unknown => {\n return processMessages(data, sanitizeLangChainImage)\n}\n","import { EventMessage, PostHog } from 'posthog-node'\nimport OpenAIOrignal from 'openai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { ResponseCreateParamsWithTools } from 'openai/lib/ResponsesParser'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport type { FormattedMessage, FormattedContent, TokenUsage } from './types'\nimport { version } from '../package.json'\nimport { v4 as uuidv4 } from 'uuid'\nimport { isString } from './typeGuards'\nimport { uuidv7, ErrorTracking as CoreErrorTracking } from '@posthog/core'\nimport { redactBase64DataUrl } from './sanitization'\n\ntype ChatCompletionCreateParamsBase = OpenAIOrignal.Chat.Completions.ChatCompletionCreateParams\ntype MessageCreateParams = AnthropicOriginal.Messages.MessageCreateParams\ntype ResponseCreateParams = OpenAIOrignal.Responses.ResponseCreateParams\ntype EmbeddingCreateParams = OpenAIOrignal.EmbeddingCreateParams\ntype TranscriptionCreateParams = OpenAIOrignal.Audio.Transcriptions.TranscriptionCreateParams\ntype AnthropicTool = AnthropicOriginal.Tool\n\n// limit large outputs by truncating to 200kb (approx 200k bytes)\nexport const MAX_OUTPUT_SIZE = 200000\nconst STRING_FORMAT = 'utf8'\n\n/**\n * Safely converts content to a string, preserving structure for objects/arrays.\n * - If content is already a string, returns it as-is\n * - If content is an object or array, stringifies it with JSON.stringify to preserve structure\n * - Otherwise, converts to string with String()\n *\n * This prevents the \"[object Object]\" bug when objects are naively converted to strings.\n *\n * @param content - The content to convert to a string\n * @returns A string representation that preserves structure for complex types\n */\nexport function toContentString(content: unknown): string {\n if (typeof content === 'string') {\n return content\n }\n if (content !== undefined && content !== null && typeof content === 'object') {\n try {\n return JSON.stringify(content)\n } catch {\n // Fallback for circular refs, BigInt, or objects with throwing toJSON\n return String(content)\n }\n }\n return String(content)\n}\n\nexport interface MonitoringEventPropertiesWithDefaults {\n distinctId?: string\n traceId: string\n properties?: Record<string, any>\n privacyMode: boolean\n groups?: Record<string, any>\n modelOverride?: string\n providerOverride?: string\n costOverride?: CostOverride\n captureImmediate?: boolean\n}\n\nexport type MonitoringEventProperties = Partial<MonitoringEventPropertiesWithDefaults>\n\nexport type MonitoringParams = {\n [K in keyof MonitoringEventProperties as `posthog${Capitalize<string & K>}`]: MonitoringEventProperties[K]\n}\n\nexport interface CostOverride {\n inputCost: number\n outputCost: number\n}\n\nexport const getModelParams = (\n params:\n | ((\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams)\n | null\n): Record<string, any> => {\n if (!params) {\n return {}\n }\n const modelParams: Record<string, any> = {}\n const paramKeys = [\n 'temperature',\n 'max_tokens',\n 'max_completion_tokens',\n 'top_p',\n 'frequency_penalty',\n 'presence_penalty',\n 'n',\n 'stop',\n 'stream',\n 'streaming',\n 'language',\n 'response_format',\n 'timestamp_granularities',\n ] as const\n\n for (const key of paramKeys) {\n if (key in params && (params as any)[key] !== undefined) {\n modelParams[key] = (params as any)[key]\n }\n }\n return modelParams\n}\n\n/**\n * Helper to format responses (non-streaming) for consumption\n */\nexport const formatResponse = (response: any, provider: string): FormattedMessage[] => {\n if (!response) {\n return []\n }\n if (provider === 'anthropic') {\n return formatResponseAnthropic(response)\n } else if (provider === 'openai') {\n return formatResponseOpenAI(response)\n } else if (provider === 'gemini') {\n return formatResponseGemini(response)\n }\n return []\n}\n\nexport const formatResponseAnthropic = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n const content: FormattedContent = []\n\n for (const choice of response.content ?? []) {\n if (choice?.type === 'text' && choice?.text) {\n content.push({ type: 'text', text: choice.text })\n } else if (choice?.type === 'tool_use' && choice?.name && choice?.id) {\n content.push({\n type: 'function',\n id: choice.id,\n function: {\n name: choice.name,\n arguments: choice.input || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n\n return output\n}\n\nexport const formatResponseOpenAI = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.choices) {\n for (const choice of response.choices) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n if (choice.message) {\n if (choice.message.role) {\n role = choice.message.role\n }\n\n if (choice.message.content) {\n content.push({ type: 'text', text: choice.message.content })\n }\n\n if (choice.message.tool_calls) {\n for (const toolCall of choice.message.tool_calls) {\n content.push({\n type: 'function',\n id: toolCall.id,\n function: {\n name: toolCall.function.name,\n arguments: toolCall.function.arguments,\n },\n })\n }\n }\n\n // Handle audio output (gpt-4o-audio-preview)\n if (choice.message.audio) {\n content.push({\n type: 'audio',\n ...choice.message.audio,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n }\n\n // Handle Responses API format\n if (response.output) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n for (const item of response.output) {\n if (item.type === 'message') {\n role = item.role\n\n if (item.content && Array.isArray(item.content)) {\n for (const contentItem of item.content) {\n if (contentItem.type === 'output_text' && contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.type === 'input_image' && contentItem.image_url) {\n content.push({\n type: 'image',\n image: contentItem.image_url,\n })\n }\n }\n } else if (item.content) {\n content.push({ type: 'text', text: String(item.content) })\n }\n } else if (item.type === 'function_call') {\n content.push({\n type: 'function',\n id: item.call_id || item.id || '',\n function: {\n name: item.name,\n arguments: item.arguments || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n\n return output\n}\n\nexport const formatResponseGemini = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.candidates && Array.isArray(response.candidates)) {\n for (const candidate of response.candidates) {\n if (candidate.content && candidate.content.parts) {\n const content: FormattedContent = []\n\n for (const part of candidate.content.parts) {\n if (part.text) {\n content.push({ type: 'text', text: part.text })\n } else if (part.functionCall) {\n content.push({\n type: 'function',\n function: {\n name: part.functionCall.name,\n arguments: part.functionCall.args,\n },\n })\n } else if (part.inlineData) {\n // Handle audio/media inline data\n const mimeType = part.inlineData.mimeType || 'audio/pcm'\n let data = part.inlineData.data\n\n // Handle binary data (Uint8Array/Buffer -> base64)\n if (data instanceof Uint8Array) {\n if (typeof Buffer !== 'undefined') {\n data = Buffer.from(data).toString('base64')\n } else {\n let binary = ''\n for (let i = 0; i < data.length; i++) {\n binary += String.fromCharCode(data[i])\n }\n data = btoa(binary)\n }\n }\n\n // Sanitize base64 data for images and other large inline data\n data = redactBase64DataUrl(data)\n\n content.push({\n type: 'audio',\n mime_type: mimeType,\n data: data,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n } else if (candidate.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: candidate.text }],\n })\n }\n }\n } else if (response.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: response.text }],\n })\n }\n\n return output\n}\n\nexport const mergeSystemPrompt = (params: MessageCreateParams & MonitoringParams, provider: string): any => {\n if (provider == 'anthropic') {\n const messages = params.messages || []\n if (!(params as any).system) {\n return messages\n }\n const systemMessage = (params as any).system\n return [{ role: 'system', content: systemMessage }, ...messages]\n }\n return params.messages\n}\n\nexport const withPrivacyMode = (client: PostHog, privacyMode: boolean, input: any): any => {\n return (client as any).privacy_mode || privacyMode ? null : input\n}\n\nfunction toSafeString(input: unknown): string {\n if (input === undefined || input === null) {\n return ''\n }\n if (typeof input === 'string') {\n return input\n }\n try {\n return JSON.stringify(input)\n } catch {\n console.warn('Failed to stringify input', input)\n return ''\n }\n}\n\nexport const truncate = (input: unknown): string => {\n const str = toSafeString(input)\n if (str === '') {\n return ''\n }\n\n // Check if we need to truncate and ensure STRING_FORMAT is respected\n const encoder = new TextEncoder()\n const buffer = encoder.encode(str)\n if (buffer.length <= MAX_OUTPUT_SIZE) {\n // Ensure STRING_FORMAT is respected\n return new TextDecoder(STRING_FORMAT).decode(buffer)\n }\n\n // Truncate the buffer and ensure a valid string is returned\n const truncatedBuffer = buffer.slice(0, MAX_OUTPUT_SIZE)\n // fatal: false means we get U+FFFD at the end if truncation broke the encoding\n const decoder = new TextDecoder(STRING_FORMAT, { fatal: false })\n let truncatedStr = decoder.decode(truncatedBuffer)\n if (truncatedStr.endsWith('\\uFFFD')) {\n truncatedStr = truncatedStr.slice(0, -1)\n }\n return `${truncatedStr}... [truncated]`\n}\n\n/**\n * Calculate web search count from raw API response.\n *\n * Uses a two-tier detection strategy:\n * Priority 1 (Exact Count): Count actual web search calls when available\n * Priority 2 (Binary Detection): Return 1 if web search indicators are present, 0 otherwise\n *\n * @param result - Raw API response from any provider (OpenAI, Perplexity, OpenRouter, Gemini, etc.)\n * @returns Number of web searches performed (exact count or binary 1/0)\n */\nexport function calculateWebSearchCount(result: unknown): number {\n if (!result || typeof result !== 'object') {\n return 0\n }\n\n // Priority 1: Exact Count\n // Check for OpenAI Responses API web_search_call items\n if ('output' in result && Array.isArray(result.output)) {\n let count = 0\n\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'type' in item && item.type === 'web_search_call') {\n count++\n }\n }\n\n if (count > 0) {\n return count\n }\n }\n\n // Priority 2: Binary Detection (1 or 0)\n\n // Check for citations at root level (Perplexity)\n if ('citations' in result && Array.isArray(result.citations) && result.citations.length > 0) {\n return 1\n }\n\n // Check for search_results at root level (Perplexity via OpenRouter)\n if ('search_results' in result && Array.isArray(result.search_results) && result.search_results.length > 0) {\n return 1\n }\n\n // Check for usage.search_context_size (Perplexity via OpenRouter)\n if ('usage' in result && typeof result.usage === 'object' && result.usage !== null) {\n if ('search_context_size' in result.usage && result.usage.search_context_size) {\n return 1\n }\n }\n\n // Check for annotations with url_citation in choices[].message or choices[].delta (OpenAI/Perplexity)\n if ('choices' in result && Array.isArray(result.choices)) {\n for (const choice of result.choices) {\n if (typeof choice === 'object' && choice !== null) {\n // Check both message (non-streaming) and delta (streaming) for annotations\n const content = ('message' in choice ? choice.message : null) || ('delta' in choice ? choice.delta : null)\n\n if (typeof content === 'object' && content !== null && 'annotations' in content) {\n const annotations = content.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n\n // Check for annotations in output[].content[] (OpenAI Responses API)\n if ('output' in result && Array.isArray(result.output)) {\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'content' in item) {\n const content = item.content\n\n if (Array.isArray(content)) {\n for (const contentItem of content) {\n if (typeof contentItem === 'object' && contentItem !== null && 'annotations' in contentItem) {\n const annotations = contentItem.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n }\n }\n\n // Check for grounding_metadata (Gemini)\n if ('candidates' in result && Array.isArray(result.candidates)) {\n for (const candidate of result.candidates) {\n if (\n typeof candidate === 'object' &&\n candidate !== null &&\n 'grounding_metadata' in candidate &&\n candidate.grounding_metadata\n ) {\n return 1\n }\n }\n }\n\n return 0\n}\n\n/**\n * Extract available tool calls from the request parameters.\n * These are the tools provided to the LLM, not the tool calls in the response.\n */\nexport const extractAvailableToolCalls = (\n provider: string,\n params: any\n): ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null => {\n if (provider === 'anthropic') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'gemini') {\n if (params.config && params.config.tools) {\n return params.config.tools\n }\n\n return null\n } else if (provider === 'openai') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'vercel') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n }\n\n return null\n}\n\nexport enum AIEvent {\n Generation = '$ai_generation',\n Embedding = '$ai_embedding',\n}\n\nexport type SendEventToPosthogParams = {\n client: PostHog\n eventType?: AIEvent\n distinctId?: string\n traceId: string\n model?: string\n provider: string\n input: any\n output: any\n latency: number\n timeToFirstToken?: number\n baseURL: string\n httpStatus: number\n usage?: TokenUsage\n params: (\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams\n error?: unknown\n exceptionId?: string\n tools?: ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null\n captureImmediate?: boolean\n}\n\nfunction sanitizeValues(obj: any): any {\n if (obj === undefined || obj === null) {\n return obj\n }\n const jsonSafe = JSON.parse(JSON.stringify(obj))\n if (typeof jsonSafe === 'string') {\n // Sanitize lone surrogates by round-tripping through UTF-8\n return new TextDecoder().decode(new TextEncoder().encode(jsonSafe))\n } else if (Array.isArray(jsonSafe)) {\n return jsonSafe.map(sanitizeValues)\n } else if (jsonSafe && typeof jsonSafe === 'object') {\n return Object.fromEntries(Object.entries(jsonSafe).map(([k, v]) => [k, sanitizeValues(v)]))\n }\n return jsonSafe\n}\n\nconst POSTHOG_PARAMS_MAP: Record<keyof MonitoringParams, string> = {\n posthogDistinctId: 'distinctId',\n posthogTraceId: 'traceId',\n posthogProperties: 'properties',\n posthogPrivacyMode: 'privacyMode',\n posthogGroups: 'groups',\n posthogModelOverride: 'modelOverride',\n posthogProviderOverride: 'providerOverride',\n posthogCostOverride: 'costOverride',\n posthogCaptureImmediate: 'captureImmediate',\n}\n\nexport function extractPosthogParams<T>(body: T & MonitoringParams): {\n providerParams: T\n posthogParams: MonitoringEventPropertiesWithDefaults\n} {\n const providerParams: Record<string, unknown> = {}\n const posthogParams: Record<string, unknown> = {}\n\n for (const [key, value] of Object.entries(body)) {\n if (POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]) {\n posthogParams[POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]] = value\n } else if (key.startsWith('posthog')) {\n console.warn(`Unknown Posthog parameter ${key}`)\n } else {\n providerParams[key] = value\n }\n }\n\n return {\n providerParams: providerParams as T,\n posthogParams: addDefaults(posthogParams),\n }\n}\n\nfunction addDefaults(params: MonitoringEventProperties): MonitoringEventPropertiesWithDefaults {\n return {\n ...params,\n privacyMode: params.privacyMode ?? false,\n traceId: params.traceId ?? uuidv4(),\n }\n}\n\nexport const sendEventWithErrorToPosthog = async ({\n client,\n traceId,\n error,\n ...args\n}: Omit<SendEventToPosthogParams, 'error' | 'httpStatus'> &\n Required<Pick<SendEventToPosthogParams, 'error'>>): Promise<unknown> => {\n const httpStatus =\n error && typeof error === 'object' && 'status' in error ? ((error as { status?: number }).status ?? 500) : 500\n\n const properties = { client, traceId, httpStatus, error: JSON.stringify(error), ...args }\n const enrichedError = error as CoreErrorTracking.PreviouslyCapturedError\n\n if (client.options?.enableExceptionAutocapture) {\n // assign a uuid that can be used to link the trace and exception events\n const exceptionId = uuidv7()\n client.captureException(error, undefined, { $ai_trace_id: traceId }, exceptionId)\n enrichedError.__posthog_previously_captured_error = true\n properties.exceptionId = exceptionId\n }\n\n await sendEventToPosthog(properties)\n\n return enrichedError\n}\n\nexport const sendEventToPosthog = async ({\n client,\n eventType = AIEvent.Generation,\n distinctId,\n traceId,\n model,\n provider,\n input,\n output,\n latency,\n timeToFirstToken,\n baseURL,\n params,\n httpStatus = 200,\n usage = {},\n error,\n exceptionId,\n tools,\n captureImmediate = false,\n}: SendEventToPosthogParams): Promise<void> => {\n if (!client.capture) {\n return Promise.resolve()\n }\n // sanitize input and output for UTF-8 validity\n const safeInput = sanitizeValues(input)\n const safeOutput = sanitizeValues(output)\n const safeError = sanitizeValues(error)\n\n let errorData = {}\n if (error) {\n errorData = {\n $ai_is_error: true,\n $ai_error: safeError,\n $exception_event_id: exceptionId,\n }\n }\n let costOverrideData = {}\n if (params.posthogCostOverride) {\n const inputCostUSD = (params.posthogCostOverride.inputCost ?? 0) * (usage.inputTokens ?? 0)\n const outputCostUSD = (params.posthogCostOverride.outputCost ?? 0) * (usage.outputTokens ?? 0)\n costOverrideData = {\n $ai_input_cost_usd: inputCostUSD,\n $ai_output_cost_usd: outputCostUSD,\n $ai_total_cost_usd: inputCostUSD + outputCostUSD,\n }\n }\n\n const additionalTokenValues = {\n ...(usage.reasoningTokens ? { $ai_reasoning_tokens: usage.reasoningTokens } : {}),\n ...(usage.cacheReadInputTokens ? { $ai_cache_read_input_tokens: usage.cacheReadInputTokens } : {}),\n ...(usage.cacheCreationInputTokens ? { $ai_cache_creation_input_tokens: usage.cacheCreationInputTokens } : {}),\n ...(usage.webSearchCount ? { $ai_web_search_count: usage.webSearchCount } : {}),\n ...(usage.rawUsage ? { $ai_usage: usage.rawUsage } : {}),\n }\n\n const properties = {\n $ai_lib: 'posthog-ai',\n $ai_lib_version: version,\n $ai_provider: params.posthogProviderOverride ?? provider,\n $ai_model: params.posthogModelOverride ?? model,\n $ai_model_parameters: getModelParams(params),\n $ai_input: withPrivacyMode(client, params.posthogPrivacyMode ?? false, safeInput),\n $ai_output_choices: withPrivacyMode(client, params.posthogPrivacyMode ?? false, safeOutput),\n $ai_http_status: httpStatus,\n $ai_input_tokens: usage.inputTokens ?? 0,\n ...(usage.outputTokens !== undefined ? { $ai_output_tokens: usage.outputTokens } : {}),\n ...additionalTokenValues,\n $ai_latency: latency,\n ...(timeToFirstToken !== undefined ? { $ai_time_to_first_token: timeToFirstToken } : {}),\n $ai_trace_id: traceId,\n $ai_base_url: baseURL,\n ...params.posthogProperties,\n ...(distinctId ? {} : { $process_person_profile: false }),\n ...(tools ? { $ai_tools: tools } : {}),\n ...errorData,\n ...costOverrideData,\n }\n\n const event: EventMessage = {\n distinctId: distinctId ?? traceId,\n event: eventType,\n properties,\n groups: params.posthogGroups,\n }\n\n if (captureImmediate) {\n // await capture promise to send single event in serverless environments\n await client.captureImmediate(event)\n } else {\n client.capture(event)\n }\n\n return Promise.resolve()\n}\n\nexport function formatOpenAIResponsesInput(input: unknown, instructions?: string | null): FormattedMessage[] {\n const messages: FormattedMessage[] = []\n\n if (instructions) {\n messages.push({\n role: 'system',\n content: instructions,\n })\n }\n\n if (Array.isArray(input)) {\n for (const item of input) {\n if (typeof item === 'string') {\n messages.push({ role: 'user', content: item })\n } else if (item && typeof item === 'object') {\n const obj = item as Record<string, unknown>\n const role = isString(obj.role) ? obj.role : 'user'\n\n // Handle content properly - preserve structure for objects/arrays\n const content = obj.content ?? obj.text ?? item\n messages.push({ role, content: toContentString(content) })\n } else {\n messages.push({ role: 'user', content: toContentString(item) })\n }\n }\n } else if (typeof input === 'string') {\n messages.push({ role: 'user', content: input })\n } else if (input) {\n messages.push({ role: 'user', content: toContentString(input) })\n }\n\n return messages\n}\n","import { AIEvent, truncate } from '../../utils'\nimport { redactBase64DataUrl } from '../../sanitization'\nimport type { ReadableSpan } from '@opentelemetry/sdk-trace-base'\nimport type { PostHogSpanMapper, PostHogSpanMapperResult, UsageData } from '../types'\n\nconst OTEL_STATUS_ERROR = 2\nconst AI_TELEMETRY_METADATA_PREFIX = 'ai.telemetry.metadata.'\n\ntype InstrumentationInfo = {\n version?: unknown\n}\n\ntype ReadableSpanWithInstrumentation = ReadableSpan & {\n instrumentationScope?: InstrumentationInfo\n instrumentationLibrary?: InstrumentationInfo\n}\n\nfunction parseJsonValue<T>(value: unknown): T | null {\n if (value === undefined || value === null) {\n return null\n }\n if (typeof value !== 'string') {\n return value as T\n }\n try {\n return JSON.parse(value) as T\n } catch {\n return null\n }\n}\n\nfunction toNumber(value: unknown): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value)) {\n return value\n }\n if (typeof value === 'string') {\n const parsed = Number(value)\n if (Number.isFinite(parsed)) {\n return parsed\n }\n }\n return undefined\n}\n\nfunction toStringValue(value: unknown): string | undefined {\n return typeof value === 'string' ? value : undefined\n}\n\nfunction toStringArray(value: unknown): string[] {\n if (!Array.isArray(value)) {\n return []\n }\n return value.filter((item): item is string => typeof item === 'string')\n}\n\nfunction toSafeBinaryData(value: unknown): string {\n const asString = typeof value === 'string' ? value : JSON.stringify(value ?? '')\n return truncate(redactBase64DataUrl(asString))\n}\n\nfunction toMimeType(value: unknown): string {\n return typeof value === 'string' && value.length > 0 ? value : 'application/octet-stream'\n}\n\nfunction getSpanLatencySeconds(span: ReadableSpan): number {\n const duration = span.duration\n if (!duration || !Array.isArray(duration) || duration.length !== 2) {\n return 0\n }\n const seconds = Number(duration[0]) || 0\n const nanos = Number(duration[1]) || 0\n return seconds + nanos / 1_000_000_000\n}\n\nfunction getOperationId(span: ReadableSpan): string {\n const attributes = span.attributes || {}\n const operationId = toStringValue(attributes['ai.operationId'])\n if (operationId) {\n return operationId\n }\n return span.name || ''\n}\n\nfunction isDoGenerateSpan(operationId: string): boolean {\n return operationId.endsWith('.doGenerate')\n}\n\nfunction isDoStreamSpan(operationId: string): boolean {\n return operationId.endsWith('.doStream')\n}\n\nfunction isDoEmbedSpan(operationId: string): boolean {\n return operationId.endsWith('.doEmbed')\n}\n\nfunction shouldMapAiSdkSpan(span: ReadableSpan): boolean {\n const operationId = getOperationId(span)\n return isDoGenerateSpan(operationId) || isDoStreamSpan(operationId) || isDoEmbedSpan(operationId)\n}\n\nfunction extractAiSdkTelemetryMetadata(attributes: Record<string, unknown>): Record<string, unknown> {\n const metadata: Record<string, unknown> = {}\n for (const [key, value] of Object.entries(attributes)) {\n if (key.startsWith(AI_TELEMETRY_METADATA_PREFIX)) {\n metadata[key.slice(AI_TELEMETRY_METADATA_PREFIX.length)] = value\n }\n }\n\n if (metadata.traceId && typeof metadata.traceId === 'string') {\n metadata.trace_id = metadata.traceId\n }\n\n return metadata\n}\n\nfunction mapPromptMessagesInput(attributes: Record<string, unknown>): Array<Record<string, unknown>> {\n const promptMessages = parseJsonValue<Array<Record<string, unknown>>>(attributes['ai.prompt.messages']) || []\n if (!Array.isArray(promptMessages)) {\n return []\n }\n\n return promptMessages.map((message) => {\n const role = typeof message?.role === 'string' ? message.role : 'user'\n const content = message?.content\n\n if (typeof content === 'string') {\n return {\n role,\n content: [{ type: 'text', text: truncate(content) }],\n }\n }\n\n if (Array.isArray(content)) {\n return {\n role,\n content: content.map((part) => {\n if (part && typeof part === 'object' && 'type' in part) {\n const typedPart = part as Record<string, unknown>\n if (typedPart.type === 'text' && typeof typedPart.text === 'string') {\n return {\n type: 'text',\n text: truncate(typedPart.text),\n }\n }\n return typedPart\n }\n return { type: 'text', text: truncate(String(part)) }\n }),\n }\n }\n\n return {\n role,\n content: [{ type: 'text', text: truncate(content) }],\n }\n })\n}\n\nfunction mapPromptInput(attributes: Record<string, unknown>, operationId: string): any {\n if (isDoEmbedSpan(operationId)) {\n if (attributes['ai.values'] !== undefined) {\n return attributes['ai.values']\n }\n return attributes['ai.value'] ?? null\n }\n\n const promptMessages = mapPromptMessagesInput(attributes)\n if (promptMessages.length > 0) {\n return promptMessages\n }\n\n if (attributes['ai.prompt'] !== undefined) {\n return [{ role: 'user', content: [{ type: 'text', text: truncate(attributes['ai.prompt']) }] }]\n }\n\n return []\n}\n\nfunction mapOutputPart(part: Record<string, unknown>): Record<string, unknown> | null {\n const partType = toStringValue(part.type)\n\n if (partType === 'text' && typeof part.text === 'string') {\n return { type: 'text', text: truncate(part.text) }\n }\n\n if (partType === 'tool-call') {\n const toolName = toStringValue(part.toolName) || toStringValue((part as any).function?.name) || ''\n const toolCallId = toStringValue(part.toolCallId) || toStringValue(part.id) || ''\n const input = 'input' in part ? part.input : (part as any).function?.arguments\n if (toolName) {\n return {\n type: 'tool-call',\n id: toolCallId,\n function: {\n name: toolName,\n arguments: typeof input === 'string' ? input : JSON.stringify(input ?? {}),\n },\n }\n }\n }\n\n if (partType === 'file') {\n const mediaType = toMimeType(part.mediaType ?? part.mimeType ?? part.contentType)\n const data = part.data ?? part.base64 ?? part.bytes ?? part.url ?? part.uri\n if (data !== undefined) {\n return {\n type: 'file',\n name: 'generated_file',\n mediaType,\n data: toSafeBinaryData(data),\n }\n }\n }\n\n if (partType === 'image') {\n const mediaType = toMimeType(part.mediaType ?? part.mimeType ?? part.contentType ?? 'image/unknown')\n const data =\n part.data ?? part.base64 ?? part.bytes ?? part.url ?? part.uri ?? (part as any).image ?? (part as any).image_url\n if (data !== undefined) {\n return {\n type: 'file',\n name: 'generated_file',\n mediaType,\n data: toSafeBinaryData(data),\n }\n }\n }\n\n const inlineData = (part as any).inlineData ?? (part as any).inline_data\n if (inlineData && typeof inlineData === 'object' && (inlineData as any).data !== undefined) {\n const mediaType = toMimeType((inlineData as any).mimeType ?? (inlineData as any).mime_type)\n return {\n type: 'file',\n name: 'generated_file',\n mediaType,\n data: toSafeBinaryData((inlineData as any).data),\n }\n }\n\n if (partType === 'object' && part.object !== undefined) {\n return {\n type: 'object',\n object: part.object,\n }\n }\n\n return null\n}\n\nfunction mapResponseMessagesOutput(attributes: Record<string, unknown>): Array<Record<string, unknown>> {\n const messagesRaw =\n parseJsonValue<Array<Record<string, unknown>>>(attributes['ai.response.messages']) ??\n parseJsonValue<Record<string, unknown>>(attributes['ai.response.message'])\n\n if (!messagesRaw) {\n return []\n }\n\n const messages = Array.isArray(messagesRaw) ? messagesRaw : [messagesRaw]\n const mappedMessages: Array<Record<string, unknown>> = []\n\n for (const message of messages) {\n if (!message || typeof message !== 'object') {\n continue\n }\n\n const role = toStringValue(message.role) || 'assistant'\n const content = (message as any).content\n\n if (typeof content === 'string') {\n mappedMessages.push({\n role,\n content: [{ type: 'text', text: truncate(content) }],\n })\n continue\n }\n\n if (Array.isArray(content)) {\n const parts = content\n .map((part) => (part && typeof part === 'object' ? mapOutputPart(part as Record<string, unknown>) : null))\n .filter((part): part is Record<string, unknown> => part !== null)\n if (parts.length > 0) {\n mappedMessages.push({\n role,\n content: parts,\n })\n }\n continue\n }\n }\n\n return mappedMessages\n}\n\nfunction mapTextToolObjectOutputParts(attributes: Record<string, unknown>): Array<Record<string, unknown>> {\n const responseText = toStringValue(attributes['ai.response.text']) || ''\n const toolCalls = parseJsonValue<Array<Record<string, unknown>>>(attributes['ai.response.toolCalls']) || []\n const responseObjectRaw = attributes['ai.response.object']\n const responseObject = parseJsonValue(responseObjectRaw)\n const contentParts: Array<Record<string, unknown>> = []\n\n if (responseText) {\n contentParts.push({ type: 'text', text: truncate(responseText) })\n }\n\n if (responseObjectRaw !== undefined) {\n contentParts.push({\n type: 'object',\n object: responseObject ?? responseObjectRaw,\n })\n }\n\n if (Array.isArray(toolCalls)) {\n for (const toolCall of toolCalls) {\n if (!toolCall || typeof toolCall !== 'object') {\n continue\n }\n\n const toolName = typeof toolCall.toolName === 'string' ? toolCall.toolName : ''\n const toolCallId = typeof toolCall.toolCallId === 'string' ? toolCall.toolCallId : ''\n if (!toolName) {\n continue\n }\n\n const input = 'input' in toolCall ? toolCall.input : {}\n contentParts.push({\n type: 'tool-call',\n id: toolCallId,\n function: {\n name: toolName,\n arguments: typeof input === 'string' ? input : JSON.stringify(input),\n },\n })\n }\n }\n\n return contentParts\n}\n\nfunction mapResponseFilesOutput(attributes: Record<string, unknown>): Array<Record<string, unknown>> {\n const responseFiles = parseJsonValue<Array<Record<string, unknown>>>(attributes['ai.response.files']) || []\n if (!Array.isArray(responseFiles)) {\n return []\n }\n\n const mapped: Array<Record<string, unknown>> = []\n for (const file of responseFiles) {\n if (!file || typeof file !== 'object') {\n continue\n }\n\n const mimeType = toMimeType(file.mimeType ?? file.mediaType ?? file.contentType)\n const data = file.data ?? file.base64 ?? file.bytes\n const url = typeof file.url === 'string' ? file.url : typeof file.uri === 'string' ? file.uri : undefined\n\n if (data !== undefined) {\n mapped.push({\n type: 'file',\n name: 'generated_file',\n mediaType: mimeType,\n data: toSafeBinaryData(data),\n })\n continue\n }\n\n if (url) {\n mapped.push({\n type: 'file',\n name: 'generated_file',\n mediaType: mimeType,\n data: truncate(url),\n })\n }\n }\n\n return mapped\n}\n\nfunction extractGeminiParts(providerMetadata: unknown): Array<Record<string, unknown>> {\n const parts: Array<Record<string, unknown>> = []\n const visit = (node: unknown): void => {\n if (!node || typeof node !== 'object') {\n return\n }\n if (Array.isArray(node)) {\n for (const item of node) {\n visit(item)\n }\n return\n }\n\n const objectNode = node as Record<string, unknown>\n const maybeParts = objectNode.parts\n if (Array.isArray(maybeParts)) {\n for (const part of maybeParts) {\n if (part && typeof part === 'object') {\n parts.push(part as Record<string, unknown>)\n }\n }\n }\n\n for (const value of Object.values(objectNode)) {\n visit(value)\n }\n }\n\n visit(providerMetadata)\n return parts\n}\n\nfunction mapProviderMetadataInlineDataOutput(providerMetadata: unknown): Array<Record<string, unknown>> {\n const parts = extractGeminiParts(providerMetadata)\n const mapped: Array<Record<string, unknown>> = []\n\n for (const part of parts) {\n const inlineData = part.inlineData ?? part.inline_data\n if (!inlineData || typeof inlineData !== 'object') {\n continue\n }\n\n const mimeType = toMimeType((inlineData as any).mimeType ?? (inlineData as any).mime_type)\n if ((inlineData as any).data === undefined) {\n continue\n }\n\n mapped.push({\n type: 'file',\n name: 'generated_file',\n mediaType: mimeType,\n data: toSafeBinaryData((inlineData as any).data),\n })\n }\n\n return mapped\n}\n\nfunction mapProviderMetadataTextOutput(providerMetadata: unknown): Array<Record<string, unknown>> {\n const parts = extractGeminiParts(providerMetadata)\n const mapped: Array<Record<string, unknown>> = []\n for (const part of parts) {\n if (typeof part.text === 'string' && part.text.length > 0) {\n mapped.push({\n type: 'text',\n text: truncate(part.text),\n })\n }\n }\n return mapped\n}\n\nfunction extractMediaBlocksFromUnknownNode(node: unknown): Array<Record<string, unknown>> {\n const mapped: Array<Record<string, unknown>> = []\n\n const visit = (value: unknown): void => {\n if (!value || typeof value !== 'object') {\n return\n }\n if (Array.isArray(value)) {\n for (const item of value) {\n visit(item)\n }\n return\n }\n\n const objectValue = value as Record<string, unknown>\n const inlineData = (objectValue as any).inlineData ?? (objectValue as any).inline_data\n if (inlineData && typeof inlineData === 'object' && (inlineData as any).data !== undefined) {\n const mediaType = toMimeType((inlineData as any).mimeType ?? (inlineData as any).mime_type)\n mapped.push({\n type: 'file',\n name: 'generated_file',\n mediaType,\n data: toSafeBinaryData((inlineData as any).data),\n })\n }\n\n if ((objectValue.type === 'file' || 'mediaType' in objectValue || 'mimeType' in objectValue) && objectValue.data) {\n const mediaType = toMimeType((objectValue as any).mediaType ?? (objectValue as any).mimeType)\n mapped.push({\n type: 'file',\n name: 'generated_file',\n mediaType,\n data: toSafeBinaryData(objectValue.data),\n })\n }\n\n for (const child of Object.values(objectValue)) {\n visit(child)\n }\n }\n\n visit(node)\n return mapped\n}\n\nfunction mapUnknownResponseAttributeMediaOutput(attributes: Record<string, unknown>): Array<Record<string, unknown>> {\n const mapped: Array<Record<string, unknown>> = []\n for (const [key, value] of Object.entries(attributes)) {\n if (!key.startsWith('ai.response.')) {\n continue\n }\n if (\n key === 'ai.response.text' ||\n key === 'ai.response.toolCalls' ||\n key === 'ai.response.object' ||\n key === 'ai.response.files' ||\n key === 'ai.response.message' ||\n key === 'ai.response.messages' ||\n key === 'ai.response.providerMetadata'\n ) {\n continue\n }\n\n const parsed = typeof value === 'string' ? (parseJsonValue(value) ?? value) : value\n mapped.push(...extractMediaBlocksFromUnknownNode(parsed))\n }\n return mapped\n}\n\nfunction mapGenericResponseAttributeMediaOutput(attributes: Record<string, unknown>): Array<Record<string, unknown>> {\n const mapped: Array<Record<string, unknown>> = []\n for (const [key, value] of Object.entries(attributes)) {\n if (\n !key.includes('response') ||\n key.startsWith('ai.response.') ||\n key === 'ai.response.providerMetadata' ||\n key.startsWith('ai.prompt.') ||\n key.startsWith('gen_ai.request.')\n ) {\n continue\n }\n\n const parsed = typeof value === 'string' ? parseJsonValue(value) : value\n if (parsed === null || parsed === undefined) {\n continue\n }\n mapped.push(...extractMediaBlocksFromUnknownNode(parsed))\n }\n return mapped\n}\n\nfunction dedupeContentParts(parts: Array<Record<string, unknown>>): Array<Record<string, unknown>> {\n const seen = new Set<string>()\n const deduped: Array<Record<string, unknown>> = []\n for (const part of parts) {\n const key = JSON.stringify(part)\n if (seen.has(key)) {\n continue\n }\n seen.add(key)\n deduped.push(part)\n }\n return deduped\n}\n\nfunction mapOutput(attributes: Record<string, unknown>, operationId: string, providerMetadata: unknown): any {\n if (isDoEmbedSpan(operationId)) {\n // Keep embedding behavior aligned with existing provider wrappers.\n return null\n }\n\n const responseMessages = mapResponseMessagesOutput(attributes)\n if (responseMessages.length > 0) {\n return responseMessages\n }\n\n const textToolObjectParts = mapTextToolObjectOutputParts(attributes)\n const responseFileParts = mapResponseFilesOutput(attributes)\n const unknownMediaParts = mapUnknownResponseAttributeMediaOutput(attributes)\n const genericResponseMediaParts = mapGenericResponseAttributeMediaOutput(attributes)\n const providerMetadataTextParts = mapProviderMetadataTextOutput(providerMetadata)\n const providerMetadataInlineParts = mapProviderMetadataInlineDataOutput(providerMetadata)\n\n const mergedContentParts = dedupeContentParts([\n ...textTool