UNPKG

@posthog/ai

Version:
1 lines 110 kB
{"version":3,"file":"index.mjs","sources":["../../src/typeGuards.ts","../../src/sanitization/base64_recognizer.ts","../../src/sanitization/media_type_context.ts","../../src/sanitization/binary_content_redactor.ts","../../src/sanitization.ts","../../src/utils.ts","../../src/serializeError.ts","../../src/captureAiGeneration.ts","../../src/vercel/middleware.ts"],"sourcesContent":["// Type guards for safer type checking\n\nexport const isString = (value: unknown): value is string => {\n return typeof value === 'string'\n}\n\nexport const isObject = (value: unknown): value is Record<string, unknown> => {\n return value !== null && typeof value === 'object' && !Array.isArray(value)\n}\n","const DATA_URL_PREFIX_RE = /^data:([^;,\\s]+)(?:;[^;,\\s]+)*;base64,/i\nconst BASE64_ALPHABET_RE = /^[A-Za-z0-9+/_=-]+$/\n\nexport type Base64Recognition = { kind: 'data-url'; mediaType: string } | { kind: 'raw' } | { kind: 'none' }\n\nexport class Base64Recognizer {\n recognize(value: string, minLength: number): Base64Recognition {\n const dataUrl = DATA_URL_PREFIX_RE.exec(value)\n if (dataUrl) return { kind: 'data-url', mediaType: dataUrl[1] }\n\n if (value.length < minLength) return { kind: 'none' }\n\n const confidencePrefix = value.slice(0, minLength)\n if (BASE64_ALPHABET_RE.test(confidencePrefix)) {\n return { kind: 'raw' }\n } else {\n return { kind: 'none' }\n }\n }\n}\n","const MIME_HINT_KEYS = ['mediaType', 'media_type', 'mimeType', 'mime_type'] as const\n\nconst STRONG_CONTEXT_KEYS = new Set([\n 'data',\n 'file_data',\n 'fileData',\n 'image_url',\n 'imageUrl',\n 'video_url',\n 'videoUrl',\n 'audio',\n 'audio_data',\n 'audioData',\n 'inline_data',\n 'inlineData',\n 'source',\n 'result',\n])\n\nconst STRONG_CONTEXT_TYPES = new Set([\n 'image',\n 'image_url',\n 'input_image',\n 'audio',\n 'input_audio',\n 'video',\n 'video_url',\n 'file',\n 'input_file',\n 'document',\n 'media',\n 'file-data',\n])\n\nconst FILE_FAMILY_TYPES = new Set(['file', 'input_file', 'document', 'media', 'file-data'])\n\nconst KNOWN_AUDIO_FORMATS = new Set(['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'webm'])\n\nexport class MediaTypeContext {\n static readonly EMPTY = new MediaTypeContext(undefined, undefined)\n\n constructor(\n private readonly parent: Record<string, unknown> | undefined,\n private readonly key: string | undefined\n ) {}\n\n inferMediaType(): string | undefined {\n return (\n this.inferFromSiblingMime() ?? this.inferFromSiblingFormat() ?? this.inferFromParentType() ?? this.inferFromKey()\n )\n }\n\n inferFromSiblingMime(): string | undefined {\n if (!this.parent) return undefined\n for (const hint of MIME_HINT_KEYS) {\n const v = this.parent[hint]\n if (typeof v === 'string') return v\n }\n return undefined\n }\n\n inferFromSiblingFormat(): string | undefined {\n if (!this.parent) return undefined\n const fmt = this.parent.format\n if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) {\n return `audio/${fmt.toLowerCase()}`\n }\n return undefined\n }\n\n inferFromParentType(): string | undefined {\n if (!this.parent) return undefined\n const t = this.parent.type\n if (typeof t !== 'string') return undefined\n if (t === 'image' || t === 'image_url' || t === 'input_image') return 'image'\n if (t === 'audio' || t === 'input_audio') return 'audio'\n if (t === 'video' || t === 'video_url') return 'video'\n if (FILE_FAMILY_TYPES.has(t)) return 'application/octet-stream'\n return undefined\n }\n\n inferFromKey(): string | undefined {\n if (!this.key) return undefined\n const key = this.key.toLowerCase()\n if (key.includes('audio')) return 'audio'\n if (key.includes('video')) return 'video'\n if (key.includes('image')) return 'image'\n if (key.includes('file') || key.includes('document')) return 'application/octet-stream'\n return undefined\n }\n\n signalsBinary(): boolean {\n if (this.parent) {\n for (const hint of MIME_HINT_KEYS) {\n if (typeof this.parent[hint] === 'string') return true\n }\n const fmt = this.parent.format\n if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) return true\n const t = this.parent.type\n if (typeof t === 'string' && STRONG_CONTEXT_TYPES.has(t)) return true\n }\n if (this.key && STRONG_CONTEXT_KEYS.has(this.key)) return true\n return false\n }\n}\n","import { Base64Recognizer } from './base64_recognizer'\nimport { MediaTypeContext } from './media_type_context'\n\nconst STRONG_CONTEXT_MIN_LENGTH = 64\nconst WEAK_CONTEXT_MIN_LENGTH = 1024\n\nexport class BinaryContentRedactor {\n private visited: WeakSet<object> = new WeakSet()\n\n constructor(private readonly recognizer: Base64Recognizer = new Base64Recognizer()) {}\n\n redact<T>(value: T): T\n redact(value: unknown): unknown {\n if (this.isMultimodalEnabled()) return value\n this.visited = new WeakSet()\n return this.walk(value, MediaTypeContext.EMPTY)\n }\n\n private walk(value: unknown, ctx: MediaTypeContext): unknown {\n if (value === null || value === undefined) return value\n if (typeof value === 'string') return this.redactString(value, ctx)\n if (typeof value !== 'object') return value\n\n // Buffer extends Uint8Array, so this branch catches both.\n if (typeof Uint8Array !== 'undefined' && value instanceof Uint8Array) {\n return this.placeholderFor(ctx.inferMediaType())\n }\n\n if (this.visited.has(value)) return null\n this.visited.add(value)\n\n if (Array.isArray(value)) {\n return value.map((item) => this.walk(item, ctx))\n }\n\n const obj = value as Record<string, unknown>\n const out: Record<string, unknown> = {}\n for (const k of Object.keys(obj)) {\n out[k] = this.walk(obj[k], new MediaTypeContext(obj, k))\n }\n return out\n }\n\n private redactString(value: string, ctx: MediaTypeContext): string {\n const minLength = ctx.signalsBinary() ? STRONG_CONTEXT_MIN_LENGTH : WEAK_CONTEXT_MIN_LENGTH\n const recognition = this.recognizer.recognize(value, minLength)\n switch (recognition.kind) {\n case 'data-url':\n return this.placeholderFor(recognition.mediaType)\n case 'raw':\n return this.placeholderFor(ctx.inferMediaType())\n case 'none':\n return value\n }\n }\n\n private placeholderFor(mediaType?: string): string {\n if (!mediaType) return '[base64 redacted]'\n if (mediaType === 'application/octet-stream') return '[base64 file redacted]'\n return `[base64 ${mediaType} redacted]`\n }\n\n private isMultimodalEnabled(): boolean {\n const val = process.env._INTERNAL_LLMA_MULTIMODAL || ''\n return val.toLowerCase() === 'true' || val === '1' || val.toLowerCase() === 'yes'\n }\n}\n","import { BinaryContentRedactor } from './sanitization/binary_content_redactor'\n\nconst redactor = new BinaryContentRedactor()\n\nexport function redactBase64DataUrl(str: string): string\nexport function redactBase64DataUrl(str: unknown): unknown\nexport function redactBase64DataUrl(str: unknown): unknown {\n return redactor.redact(str)\n}\n\nexport const sanitizeOpenAI = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeOpenAIResponse = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeAnthropic = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeGemini = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeLangChain = (data: unknown): unknown => redactor.redact(data)\n","import { PostHog } from 'posthog-node'\nimport OpenAIOrignal from 'openai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { ResponseCreateParamsWithTools } from 'openai/lib/ResponsesParser'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport type {\n FormattedMessage,\n FormattedContent,\n FormattedAudioContent,\n FormattedImageContent,\n FormattedDocumentContent,\n} from './types'\nimport { v4 as uuidv4 } from 'uuid'\nimport { isString } from './typeGuards'\nimport { redactBase64DataUrl } from './sanitization'\n\ntype ChatCompletionCreateParamsBase = OpenAIOrignal.Chat.Completions.ChatCompletionCreateParams\ntype MessageCreateParams = AnthropicOriginal.Messages.MessageCreateParams\ntype ResponseCreateParams = OpenAIOrignal.Responses.ResponseCreateParams\ntype EmbeddingCreateParams = OpenAIOrignal.EmbeddingCreateParams\ntype TranscriptionCreateParams = OpenAIOrignal.Audio.Transcriptions.TranscriptionCreateParams\ntype AnthropicTool = AnthropicOriginal.Tool\n\nconst TOKEN_PROPERTY_KEYS = new Set([\n '$ai_input_tokens',\n '$ai_output_tokens',\n '$ai_cache_read_input_tokens',\n '$ai_cache_creation_input_tokens',\n '$ai_total_tokens',\n '$ai_reasoning_tokens',\n])\n\nexport function getTokensSource(posthogProperties?: Record<string, unknown>): string {\n if (posthogProperties && Object.keys(posthogProperties).some((key) => TOKEN_PROPERTY_KEYS.has(key))) {\n return 'passthrough'\n }\n return 'sdk'\n}\n\n// limit large outputs by truncating to 200kb (approx 200k bytes)\nexport const MAX_OUTPUT_SIZE = 200000\nconst STRING_FORMAT = 'utf8'\n\n// Reused across calls to avoid per-invocation allocation; truncate() runs\n// hundreds of times for prompts with many parts.\nconst sharedTextEncoder = new TextEncoder()\nconst sharedTextDecoder = new TextDecoder(STRING_FORMAT, { fatal: false })\n\nexport const utf8ByteLength = (str: string): number => sharedTextEncoder.encode(str).byteLength\n\n/**\n * Safely converts content to a string, preserving structure for objects/arrays.\n * - If content is already a string, returns it as-is\n * - If content is an object or array, stringifies it with JSON.stringify to preserve structure\n * - Otherwise, converts to string with String()\n *\n * This prevents the \"[object Object]\" bug when objects are naively converted to strings.\n *\n * @param content - The content to convert to a string\n * @returns A string representation that preserves structure for complex types\n */\nexport function toContentString(content: unknown): string {\n if (typeof content === 'string') {\n return content\n }\n if (content !== undefined && content !== null && typeof content === 'object') {\n try {\n return JSON.stringify(content)\n } catch {\n // Fallback for circular refs, BigInt, or objects with throwing toJSON\n return String(content)\n }\n }\n return String(content)\n}\n\nexport interface MonitoringEventPropertiesWithDefaults {\n distinctId?: string\n traceId: string\n properties?: Record<string, any>\n privacyMode: boolean\n groups?: Record<string, any>\n modelOverride?: string\n providerOverride?: string\n costOverride?: CostOverride\n captureImmediate?: boolean\n}\n\nexport type MonitoringEventProperties = Partial<MonitoringEventPropertiesWithDefaults>\n\nexport type MonitoringParams = {\n [K in keyof MonitoringEventProperties as `posthog${Capitalize<string & K>}`]: MonitoringEventProperties[K]\n}\n\nexport interface CostOverride {\n inputCost: number\n outputCost: number\n}\n\nexport const getModelParams = (\n params:\n | ((\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams)\n | null\n): Record<string, any> => {\n if (!params) {\n return {}\n }\n const modelParams: Record<string, any> = {}\n const paramKeys = [\n 'temperature',\n 'max_tokens',\n 'max_completion_tokens',\n 'top_p',\n 'frequency_penalty',\n 'presence_penalty',\n 'n',\n 'stop',\n 'stream',\n 'streaming',\n 'language',\n 'response_format',\n 'timestamp_granularities',\n ] as const\n\n for (const key of paramKeys) {\n if (key in params && (params as any)[key] !== undefined) {\n modelParams[key] = (params as any)[key]\n }\n }\n return modelParams\n}\n\n/**\n * Helper to format responses (non-streaming) for consumption\n */\nexport const formatResponse = (response: any, provider: string): FormattedMessage[] => {\n if (!response) {\n return []\n }\n if (provider === 'anthropic') {\n return formatResponseAnthropic(response)\n } else if (provider === 'openai') {\n return formatResponseOpenAI(response)\n } else if (provider === 'gemini') {\n return formatResponseGemini(response)\n }\n return []\n}\n\nexport const formatResponseAnthropic = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n const content: FormattedContent = []\n\n for (const choice of response.content ?? []) {\n if (choice?.type === 'text' && choice?.text) {\n content.push({ type: 'text', text: choice.text })\n } else if (choice?.type === 'tool_use' && choice?.name && choice?.id) {\n content.push({\n type: 'function',\n id: choice.id,\n function: {\n name: choice.name,\n arguments: choice.input || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n\n return output\n}\n\nexport const formatResponseOpenAI = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.choices) {\n for (const choice of response.choices) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n if (choice.message) {\n if (choice.message.role) {\n role = choice.message.role\n }\n\n if (choice.message.content) {\n content.push({ type: 'text', text: choice.message.content })\n }\n\n if (choice.message.tool_calls) {\n for (const toolCall of choice.message.tool_calls) {\n content.push({\n type: 'function',\n id: toolCall.id,\n function: {\n name: toolCall.function.name,\n arguments: toolCall.function.arguments,\n },\n })\n }\n }\n\n // Handle audio output (gpt-4o-audio-preview)\n if (choice.message.audio) {\n content.push({\n type: 'audio',\n ...choice.message.audio,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n }\n\n // Handle Responses API format\n if (response.output) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n for (const item of response.output) {\n if (item.type === 'message') {\n role = item.role\n\n if (item.content && Array.isArray(item.content)) {\n for (const contentItem of item.content) {\n if (contentItem.type === 'output_text' && contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.type === 'input_image' && contentItem.image_url) {\n content.push({\n type: 'image',\n image: contentItem.image_url,\n })\n }\n }\n } else if (item.content) {\n content.push({ type: 'text', text: String(item.content) })\n }\n } else if (item.type === 'function_call') {\n content.push({\n type: 'function',\n id: item.call_id || item.id || '',\n function: {\n name: item.name,\n arguments: item.arguments || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n\n return output\n}\n\nexport const buildInlineDataBlock = (\n mimeType: string,\n data: string\n): FormattedAudioContent | FormattedImageContent | FormattedDocumentContent => {\n if (mimeType.startsWith('audio/')) {\n return { type: 'audio', mime_type: mimeType, data }\n }\n if (mimeType.startsWith('image/')) {\n return { type: 'image', inline_data: { mime_type: mimeType, data } }\n }\n return { type: 'document', inline_data: { mime_type: mimeType, data } }\n}\n\nexport const formatResponseGemini = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.candidates && Array.isArray(response.candidates)) {\n for (const candidate of response.candidates) {\n if (candidate.content && candidate.content.parts) {\n const content: FormattedContent = []\n\n for (const part of candidate.content.parts) {\n if (part.text) {\n content.push({ type: 'text', text: part.text })\n } else if (part.functionCall) {\n content.push({\n type: 'function',\n function: {\n name: part.functionCall.name,\n arguments: part.functionCall.args,\n },\n })\n } else if (part.inlineData) {\n // Handle inline data (images, audio, documents)\n const mimeType = part.inlineData.mimeType || part.inlineData.mime_type || 'application/octet-stream'\n let data = part.inlineData.data\n\n // Handle binary data (Uint8Array/Buffer -> base64)\n if (data instanceof Uint8Array) {\n if (typeof Buffer !== 'undefined') {\n data = Buffer.from(data).toString('base64')\n } else {\n let binary = ''\n for (let i = 0; i < data.length; i++) {\n binary += String.fromCharCode(data[i])\n }\n data = btoa(binary)\n }\n }\n\n // Sanitize base64 data for images and other large inline data\n data = redactBase64DataUrl(data)\n\n content.push(buildInlineDataBlock(mimeType, data))\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n } else if (candidate.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: candidate.text }],\n })\n }\n }\n } else if (response.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: response.text }],\n })\n }\n\n return output\n}\n\nexport const mergeSystemPrompt = (params: MessageCreateParams & MonitoringParams, provider: string): any => {\n if (provider == 'anthropic') {\n const messages = params.messages || []\n if (!(params as any).system) {\n return messages\n }\n const systemMessage = (params as any).system\n return [{ role: 'system', content: systemMessage }, ...messages]\n }\n return params.messages\n}\n\nexport const withPrivacyMode = (client: PostHog, privacyMode: boolean, input: any): any => {\n return (client as any).privacy_mode || privacyMode ? null : input\n}\n\nfunction toSafeString(input: unknown): string {\n if (input === undefined || input === null) {\n return ''\n }\n if (typeof input === 'string') {\n return input\n }\n try {\n return JSON.stringify(input)\n } catch {\n console.warn('Failed to stringify input', input)\n return ''\n }\n}\n\nexport const truncate = (input: unknown): string => {\n const str = toSafeString(input)\n if (str === '') {\n return ''\n }\n\n // Check if we need to truncate and ensure STRING_FORMAT is respected\n const buffer = sharedTextEncoder.encode(str)\n if (buffer.length <= MAX_OUTPUT_SIZE) {\n // Ensure STRING_FORMAT is respected\n return sharedTextDecoder.decode(buffer)\n }\n\n // Truncate the buffer and ensure a valid string is returned.\n // fatal: false means we get U+FFFD at the end if truncation broke the encoding.\n const truncatedBuffer = buffer.slice(0, MAX_OUTPUT_SIZE)\n let truncatedStr = sharedTextDecoder.decode(truncatedBuffer)\n if (truncatedStr.endsWith('\\uFFFD')) {\n truncatedStr = truncatedStr.slice(0, -1)\n }\n return `${truncatedStr}... [truncated]`\n}\n\n/**\n * Calculate web search count from raw API response.\n *\n * Uses a two-tier detection strategy:\n * Priority 1 (Exact Count): Count actual web search calls when available\n * Priority 2 (Binary Detection): Return 1 if web search indicators are present, 0 otherwise\n *\n * @param result - Raw API response from any provider (OpenAI, Perplexity, OpenRouter, Gemini, etc.)\n * @returns Number of web searches performed (exact count or binary 1/0)\n */\nexport function calculateWebSearchCount(result: unknown): number {\n if (!result || typeof result !== 'object') {\n return 0\n }\n\n // Priority 1: Exact Count\n // Check for OpenAI Responses API web_search_call items\n if ('output' in result && Array.isArray(result.output)) {\n let count = 0\n\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'type' in item && item.type === 'web_search_call') {\n count++\n }\n }\n\n if (count > 0) {\n return count\n }\n }\n\n // Priority 2: Binary Detection (1 or 0)\n\n // Check for citations at root level (Perplexity)\n if ('citations' in result && Array.isArray(result.citations) && result.citations.length > 0) {\n return 1\n }\n\n // Check for search_results at root level (Perplexity via OpenRouter)\n if ('search_results' in result && Array.isArray(result.search_results) && result.search_results.length > 0) {\n return 1\n }\n\n // Check for usage.search_context_size (Perplexity via OpenRouter)\n if ('usage' in result && typeof result.usage === 'object' && result.usage !== null) {\n if ('search_context_size' in result.usage && result.usage.search_context_size) {\n return 1\n }\n }\n\n // Check for annotations with url_citation in choices[].message or choices[].delta (OpenAI/Perplexity)\n if ('choices' in result && Array.isArray(result.choices)) {\n for (const choice of result.choices) {\n if (typeof choice === 'object' && choice !== null) {\n // Check both message (non-streaming) and delta (streaming) for annotations\n const content = ('message' in choice ? choice.message : null) || ('delta' in choice ? choice.delta : null)\n\n if (typeof content === 'object' && content !== null && 'annotations' in content) {\n const annotations = content.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n\n // Check for annotations in output[].content[] (OpenAI Responses API)\n if ('output' in result && Array.isArray(result.output)) {\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'content' in item) {\n const content = item.content\n\n if (Array.isArray(content)) {\n for (const contentItem of content) {\n if (typeof contentItem === 'object' && contentItem !== null && 'annotations' in contentItem) {\n const annotations = contentItem.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n }\n }\n\n // Check for grounding_metadata (Gemini)\n if ('candidates' in result && Array.isArray(result.candidates)) {\n for (const candidate of result.candidates) {\n if (\n typeof candidate === 'object' &&\n candidate !== null &&\n 'grounding_metadata' in candidate &&\n candidate.grounding_metadata\n ) {\n return 1\n }\n }\n }\n\n return 0\n}\n\n/**\n * Extract available tool calls from the request parameters.\n * These are the tools provided to the LLM, not the tool calls in the response.\n */\nexport const extractAvailableToolCalls = (\n provider: string,\n params: any\n): ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null => {\n if (provider === 'anthropic') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'gemini') {\n if (params.config && params.config.tools) {\n return params.config.tools\n }\n\n return null\n } else if (provider === 'openai') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'vercel') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n }\n\n return null\n}\n\nexport enum AIEvent {\n Generation = '$ai_generation',\n Embedding = '$ai_embedding',\n}\n\nexport function sanitizeValues(obj: any): any {\n if (obj === undefined || obj === null) {\n return obj\n }\n const jsonSafe = JSON.parse(JSON.stringify(obj))\n if (typeof jsonSafe === 'string') {\n // Sanitize lone surrogates by round-tripping through UTF-8\n return new TextDecoder().decode(new TextEncoder().encode(jsonSafe))\n } else if (Array.isArray(jsonSafe)) {\n return jsonSafe.map(sanitizeValues)\n } else if (jsonSafe && typeof jsonSafe === 'object') {\n return Object.fromEntries(Object.entries(jsonSafe).map(([k, v]) => [k, sanitizeValues(v)]))\n }\n return jsonSafe\n}\n\nconst POSTHOG_PARAMS_MAP: Record<keyof MonitoringParams, string> = {\n posthogDistinctId: 'distinctId',\n posthogTraceId: 'traceId',\n posthogProperties: 'properties',\n posthogPrivacyMode: 'privacyMode',\n posthogGroups: 'groups',\n posthogModelOverride: 'modelOverride',\n posthogProviderOverride: 'providerOverride',\n posthogCostOverride: 'costOverride',\n posthogCaptureImmediate: 'captureImmediate',\n}\n\nexport function extractPosthogParams<T>(body: T & MonitoringParams): {\n providerParams: T\n posthogParams: MonitoringEventPropertiesWithDefaults\n} {\n const providerParams: Record<string, unknown> = {}\n const posthogParams: Record<string, unknown> = {}\n\n for (const [key, value] of Object.entries(body)) {\n if (POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]) {\n posthogParams[POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]] = value\n } else if (key.startsWith('posthog')) {\n console.warn(`Unknown Posthog parameter ${key}`)\n } else {\n providerParams[key] = value\n }\n }\n\n return {\n providerParams: providerParams as T,\n posthogParams: addDefaults(posthogParams),\n }\n}\n\nfunction addDefaults(params: MonitoringEventProperties): MonitoringEventPropertiesWithDefaults {\n return {\n ...params,\n privacyMode: params.privacyMode ?? false,\n traceId: params.traceId ?? uuidv4(),\n }\n}\n\nexport function formatOpenAIResponsesInput(input: unknown, instructions?: string | null): FormattedMessage[] {\n const messages: FormattedMessage[] = []\n\n if (instructions) {\n messages.push({\n role: 'system',\n content: instructions,\n })\n }\n\n if (Array.isArray(input)) {\n for (const item of input) {\n if (typeof item === 'string') {\n messages.push({ role: 'user', content: item })\n } else if (item && typeof item === 'object') {\n const obj = item as Record<string, unknown>\n const role = isString(obj.role) ? obj.role : 'user'\n\n // Handle content properly - preserve structure for objects/arrays\n const content = obj.content ?? obj.text ?? item\n messages.push({ role, content: toContentString(content) })\n } else {\n messages.push({ role: 'user', content: toContentString(item) })\n }\n }\n } else if (typeof input === 'string') {\n messages.push({ role: 'user', content: input })\n } else if (input) {\n messages.push({ role: 'user', content: toContentString(input) })\n }\n\n return messages\n}\n","import { sanitizeValues } from './utils'\n\nconst DEFAULT_MAX_DEPTH = 3\nconst MAX_STACK_LINES = 20\n\nexport function serializeError(value: unknown, depth = DEFAULT_MAX_DEPTH): unknown {\n if (depth < 0 || value === null || typeof value !== 'object') {\n return value\n }\n if (value instanceof Error) {\n const out: Record<string, unknown> = {\n name: value.name,\n message: value.message,\n stack: truncateStack(value.stack),\n }\n for (const key of Object.keys(value)) {\n out[key] = serializeError((value as unknown as Record<string, unknown>)[key], depth - 1)\n }\n if (value.cause !== undefined) {\n out.cause = serializeError(value.cause, depth - 1)\n }\n return out\n }\n if (Array.isArray(value)) {\n return value.map((item) => serializeError(item, depth - 1))\n }\n return value\n}\n\nexport function stringifyError(error: unknown): string {\n try {\n return JSON.stringify(sanitizeValues(serializeError(error)))\n } catch {\n if (error instanceof Error) {\n return JSON.stringify({ name: error.name, message: error.message })\n }\n return JSON.stringify({ message: String(error) })\n }\n}\n\nfunction truncateStack(stack: string | undefined): string | undefined {\n if (!stack) {\n return stack\n }\n const lines = stack.split('\\n')\n if (lines.length <= MAX_STACK_LINES) {\n return stack\n }\n return [...lines.slice(0, MAX_STACK_LINES), '... (truncated)'].join('\\n')\n}\n","import { EventMessage, PostHog } from 'posthog-node'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport { v4 as uuidv4 } from 'uuid'\nimport { uuidv7, ErrorTracking as CoreErrorTracking } from '@posthog/core'\nimport { version } from '../package.json'\nimport type { TokenUsage } from './types'\nimport { stringifyError } from './serializeError'\nimport { AIEvent, CostOverride, getTokensSource, sanitizeValues, withPrivacyMode } from './utils'\n\ntype AnthropicTool = AnthropicOriginal.Tool\n\n/**\n * Options for `captureAiGeneration`. Mirrors the `$ai_generation` event shape\n * directly so that any caller — first-party SDK wrappers and external code\n * alike — produces an identical event.\n */\nexport interface CaptureAiGenerationOptions {\n distinctId?: string\n /** Auto-generated when omitted. */\n traceId?: string\n /** Defaults to `$ai_generation`. */\n eventType?: AIEvent\n\n /** Required for the event to be useful, but accepted as optional so SDK wrappers can pass through whatever they detect. */\n model?: string\n provider: string\n input: unknown\n output: unknown\n\n /** Maps to `$ai_model_parameters` (temperature, max_tokens, top_p, …). */\n modelParameters?: Record<string, unknown>\n\n baseURL?: string\n httpStatus?: number\n /** Wall-clock latency in seconds. */\n latency?: number\n /** Time from request start to the first streamed token, in seconds. */\n timeToFirstToken?: number\n\n usage?: TokenUsage\n\n /** Extra event properties merged into the captured event. */\n properties?: Record<string, unknown>\n /** Mapping of group type to group id, matching `EventMessage.groups`. */\n groups?: Record<string, string | number>\n privacyMode?: boolean\n\n /**\n * For SDK wrappers: overrides the auto-detected model. External callers\n * should pass `model` directly instead.\n */\n modelOverride?: string\n /**\n * For SDK wrappers: overrides the auto-detected provider. External callers\n * should pass `provider` directly instead.\n */\n providerOverride?: string\n costOverride?: CostOverride\n\n tools?: ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null\n stopReason?: string\n /** When set, the event is captured as an error. */\n error?: unknown\n\n /** Awaits delivery instead of batching. Useful in serverless environments. */\n captureImmediate?: boolean\n}\n\n/**\n * Capture an `$ai_generation` (or `$ai_embedding`) event to PostHog.\n *\n * This is the canonical primitive that every `@posthog/ai` wrapper\n * (`withTracing`, `OpenAI`, `Anthropic`, `GoogleGenAI`, …) funnels through, so\n * external code can use it directly to instrument LLM calls made through\n * arbitrary clients (Cloudflare Workers AI, custom HTTP, etc.) and get the\n * same events the SDK wrappers produce.\n *\n * When `error` is set, the event is captured as an error. If the error is an\n * object, it is mutated in place to set `__posthog_previously_captured_error`\n * so callers can re-throw the original error reference safely.\n */\nexport const captureAiGeneration = async (client: PostHog, options: CaptureAiGenerationOptions): Promise<void> => {\n if (!client.capture) {\n return\n }\n\n const traceId = options.traceId ?? uuidv4()\n const eventType = options.eventType ?? AIEvent.Generation\n const privacyMode = options.privacyMode ?? false\n const usage = options.usage ?? {}\n\n const safeInput = sanitizeValues(options.input)\n const safeOutput = sanitizeValues(options.output)\n\n let httpStatus = options.httpStatus\n let errorData: Record<string, unknown> = {}\n if (options.error) {\n if (httpStatus === undefined) {\n if (typeof options.error === 'object' && 'status' in options.error && typeof options.error.status === 'number') {\n httpStatus = options.error.status\n } else {\n httpStatus = 500\n }\n }\n\n let exceptionId: string | undefined\n if (client.options?.enableExceptionAutocapture) {\n exceptionId = uuidv7()\n client.captureException(options.error, undefined, { $ai_trace_id: traceId }, exceptionId)\n if (typeof options.error === 'object') {\n ;(options.error as CoreErrorTracking.PreviouslyCapturedError).__posthog_previously_captured_error = true\n }\n }\n\n errorData = {\n $ai_is_error: true,\n $ai_error: stringifyError(options.error),\n $exception_event_id: exceptionId,\n }\n }\n httpStatus = httpStatus ?? 200\n\n let costOverrideData: Record<string, number> = {}\n if (options.costOverride) {\n const inputCostUSD = (options.costOverride.inputCost ?? 0) * (usage.inputTokens ?? 0)\n const outputCostUSD = (options.costOverride.outputCost ?? 0) * (usage.outputTokens ?? 0)\n costOverrideData = {\n $ai_input_cost_usd: inputCostUSD,\n $ai_output_cost_usd: outputCostUSD,\n $ai_total_cost_usd: inputCostUSD + outputCostUSD,\n }\n }\n\n const additionalTokenValues = {\n ...(usage.reasoningTokens ? { $ai_reasoning_tokens: usage.reasoningTokens } : {}),\n ...(usage.cacheReadInputTokens ? { $ai_cache_read_input_tokens: usage.cacheReadInputTokens } : {}),\n ...(usage.cacheCreationInputTokens ? { $ai_cache_creation_input_tokens: usage.cacheCreationInputTokens } : {}),\n ...(usage.webSearchCount ? { $ai_web_search_count: usage.webSearchCount } : {}),\n ...(usage.rawUsage ? { $ai_usage: usage.rawUsage } : {}),\n }\n\n const properties: Record<string, unknown> = {\n $ai_lib: 'posthog-ai',\n $ai_lib_version: version,\n $ai_provider: options.providerOverride ?? options.provider,\n $ai_model: options.modelOverride ?? options.model,\n $ai_model_parameters: options.modelParameters ?? {},\n $ai_input: withPrivacyMode(client, privacyMode, safeInput),\n $ai_output_choices: withPrivacyMode(client, privacyMode, safeOutput),\n $ai_http_status: httpStatus,\n $ai_input_tokens: usage.inputTokens ?? 0,\n ...(usage.outputTokens !== undefined ? { $ai_output_tokens: usage.outputTokens } : {}),\n ...additionalTokenValues,\n $ai_latency: options.latency ?? 0,\n ...(options.timeToFirstToken !== undefined ? { $ai_time_to_first_token: options.timeToFirstToken } : {}),\n $ai_trace_id: traceId,\n $ai_base_url: options.baseURL ?? '',\n ...options.properties,\n $ai_tokens_source: getTokensSource(options.properties),\n ...(options.distinctId ? {} : { $process_person_profile: false }),\n ...(options.stopReason ? { $ai_stop_reason: options.stopReason } : {}),\n ...(options.tools ? { $ai_tools: options.tools } : {}),\n ...errorData,\n ...costOverrideData,\n }\n\n const event: EventMessage = {\n distinctId: options.distinctId ?? traceId,\n event: eventType,\n properties,\n groups: options.groups,\n }\n\n if (options.captureImmediate) {\n await client.captureImmediate(event)\n } else {\n client.capture(event)\n }\n}\n","import type {\n LanguageModelV2,\n LanguageModelV2CallOptions,\n LanguageModelV2Content,\n LanguageModelV2Prompt,\n LanguageModelV2StreamPart,\n LanguageModelV3,\n LanguageModelV3CallOptions,\n LanguageModelV3Content,\n LanguageModelV3Prompt,\n LanguageModelV3StreamPart,\n} from '@ai-sdk/provider'\nimport { v4 as uuidv4 } from 'uuid'\nimport { PostHog } from 'posthog-node'\nimport {\n CostOverride,\n truncate,\n MAX_OUTPUT_SIZE,\n utf8ByteLength,\n extractAvailableToolCalls,\n toContentString,\n calculateWebSearchCount,\n getModelParams,\n} from '../utils'\nimport { captureAiGeneration } from '../captureAiGeneration'\nimport { redactBase64DataUrl } from '../sanitization'\nimport { isString } from '../typeGuards'\n\n// Union types for dual version support\ntype LanguageModel = LanguageModelV2 | LanguageModelV3\ntype LanguageModelCallOptions = LanguageModelV2CallOptions | LanguageModelV3CallOptions\ntype LanguageModelPrompt = LanguageModelV2Prompt | LanguageModelV3Prompt\ntype LanguageModelContent = LanguageModelV2Content | LanguageModelV3Content\ntype LanguageModelStreamPart = LanguageModelV2StreamPart | LanguageModelV3StreamPart\n\n// Type guards\nfunction isV3Model(model: LanguageModel): model is LanguageModelV3 {\n return model.specificationVersion === 'v3'\n}\n\nfunction isV2Model(model: LanguageModel): model is LanguageModelV2 {\n return model.specificationVersion === 'v2'\n}\n\ninterface ClientOptions {\n posthogDistinctId?: string\n posthogTraceId?: string\n posthogProperties?: Record<string, any>\n posthogPrivacyMode?: boolean\n posthogGroups?: Record<string, any>\n posthogModelOverride?: string\n posthogProviderOverride?: string\n posthogCostOverride?: CostOverride\n posthogCaptureImmediate?: boolean\n}\n\ninterface PostHogInput {\n role: string\n type?: string\n content?:\n | string\n | {\n [key: string]: any\n }\n}\n\n// Content types for the output array\ntype OutputContentItem =\n | { type: 'text'; text: string }\n | { type: 'reasoning'; text: string }\n | { type: 'tool-call'; id: string; function: { name: string; arguments: string } }\n | { type: 'file'; name: string; mediaType: string; data: string }\n | { type: 'source'; sourceType: string; id: string; url: string; title: string }\n\nconst mapVercelParams = (params: any): Record<string, any> => {\n return {\n temperature: params.temperature,\n max_output_tokens: params.maxOutputTokens,\n top_p: params.topP,\n frequency_penalty: params.frequencyPenalty,\n presence_penalty: params.presencePenalty,\n stop: params.stopSequences,\n stream: params.stream,\n }\n}\n\nconst mapVercelPrompt = (messages: LanguageModelPrompt): PostHogInput[] => {\n // Map and truncate individual content\n const inputs: PostHogInput[] = messages.map((message) => {\n let content: any\n\n // Handle system role which has string content\n if (message.role === 'system') {\n content = [\n {\n type: 'text',\n text: truncate(toContentString(message.content)),\n },\n ]\n } else {\n // Handle other roles which have array content\n if (Array.isArray(message.content)) {\n content = message.content.map((c: any) => {\n if (c.type === 'text') {\n return {\n type: 'text',\n text: truncate(c.text),\n }\n } else if (c.type === 'file') {\n // For file type, check if it's a data URL and redact if needed\n let fileData: string\n\n const contentData: unknown = c.data\n\n if (contentData instanceof URL) {\n fileData = contentData.toString()\n } else if (isString(contentData)) {\n // Redact base64 data URLs and raw base64 to prevent oversized events\n fileData = redactBase64DataUrl(contentData)\n } else {\n fileData = 'raw files not supported'\n }\n\n return {\n type: 'file',\n file: fileData,\n mediaType: c.mediaType,\n }\n } else if (c.type === 'reasoning') {\n return {\n type: 'reasoning',\n text: truncate(c.reasoning),\n }\n } else if (c.type === 'tool-call') {\n return {\n type: 'tool-call',\n toolCallId: c.toolCallId,\n toolName: c.toolName,\n input: c.input,\n }\n } else if (c.type === 'tool-result') {\n return {\n type: 'tool-result',\n toolCallId: c.toolCallId,\n toolName: c.toolName,\n output: c.output,\n isError: c.isError,\n }\n }\n return {\n type: 'text',\n text: '',\n }\n })\n } else {\n // Fallback for non-array content\n content = [\n {\n type: 'text',\n text: truncate(toContentString(message.content)),\n },\n ]\n }\n }\n\n return {\n role: message.role,\n content,\n }\n })\n\n try {\n // Trim the inputs array until its serialized JSON size fits within MAX_OUTPUT_SIZE.\n // Pre-compute each message's byte size once so we can shift by accumulated budget\n // in a single linear pass, instead of re-stringifying the whole array per iteration.\n const messageSizes = inputs.map((m) => utf8ByteLength(JSON.stringify(m)))\n // Account for the surrounding `[` `]` plus a comma between each pair of elements.\n let totalBytes = 2 + Math.max(0, messageSizes.length - 1)\n for (const size of messageSizes) {\n totalBytes += size\n }\n let removedCount = 0\n while (totalBytes > MAX_OUTPUT_SIZE && removedCount < messageSizes.length) {\n totalBytes -= messageSizes[removedCount]\n // Each removed message past the first also drops the comma that joined it.\n if (removedCount < messageSizes.length - 1) {\n totalBytes -= 1\n }\n removedCount++\n }\n if (removedCount > 0) {\n inputs.splice(0, removedCount)\n // Add one placeholder to indicate how many were removed\n inputs.unshift({\n role: 'posthog',\n content: `[${removedCount} message${removedCount === 1 ? '' : 's'} removed due to size limit]`,\n })\n }\n } catch (error) {\n console.error('Error stringifying inputs', error)\n return [{ role: 'posthog', content: 'An error occurred while processing your request. Please try again.' }]\n }\n return inputs\n}\n\nconst mapVercelOutput = (result: LanguageModelContent[]): PostHogInput[] => {\n const content: OutputContentItem[] = result.map((item) => {\n if (item.type === 'text') {\n return { type: 'text', text: truncate(item.text) }\n }\n if (item.type === 'tool-call') {\n const toolCall = item as { input?: unknown; args?: unknown; arguments?: unknown }\n const rawArgs = toolCall.input ?? toolCall.args ?? toolCall.arguments ?? {}\n return {\n type: 'tool-call',\n id: item.toolCallId,\n function: {\n name: item.toolName,\n arguments: typeof rawArgs === 'string' ? rawArgs : JSON.stringify(rawArgs),\n },\n }\n }\n if (item.type === 'reasoning') {\n return { type: 'reasoning', text: truncate(item.text) }\n }\n if (item.type === 'file') {\n // Handle files similar to input mapping - avoid large base64 data\n let fileData: string\n if (item.data instanceof URL) {\n fileData = item.data.toString()\n } else if (typeof item.data === 'string') {\n fileData = redactBase64DataUrl(item.data)\n\n // If not redacted and still large, replace with size indicator\n if (fileData === item.data && item.data.length > 1000) {\n fileData = `[${item.mediaType} file - ${item.data.length} bytes]`\n }\n } else {\n fileData = `[binary ${item.mediaType} file]`\n }\n\n return {\n type: 'file',\n name: 'generated_file',\n mediaType: item.mediaType,\n data: fileData,\n }\n }\n if (item.type === 'source') {\n return {\n type: 'source',\n sourceType: item.sourceType,\n id: item.id,\n url: (item as any).url || '',\n title: item.title || '',\n }\n }\n // Fallback for unknown types - try to extract text if possible\n return { type: 'text', text: truncate(JSON.stringify(item)) }\n })\n\n if (content.length > 0) {\n return [\n {\n role: 'assistant',\n content: content.length === 1 && content[0].type === 'text' ? content[0].text : content,\n },\n ]\n }\n // otherwise stringify and truncate\n try {\n const jsonOutput = JSON.stringify(result)\n return [{ content: truncate(jsonOutput), role: 'assistant' }]\n } catch {\n console.error('Error stringifying output')\n return []\n }\n}\n\nconst extractProvider = (model: LanguageModel): string => {\n const provider = model.provider.toLowerCase()\n const providerName = provider.split('.')[0]\n return providerName\n}\n\n// Extract web search count from provider metadata (works for both V2 and V3)\nconst extractWebSearchCount = (providerMetadata: unknown, usage: any): number => {\n // Try Anthropic-specific extraction\n if (\n providerMetadata &&\n typeof providerMetadata === 'object' &&\n 'anthropic' in providerMetadata &&\n providerMetadata.anthropic &&\n typeof providerMetadata.anthropic === 'object' &&\n 'server_tool_use' in providerMetadata.anthropic\n ) {\n const serverToolUse = providerMetadata.anthropic.server_tool_use\n if (\n serverToolUse &&\n typeof serverToolUse === 'object' &&\n 'web_search_requests' in serverToolUse &&\n typeof serverToolUse.web_search_requests === 'number'\n ) {\n return serverToolUse.web_search_requests\n }\n }\n\n // Fall back to generic calculation\n return calculateWebSearchCount({\n usage,\n providerMetadata,\n })\n}\n\n// Helper to extract numeric token value from V2 (number) or V3 (object with .total) usage formats\nconst extractTokenCount = (value: unknown): number | undefined => {\n if (typeof value === 'number') {\n return value\n }\n if (\n value &&\n typeof value === 'object' &&\n 'total' in value &&\n typeof (value as { total: unknown }).total === 'number'\n ) {\n return (value as { total: number }).total\n }\n return undefined\n}\n\n// Helper to extract reasoning tokens from V2 (usage.reasoningTokens) or V3 (usage.outputTokens.reasoning)\nconst extractReasoningTokens = (usage: Record<string, unknown>): unknown => {\n // V2 style: top-level reasoningTokens\n if ('reasoningTokens' in usage) {\n return usage.reasoningTokens\n }\n // V3 style: nested in outputTokens.reasoning\n if (\n 'outputTokens' in usage &&\n usage.outputTokens &&\n typeof usage.outputTokens === 'object' &&\n 'reasoning' in usage.outputTokens\n ) {\n return (usage.outputTokens as { reasoning: unknown }).reasoning\n }\n return undefined\n}\n\n// Helper to extract cached input tokens from V2 (usage.cachedInputTokens) or V3 (usage.inputTokens.cacheRead)\nconst extractCacheReadTokens = (usage: Record<string, unknown>): unknown => {\n // V2 style: top-level cachedInputTokens\n if ('cachedInputTokens' in usage) {\n return usage.cachedInputTokens\n }\n // V3 style: nested in inputTokens.cacheRead\n if (\n 'inputTokens' in usage &&\n usage.inputTokens &&\n typeof usage.inputTokens === 'object' &&\n 'cacheRead' in usage.inputTokens\n ) {\n return (usage.inputTokens as { cacheRead: unknown }).cacheRead\n }\n return undefined\n}\n\n// Helper to extract cache write tokens from V3 (usage.inputTokens.cacheWrite). Providers like\n// Amazon Bedrock populate this standardized field instead of providerMetadata.anthropic.\nconst extractCacheWriteTokens = (usage: Record<string, unknown>): unknown => {\n if (\n 'inputTokens' in usage &&\n usage.inputTokens &&\n typeof usage.inputTokens === 'object' &&\n 'cacheWrite' in usage.inputTokens\n ) {\n return (usage.inputTokens as { cacheWrite: unknown }).cacheWrite\n }\n return undefined\n}\n\n// Extract additional token values from provider metadata, with a V3 standardized fallback\n// (e.g. Amazon Bedrock exposes cache write tokens via usage.inputTokens.cacheWrite rather\n// than providerMetadata.anthropic.cacheCreationInputTokens). A cacheWrite of 0 is treated\n// as absent so we preserve the pre-fallback event shape on providers that simply omit the\n// field — consumers downstream saw `$ai_cache_creation_input_tokens` missing, not 0.\nconst extractAdditionalTokenValues = (providerMetadata: unknown, usage: unknown): Record<string, any> => {\n if (\n providerMetadata &&\n typeof providerMetadata === 'object' &&\n 'anthropic'