UNPKG

@posthog/ai

Version:
1 lines 78.6 kB
{"version":3,"file":"index.mjs","sources":["../../src/sanitization/base64_recognizer.ts","../../src/sanitization/media_type_context.ts","../../src/sanitization/binary_content_redactor.ts","../../src/sanitization.ts","../../src/utils.ts","../../src/serializeError.ts","../../src/captureAiGeneration.ts","../../src/anthropic/index.ts"],"sourcesContent":["const DATA_URL_PREFIX_RE = /^data:([^;,\\s]+)(?:;[^;,\\s]+)*;base64,/i\nconst BASE64_ALPHABET_RE = /^[A-Za-z0-9+/_=-]+$/\n\nexport type Base64Recognition = { kind: 'data-url'; mediaType: string } | { kind: 'raw' } | { kind: 'none' }\n\nexport class Base64Recognizer {\n recognize(value: string, minLength: number): Base64Recognition {\n const dataUrl = DATA_URL_PREFIX_RE.exec(value)\n if (dataUrl) return { kind: 'data-url', mediaType: dataUrl[1] }\n\n if (value.length < minLength) return { kind: 'none' }\n\n const confidencePrefix = value.slice(0, minLength)\n if (BASE64_ALPHABET_RE.test(confidencePrefix)) {\n return { kind: 'raw' }\n } else {\n return { kind: 'none' }\n }\n }\n}\n","const MIME_HINT_KEYS = ['mediaType', 'media_type', 'mimeType', 'mime_type'] as const\n\nconst STRONG_CONTEXT_KEYS = new Set([\n 'data',\n 'file_data',\n 'fileData',\n 'image_url',\n 'imageUrl',\n 'video_url',\n 'videoUrl',\n 'audio',\n 'audio_data',\n 'audioData',\n 'inline_data',\n 'inlineData',\n 'source',\n 'result',\n])\n\nconst STRONG_CONTEXT_TYPES = new Set([\n 'image',\n 'image_url',\n 'input_image',\n 'audio',\n 'input_audio',\n 'video',\n 'video_url',\n 'file',\n 'input_file',\n 'document',\n 'media',\n 'file-data',\n])\n\nconst FILE_FAMILY_TYPES = new Set(['file', 'input_file', 'document', 'media', 'file-data'])\n\nconst KNOWN_AUDIO_FORMATS = new Set(['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'webm'])\n\nexport class MediaTypeContext {\n static readonly EMPTY = new MediaTypeContext(undefined, undefined)\n\n constructor(\n private readonly parent: Record<string, unknown> | undefined,\n private readonly key: string | undefined\n ) {}\n\n inferMediaType(): string | undefined {\n return (\n this.inferFromSiblingMime() ?? this.inferFromSiblingFormat() ?? this.inferFromParentType() ?? this.inferFromKey()\n )\n }\n\n inferFromSiblingMime(): string | undefined {\n if (!this.parent) return undefined\n for (const hint of MIME_HINT_KEYS) {\n const v = this.parent[hint]\n if (typeof v === 'string') return v\n }\n return undefined\n }\n\n inferFromSiblingFormat(): string | undefined {\n if (!this.parent) return undefined\n const fmt = this.parent.format\n if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) {\n return `audio/${fmt.toLowerCase()}`\n }\n return undefined\n }\n\n inferFromParentType(): string | undefined {\n if (!this.parent) return undefined\n const t = this.parent.type\n if (typeof t !== 'string') return undefined\n if (t === 'image' || t === 'image_url' || t === 'input_image') return 'image'\n if (t === 'audio' || t === 'input_audio') return 'audio'\n if (t === 'video' || t === 'video_url') return 'video'\n if (FILE_FAMILY_TYPES.has(t)) return 'application/octet-stream'\n return undefined\n }\n\n inferFromKey(): string | undefined {\n if (!this.key) return undefined\n const key = this.key.toLowerCase()\n if (key.includes('audio')) return 'audio'\n if (key.includes('video')) return 'video'\n if (key.includes('image')) return 'image'\n if (key.includes('file') || key.includes('document')) return 'application/octet-stream'\n return undefined\n }\n\n signalsBinary(): boolean {\n if (this.parent) {\n for (const hint of MIME_HINT_KEYS) {\n if (typeof this.parent[hint] === 'string') return true\n }\n const fmt = this.parent.format\n if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) return true\n const t = this.parent.type\n if (typeof t === 'string' && STRONG_CONTEXT_TYPES.has(t)) return true\n }\n if (this.key && STRONG_CONTEXT_KEYS.has(this.key)) return true\n return false\n }\n}\n","import { Base64Recognizer } from './base64_recognizer'\nimport { MediaTypeContext } from './media_type_context'\n\nconst STRONG_CONTEXT_MIN_LENGTH = 64\nconst WEAK_CONTEXT_MIN_LENGTH = 1024\n\nexport class BinaryContentRedactor {\n private visited: WeakSet<object> = new WeakSet()\n\n constructor(private readonly recognizer: Base64Recognizer = new Base64Recognizer()) {}\n\n redact<T>(value: T): T\n redact(value: unknown): unknown {\n if (this.isMultimodalEnabled()) return value\n this.visited = new WeakSet()\n return this.walk(value, MediaTypeContext.EMPTY)\n }\n\n private walk(value: unknown, ctx: MediaTypeContext): unknown {\n if (value === null || value === undefined) return value\n if (typeof value === 'string') return this.redactString(value, ctx)\n if (typeof value !== 'object') return value\n\n // Buffer extends Uint8Array, so this branch catches both.\n if (typeof Uint8Array !== 'undefined' && value instanceof Uint8Array) {\n return this.placeholderFor(ctx.inferMediaType())\n }\n\n if (this.visited.has(value)) return null\n this.visited.add(value)\n\n if (Array.isArray(value)) {\n return value.map((item) => this.walk(item, ctx))\n }\n\n const obj = value as Record<string, unknown>\n const out: Record<string, unknown> = {}\n for (const k of Object.keys(obj)) {\n out[k] = this.walk(obj[k], new MediaTypeContext(obj, k))\n }\n return out\n }\n\n private redactString(value: string, ctx: MediaTypeContext): string {\n const minLength = ctx.signalsBinary() ? STRONG_CONTEXT_MIN_LENGTH : WEAK_CONTEXT_MIN_LENGTH\n const recognition = this.recognizer.recognize(value, minLength)\n switch (recognition.kind) {\n case 'data-url':\n return this.placeholderFor(recognition.mediaType)\n case 'raw':\n return this.placeholderFor(ctx.inferMediaType())\n case 'none':\n return value\n }\n }\n\n private placeholderFor(mediaType?: string): string {\n if (!mediaType) return '[base64 redacted]'\n if (mediaType === 'application/octet-stream') return '[base64 file redacted]'\n return `[base64 ${mediaType} redacted]`\n }\n\n private isMultimodalEnabled(): boolean {\n const val = process.env._INTERNAL_LLMA_MULTIMODAL || ''\n return val.toLowerCase() === 'true' || val === '1' || val.toLowerCase() === 'yes'\n }\n}\n","import { BinaryContentRedactor } from './sanitization/binary_content_redactor'\n\nconst redactor = new BinaryContentRedactor()\n\nexport function redactBase64DataUrl(str: string): string\nexport function redactBase64DataUrl(str: unknown): unknown\nexport function redactBase64DataUrl(str: unknown): unknown {\n return redactor.redact(str)\n}\n\nexport const sanitizeOpenAI = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeOpenAIResponse = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeAnthropic = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeGemini = (data: unknown): unknown => redactor.redact(data)\nexport const sanitizeLangChain = (data: unknown): unknown => redactor.redact(data)\n","import { PostHog } from 'posthog-node'\nimport OpenAIOrignal from 'openai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { ResponseCreateParamsWithTools } from 'openai/lib/ResponsesParser'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport type {\n FormattedMessage,\n FormattedContent,\n FormattedAudioContent,\n FormattedImageContent,\n FormattedDocumentContent,\n} from './types'\nimport { v4 as uuidv4 } from 'uuid'\nimport { isString } from './typeGuards'\nimport { redactBase64DataUrl } from './sanitization'\n\ntype ChatCompletionCreateParamsBase = OpenAIOrignal.Chat.Completions.ChatCompletionCreateParams\ntype MessageCreateParams = AnthropicOriginal.Messages.MessageCreateParams\ntype ResponseCreateParams = OpenAIOrignal.Responses.ResponseCreateParams\ntype EmbeddingCreateParams = OpenAIOrignal.EmbeddingCreateParams\ntype TranscriptionCreateParams = OpenAIOrignal.Audio.Transcriptions.TranscriptionCreateParams\ntype AnthropicTool = AnthropicOriginal.Tool\n\nconst TOKEN_PROPERTY_KEYS = new Set([\n '$ai_input_tokens',\n '$ai_output_tokens',\n '$ai_cache_read_input_tokens',\n '$ai_cache_creation_input_tokens',\n '$ai_total_tokens',\n '$ai_reasoning_tokens',\n])\n\nexport function getTokensSource(posthogProperties?: Record<string, unknown>): string {\n if (posthogProperties && Object.keys(posthogProperties).some((key) => TOKEN_PROPERTY_KEYS.has(key))) {\n return 'passthrough'\n }\n return 'sdk'\n}\n\n// limit large outputs by truncating to 200kb (approx 200k bytes)\nexport const MAX_OUTPUT_SIZE = 200000\nconst STRING_FORMAT = 'utf8'\n\n// Reused across calls to avoid per-invocation allocation; truncate() runs\n// hundreds of times for prompts with many parts.\nconst sharedTextEncoder = new TextEncoder()\nconst sharedTextDecoder = new TextDecoder(STRING_FORMAT, { fatal: false })\n\nexport const utf8ByteLength = (str: string): number => sharedTextEncoder.encode(str).byteLength\n\n/**\n * Safely converts content to a string, preserving structure for objects/arrays.\n * - If content is already a string, returns it as-is\n * - If content is an object or array, stringifies it with JSON.stringify to preserve structure\n * - Otherwise, converts to string with String()\n *\n * This prevents the \"[object Object]\" bug when objects are naively converted to strings.\n *\n * @param content - The content to convert to a string\n * @returns A string representation that preserves structure for complex types\n */\nexport function toContentString(content: unknown): string {\n if (typeof content === 'string') {\n return content\n }\n if (content !== undefined && content !== null && typeof content === 'object') {\n try {\n return JSON.stringify(content)\n } catch {\n // Fallback for circular refs, BigInt, or objects with throwing toJSON\n return String(content)\n }\n }\n return String(content)\n}\n\nexport interface MonitoringEventPropertiesWithDefaults {\n distinctId?: string\n traceId: string\n properties?: Record<string, any>\n privacyMode: boolean\n groups?: Record<string, any>\n modelOverride?: string\n providerOverride?: string\n costOverride?: CostOverride\n captureImmediate?: boolean\n}\n\nexport type MonitoringEventProperties = Partial<MonitoringEventPropertiesWithDefaults>\n\nexport type MonitoringParams = {\n [K in keyof MonitoringEventProperties as `posthog${Capitalize<string & K>}`]: MonitoringEventProperties[K]\n}\n\nexport interface CostOverride {\n inputCost: number\n outputCost: number\n}\n\nexport const getModelParams = (\n params:\n | ((\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams)\n | null\n): Record<string, any> => {\n if (!params) {\n return {}\n }\n const modelParams: Record<string, any> = {}\n const paramKeys = [\n 'temperature',\n 'max_tokens',\n 'max_completion_tokens',\n 'top_p',\n 'frequency_penalty',\n 'presence_penalty',\n 'n',\n 'stop',\n 'stream',\n 'streaming',\n 'language',\n 'response_format',\n 'timestamp_granularities',\n ] as const\n\n for (const key of paramKeys) {\n if (key in params && (params as any)[key] !== undefined) {\n modelParams[key] = (params as any)[key]\n }\n }\n return modelParams\n}\n\n/**\n * Helper to format responses (non-streaming) for consumption\n */\nexport const formatResponse = (response: any, provider: string): FormattedMessage[] => {\n if (!response) {\n return []\n }\n if (provider === 'anthropic') {\n return formatResponseAnthropic(response)\n } else if (provider === 'openai') {\n return formatResponseOpenAI(response)\n } else if (provider === 'gemini') {\n return formatResponseGemini(response)\n }\n return []\n}\n\nexport const formatResponseAnthropic = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n const content: FormattedContent = []\n\n for (const choice of response.content ?? []) {\n if (choice?.type === 'text' && choice?.text) {\n content.push({ type: 'text', text: choice.text })\n } else if (choice?.type === 'tool_use' && choice?.name && choice?.id) {\n content.push({\n type: 'function',\n id: choice.id,\n function: {\n name: choice.name,\n arguments: choice.input || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n\n return output\n}\n\nexport const formatResponseOpenAI = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.choices) {\n for (const choice of response.choices) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n if (choice.message) {\n if (choice.message.role) {\n role = choice.message.role\n }\n\n if (choice.message.content) {\n content.push({ type: 'text', text: choice.message.content })\n }\n\n if (choice.message.tool_calls) {\n for (const toolCall of choice.message.tool_calls) {\n content.push({\n type: 'function',\n id: toolCall.id,\n function: {\n name: toolCall.function.name,\n arguments: toolCall.function.arguments,\n },\n })\n }\n }\n\n // Handle audio output (gpt-4o-audio-preview)\n if (choice.message.audio) {\n content.push({\n type: 'audio',\n ...choice.message.audio,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n }\n\n // Handle Responses API format\n if (response.output) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n for (const item of response.output) {\n if (item.type === 'message') {\n role = item.role\n\n if (item.content && Array.isArray(item.content)) {\n for (const contentItem of item.content) {\n if (contentItem.type === 'output_text' && contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.type === 'input_image' && contentItem.image_url) {\n content.push({\n type: 'image',\n image: contentItem.image_url,\n })\n }\n }\n } else if (item.content) {\n content.push({ type: 'text', text: String(item.content) })\n }\n } else if (item.type === 'function_call') {\n content.push({\n type: 'function',\n id: item.call_id || item.id || '',\n function: {\n name: item.name,\n arguments: item.arguments || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n\n return output\n}\n\nexport const buildInlineDataBlock = (\n mimeType: string,\n data: string\n): FormattedAudioContent | FormattedImageContent | FormattedDocumentContent => {\n if (mimeType.startsWith('audio/')) {\n return { type: 'audio', mime_type: mimeType, data }\n }\n if (mimeType.startsWith('image/')) {\n return { type: 'image', inline_data: { mime_type: mimeType, data } }\n }\n return { type: 'document', inline_data: { mime_type: mimeType, data } }\n}\n\nexport const formatResponseGemini = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.candidates && Array.isArray(response.candidates)) {\n for (const candidate of response.candidates) {\n if (candidate.content && candidate.content.parts) {\n const content: FormattedContent = []\n\n for (const part of candidate.content.parts) {\n if (part.text) {\n content.push({ type: 'text', text: part.text })\n } else if (part.functionCall) {\n content.push({\n type: 'function',\n function: {\n name: part.functionCall.name,\n arguments: part.functionCall.args,\n },\n })\n } else if (part.inlineData) {\n // Handle inline data (images, audio, documents)\n const mimeType = part.inlineData.mimeType || part.inlineData.mime_type || 'application/octet-stream'\n let data = part.inlineData.data\n\n // Handle binary data (Uint8Array/Buffer -> base64)\n if (data instanceof Uint8Array) {\n if (typeof Buffer !== 'undefined') {\n data = Buffer.from(data).toString('base64')\n } else {\n let binary = ''\n for (let i = 0; i < data.length; i++) {\n binary += String.fromCharCode(data[i])\n }\n data = btoa(binary)\n }\n }\n\n // Sanitize base64 data for images and other large inline data\n data = redactBase64DataUrl(data)\n\n content.push(buildInlineDataBlock(mimeType, data))\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n } else if (candidate.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: candidate.text }],\n })\n }\n }\n } else if (response.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: response.text }],\n })\n }\n\n return output\n}\n\nexport const mergeSystemPrompt = (params: MessageCreateParams & MonitoringParams, provider: string): any => {\n if (provider == 'anthropic') {\n const messages = params.messages || []\n if (!(params as any).system) {\n return messages\n }\n const systemMessage = (params as any).system\n return [{ role: 'system', content: systemMessage }, ...messages]\n }\n return params.messages\n}\n\nexport const withPrivacyMode = (client: PostHog, privacyMode: boolean, input: any): any => {\n return (client as any).privacy_mode || privacyMode ? null : input\n}\n\nfunction toSafeString(input: unknown): string {\n if (input === undefined || input === null) {\n return ''\n }\n if (typeof input === 'string') {\n return input\n }\n try {\n return JSON.stringify(input)\n } catch {\n console.warn('Failed to stringify input', input)\n return ''\n }\n}\n\nexport const truncate = (input: unknown): string => {\n const str = toSafeString(input)\n if (str === '') {\n return ''\n }\n\n // Check if we need to truncate and ensure STRING_FORMAT is respected\n const buffer = sharedTextEncoder.encode(str)\n if (buffer.length <= MAX_OUTPUT_SIZE) {\n // Ensure STRING_FORMAT is respected\n return sharedTextDecoder.decode(buffer)\n }\n\n // Truncate the buffer and ensure a valid string is returned.\n // fatal: false means we get U+FFFD at the end if truncation broke the encoding.\n const truncatedBuffer = buffer.slice(0, MAX_OUTPUT_SIZE)\n let truncatedStr = sharedTextDecoder.decode(truncatedBuffer)\n if (truncatedStr.endsWith('\\uFFFD')) {\n truncatedStr = truncatedStr.slice(0, -1)\n }\n return `${truncatedStr}... [truncated]`\n}\n\n/**\n * Calculate web search count from raw API response.\n *\n * Uses a two-tier detection strategy:\n * Priority 1 (Exact Count): Count actual web search calls when available\n * Priority 2 (Binary Detection): Return 1 if web search indicators are present, 0 otherwise\n *\n * @param result - Raw API response from any provider (OpenAI, Perplexity, OpenRouter, Gemini, etc.)\n * @returns Number of web searches performed (exact count or binary 1/0)\n */\nexport function calculateWebSearchCount(result: unknown): number {\n if (!result || typeof result !== 'object') {\n return 0\n }\n\n // Priority 1: Exact Count\n // Check for OpenAI Responses API web_search_call items\n if ('output' in result && Array.isArray(result.output)) {\n let count = 0\n\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'type' in item && item.type === 'web_search_call') {\n count++\n }\n }\n\n if (count > 0) {\n return count\n }\n }\n\n // Priority 2: Binary Detection (1 or 0)\n\n // Check for citations at root level (Perplexity)\n if ('citations' in result && Array.isArray(result.citations) && result.citations.length > 0) {\n return 1\n }\n\n // Check for search_results at root level (Perplexity via OpenRouter)\n if ('search_results' in result && Array.isArray(result.search_results) && result.search_results.length > 0) {\n return 1\n }\n\n // Check for usage.search_context_size (Perplexity via OpenRouter)\n if ('usage' in result && typeof result.usage === 'object' && result.usage !== null) {\n if ('search_context_size' in result.usage && result.usage.search_context_size) {\n return 1\n }\n }\n\n // Check for annotations with url_citation in choices[].message or choices[].delta (OpenAI/Perplexity)\n if ('choices' in result && Array.isArray(result.choices)) {\n for (const choice of result.choices) {\n if (typeof choice === 'object' && choice !== null) {\n // Check both message (non-streaming) and delta (streaming) for annotations\n const content = ('message' in choice ? choice.message : null) || ('delta' in choice ? choice.delta : null)\n\n if (typeof content === 'object' && content !== null && 'annotations' in content) {\n const annotations = content.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n\n // Check for annotations in output[].content[] (OpenAI Responses API)\n if ('output' in result && Array.isArray(result.output)) {\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'content' in item) {\n const content = item.content\n\n if (Array.isArray(content)) {\n for (const contentItem of content) {\n if (typeof contentItem === 'object' && contentItem !== null && 'annotations' in contentItem) {\n const annotations = contentItem.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n }\n }\n\n // Check for grounding_metadata (Gemini)\n if ('candidates' in result && Array.isArray(result.candidates)) {\n for (const candidate of result.candidates) {\n if (\n typeof candidate === 'object' &&\n candidate !== null &&\n 'grounding_metadata' in candidate &&\n candidate.grounding_metadata\n ) {\n return 1\n }\n }\n }\n\n return 0\n}\n\n/**\n * Extract available tool calls from the request parameters.\n * These are the tools provided to the LLM, not the tool calls in the response.\n */\nexport const extractAvailableToolCalls = (\n provider: string,\n params: any\n): ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null => {\n if (provider === 'anthropic') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'gemini') {\n if (params.config && params.config.tools) {\n return params.config.tools\n }\n\n return null\n } else if (provider === 'openai') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'vercel') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n }\n\n return null\n}\n\nexport enum AIEvent {\n Generation = '$ai_generation',\n Embedding = '$ai_embedding',\n}\n\nexport function sanitizeValues(obj: any): any {\n if (obj === undefined || obj === null) {\n return obj\n }\n const jsonSafe = JSON.parse(JSON.stringify(obj))\n if (typeof jsonSafe === 'string') {\n // Sanitize lone surrogates by round-tripping through UTF-8\n return new TextDecoder().decode(new TextEncoder().encode(jsonSafe))\n } else if (Array.isArray(jsonSafe)) {\n return jsonSafe.map(sanitizeValues)\n } else if (jsonSafe && typeof jsonSafe === 'object') {\n return Object.fromEntries(Object.entries(jsonSafe).map(([k, v]) => [k, sanitizeValues(v)]))\n }\n return jsonSafe\n}\n\nconst POSTHOG_PARAMS_MAP: Record<keyof MonitoringParams, string> = {\n posthogDistinctId: 'distinctId',\n posthogTraceId: 'traceId',\n posthogProperties: 'properties',\n posthogPrivacyMode: 'privacyMode',\n posthogGroups: 'groups',\n posthogModelOverride: 'modelOverride',\n posthogProviderOverride: 'providerOverride',\n posthogCostOverride: 'costOverride',\n posthogCaptureImmediate: 'captureImmediate',\n}\n\nexport function extractPosthogParams<T>(body: T & MonitoringParams): {\n providerParams: T\n posthogParams: MonitoringEventPropertiesWithDefaults\n} {\n const providerParams: Record<string, unknown> = {}\n const posthogParams: Record<string, unknown> = {}\n\n for (const [key, value] of Object.entries(body)) {\n if (POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]) {\n posthogParams[POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]] = value\n } else if (key.startsWith('posthog')) {\n console.warn(`Unknown Posthog parameter ${key}`)\n } else {\n providerParams[key] = value\n }\n }\n\n return {\n providerParams: providerParams as T,\n posthogParams: addDefaults(posthogParams),\n }\n}\n\nfunction addDefaults(params: MonitoringEventProperties): MonitoringEventPropertiesWithDefaults {\n return {\n ...params,\n privacyMode: params.privacyMode ?? false,\n traceId: params.traceId ?? uuidv4(),\n }\n}\n\nexport function formatOpenAIResponsesInput(input: unknown, instructions?: string | null): FormattedMessage[] {\n const messages: FormattedMessage[] = []\n\n if (instructions) {\n messages.push({\n role: 'system',\n content: instructions,\n })\n }\n\n if (Array.isArray(input)) {\n for (const item of input) {\n if (typeof item === 'string') {\n messages.push({ role: 'user', content: item })\n } else if (item && typeof item === 'object') {\n const obj = item as Record<string, unknown>\n const role = isString(obj.role) ? obj.role : 'user'\n\n // Handle content properly - preserve structure for objects/arrays\n const content = obj.content ?? obj.text ?? item\n messages.push({ role, content: toContentString(content) })\n } else {\n messages.push({ role: 'user', content: toContentString(item) })\n }\n }\n } else if (typeof input === 'string') {\n messages.push({ role: 'user', content: input })\n } else if (input) {\n messages.push({ role: 'user', content: toContentString(input) })\n }\n\n return messages\n}\n","import { sanitizeValues } from './utils'\n\nconst DEFAULT_MAX_DEPTH = 3\nconst MAX_STACK_LINES = 20\n\nexport function serializeError(value: unknown, depth = DEFAULT_MAX_DEPTH): unknown {\n if (depth < 0 || value === null || typeof value !== 'object') {\n return value\n }\n if (value instanceof Error) {\n const out: Record<string, unknown> = {\n name: value.name,\n message: value.message,\n stack: truncateStack(value.stack),\n }\n for (const key of Object.keys(value)) {\n out[key] = serializeError((value as unknown as Record<string, unknown>)[key], depth - 1)\n }\n if (value.cause !== undefined) {\n out.cause = serializeError(value.cause, depth - 1)\n }\n return out\n }\n if (Array.isArray(value)) {\n return value.map((item) => serializeError(item, depth - 1))\n }\n return value\n}\n\nexport function stringifyError(error: unknown): string {\n try {\n return JSON.stringify(sanitizeValues(serializeError(error)))\n } catch {\n if (error instanceof Error) {\n return JSON.stringify({ name: error.name, message: error.message })\n }\n return JSON.stringify({ message: String(error) })\n }\n}\n\nfunction truncateStack(stack: string | undefined): string | undefined {\n if (!stack) {\n return stack\n }\n const lines = stack.split('\\n')\n if (lines.length <= MAX_STACK_LINES) {\n return stack\n }\n return [...lines.slice(0, MAX_STACK_LINES), '... (truncated)'].join('\\n')\n}\n","import { EventMessage, PostHog } from 'posthog-node'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport { v4 as uuidv4 } from 'uuid'\nimport { uuidv7, ErrorTracking as CoreErrorTracking } from '@posthog/core'\nimport { version } from '../package.json'\nimport type { TokenUsage } from './types'\nimport { stringifyError } from './serializeError'\nimport { AIEvent, CostOverride, getTokensSource, sanitizeValues, withPrivacyMode } from './utils'\n\ntype AnthropicTool = AnthropicOriginal.Tool\n\n/**\n * Options for `captureAiGeneration`. Mirrors the `$ai_generation` event shape\n * directly so that any caller — first-party SDK wrappers and external code\n * alike — produces an identical event.\n */\nexport interface CaptureAiGenerationOptions {\n distinctId?: string\n /** Auto-generated when omitted. */\n traceId?: string\n /** Defaults to `$ai_generation`. */\n eventType?: AIEvent\n\n /** Required for the event to be useful, but accepted as optional so SDK wrappers can pass through whatever they detect. */\n model?: string\n provider: string\n input: unknown\n output: unknown\n\n /** Maps to `$ai_model_parameters` (temperature, max_tokens, top_p, …). */\n modelParameters?: Record<string, unknown>\n\n baseURL?: string\n httpStatus?: number\n /** Wall-clock latency in seconds. */\n latency?: number\n /** Time from request start to the first streamed token, in seconds. */\n timeToFirstToken?: number\n\n usage?: TokenUsage\n\n /** Extra event properties merged into the captured event. */\n properties?: Record<string, unknown>\n /** Mapping of group type to group id, matching `EventMessage.groups`. */\n groups?: Record<string, string | number>\n privacyMode?: boolean\n\n /**\n * For SDK wrappers: overrides the auto-detected model. External callers\n * should pass `model` directly instead.\n */\n modelOverride?: string\n /**\n * For SDK wrappers: overrides the auto-detected provider. External callers\n * should pass `provider` directly instead.\n */\n providerOverride?: string\n costOverride?: CostOverride\n\n tools?: ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null\n stopReason?: string\n /** When set, the event is captured as an error. */\n error?: unknown\n\n /** Awaits delivery instead of batching. Useful in serverless environments. */\n captureImmediate?: boolean\n}\n\n/**\n * Capture an `$ai_generation` (or `$ai_embedding`) event to PostHog.\n *\n * This is the canonical primitive that every `@posthog/ai` wrapper\n * (`withTracing`, `OpenAI`, `Anthropic`, `GoogleGenAI`, …) funnels through, so\n * external code can use it directly to instrument LLM calls made through\n * arbitrary clients (Cloudflare Workers AI, custom HTTP, etc.) and get the\n * same events the SDK wrappers produce.\n *\n * When `error` is set, the event is captured as an error. If the error is an\n * object, it is mutated in place to set `__posthog_previously_captured_error`\n * so callers can re-throw the original error reference safely.\n */\nexport const captureAiGeneration = async (client: PostHog, options: CaptureAiGenerationOptions): Promise<void> => {\n if (!client.capture) {\n return\n }\n\n const traceId = options.traceId ?? uuidv4()\n const eventType = options.eventType ?? AIEvent.Generation\n const privacyMode = options.privacyMode ?? false\n const usage = options.usage ?? {}\n\n const safeInput = sanitizeValues(options.input)\n const safeOutput = sanitizeValues(options.output)\n\n let httpStatus = options.httpStatus\n let errorData: Record<string, unknown> = {}\n if (options.error) {\n if (httpStatus === undefined) {\n if (typeof options.error === 'object' && 'status' in options.error && typeof options.error.status === 'number') {\n httpStatus = options.error.status\n } else {\n httpStatus = 500\n }\n }\n\n let exceptionId: string | undefined\n if (client.options?.enableExceptionAutocapture) {\n exceptionId = uuidv7()\n client.captureException(options.error, undefined, { $ai_trace_id: traceId }, exceptionId)\n if (typeof options.error === 'object') {\n ;(options.error as CoreErrorTracking.PreviouslyCapturedError).__posthog_previously_captured_error = true\n }\n }\n\n errorData = {\n $ai_is_error: true,\n $ai_error: stringifyError(options.error),\n $exception_event_id: exceptionId,\n }\n }\n httpStatus = httpStatus ?? 200\n\n let costOverrideData: Record<string, number> = {}\n if (options.costOverride) {\n const inputCostUSD = (options.costOverride.inputCost ?? 0) * (usage.inputTokens ?? 0)\n const outputCostUSD = (options.costOverride.outputCost ?? 0) * (usage.outputTokens ?? 0)\n costOverrideData = {\n $ai_input_cost_usd: inputCostUSD,\n $ai_output_cost_usd: outputCostUSD,\n $ai_total_cost_usd: inputCostUSD + outputCostUSD,\n }\n }\n\n const additionalTokenValues = {\n ...(usage.reasoningTokens ? { $ai_reasoning_tokens: usage.reasoningTokens } : {}),\n ...(usage.cacheReadInputTokens ? { $ai_cache_read_input_tokens: usage.cacheReadInputTokens } : {}),\n ...(usage.cacheCreationInputTokens ? { $ai_cache_creation_input_tokens: usage.cacheCreationInputTokens } : {}),\n ...(usage.webSearchCount ? { $ai_web_search_count: usage.webSearchCount } : {}),\n ...(usage.rawUsage ? { $ai_usage: usage.rawUsage } : {}),\n }\n\n const properties: Record<string, unknown> = {\n $ai_lib: 'posthog-ai',\n $ai_lib_version: version,\n $ai_provider: options.providerOverride ?? options.provider,\n $ai_model: options.modelOverride ?? options.model,\n $ai_model_parameters: options.modelParameters ?? {},\n $ai_input: withPrivacyMode(client, privacyMode, safeInput),\n $ai_output_choices: withPrivacyMode(client, privacyMode, safeOutput),\n $ai_http_status: httpStatus,\n $ai_input_tokens: usage.inputTokens ?? 0,\n ...(usage.outputTokens !== undefined ? { $ai_output_tokens: usage.outputTokens } : {}),\n ...additionalTokenValues,\n $ai_latency: options.latency ?? 0,\n ...(options.timeToFirstToken !== undefined ? { $ai_time_to_first_token: options.timeToFirstToken } : {}),\n $ai_trace_id: traceId,\n $ai_base_url: options.baseURL ?? '',\n ...options.properties,\n $ai_tokens_source: getTokensSource(options.properties),\n ...(options.distinctId ? {} : { $process_person_profile: false }),\n ...(options.stopReason ? { $ai_stop_reason: options.stopReason } : {}),\n ...(options.tools ? { $ai_tools: options.tools } : {}),\n ...errorData,\n ...costOverrideData,\n }\n\n const event: EventMessage = {\n distinctId: options.distinctId ?? traceId,\n event: eventType,\n properties,\n groups: options.groups,\n }\n\n if (options.captureImmediate) {\n await client.captureImmediate(event)\n } else {\n client.capture(event)\n }\n}\n","import AnthropicOriginal, { APIPromise } from '@anthropic-ai/sdk'\nimport { PostHog } from 'posthog-node'\nimport {\n formatResponseAnthropic,\n mergeSystemPrompt,\n MonitoringParams,\n extractAvailableToolCalls,\n extractPosthogParams,\n getModelParams,\n} from '../utils'\nimport { captureAiGeneration } from '../captureAiGeneration'\nimport type { FormattedContentItem, FormattedTextContent, FormattedFunctionCall, FormattedMessage } from '../types'\n\ntype MessageCreateParamsNonStreaming = AnthropicOriginal.Messages.MessageCreateParamsNonStreaming\ntype MessageCreateParamsStreaming = AnthropicOriginal.Messages.MessageCreateParamsStreaming\ntype MessageCreateParams = AnthropicOriginal.Messages.MessageCreateParams\ntype Message = AnthropicOriginal.Messages.Message\ntype RawMessageStreamEvent = AnthropicOriginal.Messages.RawMessageStreamEvent\ntype MessageCreateParamsBase = AnthropicOriginal.Messages.MessageCreateParams\ntype RequestOptions = AnthropicOriginal.RequestOptions\nimport type { Stream } from '@anthropic-ai/sdk/streaming'\nimport { sanitizeAnthropic } from '../sanitization'\n\ninterface ToolInProgress {\n block: FormattedFunctionCall\n inputString: string\n}\n\ninterface MonitoringAnthropicConfig {\n apiKey: string\n posthog: PostHog\n baseURL?: string\n}\n\nexport class PostHogAnthropic extends AnthropicOriginal {\n private readonly phClient: PostHog\n public messages: WrappedMessages\n\n constructor(config: MonitoringAnthropicConfig) {\n const { posthog, ...anthropicConfig } = config\n super(anthropicConfig)\n this.phClient = posthog\n this.messages = new WrappedMessages(this, this.phClient)\n }\n}\n\nexport class WrappedMessages extends AnthropicOriginal.Messages {\n private readonly phClient: PostHog\n private readonly baseURL: string\n\n constructor(parentClient: PostHogAnthropic, phClient: PostHog) {\n super(parentClient)\n this.phClient = phClient\n this.baseURL = parentClient.baseURL\n }\n\n public create(body: MessageCreateParamsNonStreaming, options?: RequestOptions): APIPromise<Message>\n public create(\n body: MessageCreateParamsStreaming & MonitoringParams,\n options?: RequestOptions\n ): APIPromise<Stream<RawMessageStreamEvent>>\n public create(\n body: MessageCreateParamsBase & MonitoringParams,\n options?: RequestOptions\n ): APIPromise<Stream<RawMessageStreamEvent> | Message>\n public create(\n body: MessageCreateParams & MonitoringParams,\n options?: RequestOptions\n ): APIPromise<Message> | APIPromise<Stream<RawMessageStreamEvent>> {\n const { providerParams: anthropicParams, posthogParams } = extractPosthogParams(body)\n const startTime = Date.now()\n\n const parentPromise = super.create(anthropicParams, options)\n\n if (anthropicParams.stream) {\n return parentPromise.then((value) => {\n let accumulatedContent = ''\n const contentBlocks: FormattedContentItem[] = []\n const toolsInProgress: Map<string, ToolInProgress> = new Map()\n let currentTextBlock: FormattedTextContent | null = null\n let firstTokenTime: number | undefined\n let stopReason: string | undefined\n\n const usage: {\n inputTokens: number\n outputTokens: number\n cacheCreationInputTokens?: number\n cacheReadInputTokens?: number\n webSearchCount?: number\n rawUsage?: unknown\n } = {\n inputTokens: 0,\n outputTokens: 0,\n cacheCreationInputTokens: 0,\n cacheReadInputTokens: 0,\n webSearchCount: 0,\n }\n let lastRawUsage: unknown\n if ('tee' in value) {\n const [stream1, stream2] = value.tee()\n ;(async () => {\n try {\n for await (const chunk of stream1) {\n // Handle content block start events\n if (chunk.type === 'content_block_start') {\n if (chunk.content_block?.type === 'text') {\n currentTextBlock = {\n type: 'text',\n text: '',\n }\n\n contentBlocks.push(currentTextBlock)\n } else if (chunk.content_block?.type === 'tool_use') {\n if (firstTokenTime === undefined) {\n firstTokenTime = Date.now()\n }\n\n const toolBlock: FormattedFunctionCall = {\n type: 'function',\n id: chunk.content_block.id,\n function: {\n name: chunk.content_block.name,\n arguments: {},\n },\n }\n\n contentBlocks.push(toolBlock)\n\n toolsInProgress.set(chunk.content_block.id, {\n block: toolBlock,\n inputString: '',\n })\n\n currentTextBlock = null\n }\n }\n\n // Handle text delta events\n if ('delta' in chunk) {\n if ('text' in chunk.delta) {\n const delta = chunk.delta.text\n\n if (firstTokenTime === undefined) {\n firstTokenTime = Date.now()\n }\n\n accumulatedContent += delta\n\n if (currentTextBlock) {\n currentTextBlock.text += delta\n }\n }\n }\n\n // Handle tool input delta events\n if (chunk.type === 'content_block_delta' && chunk.delta?.type === 'input_json_delta') {\n const block = chunk.index !== undefined ? contentBlocks[chunk.index] : undefined\n const toolId = block?.type === 'function' ? block.id : undefined\n\n if (toolId && toolsInProgress.has(toolId)) {\n const tool = toolsInProgress.get(toolId)\n if (tool) {\n tool.inputString += chunk.delta.partial_json || ''\n }\n }\n }\n\n // Handle content block stop events\n if (chunk.type === 'content_block_stop') {\n currentTextBlock = null\n\n // Parse accumulated tool input\n if (chunk.index !== undefined) {\n const block = contentBlocks[chunk.index]\n\n if (block?.type === 'function' && block.id && toolsInProgress.has(block.id)) {\n const tool = toolsInProgress.get(block.id)\n if (tool) {\n try {\n block.function.arguments = JSON.parse(tool.inputString)\n } catch (e) {\n // Keep empty object if parsing fails\n console.error('Error parsing tool input:', e)\n }\n }\n toolsInProgress.delete(block.id)\n }\n }\n }\n\n if (chunk.type == 'message_start') {\n lastRawUsage = chunk.message.usage\n usage.inputTokens = chunk.message.usage.input_tokens ?? 0\n usage.cacheCreationInputTokens = chunk.message.usage.cache_creation_input_tokens ?? 0\n usage.cacheReadInputTokens = chunk.message.usage.cache_read_input_tokens ?? 0\n usage.webSearchCount = chunk.message.usage.server_tool_use?.web_search_requests ?? 0\n }\n if ('usage' in chunk) {\n lastRawUsage = chunk.usage\n usage.outputTokens = chunk.usage.output_tokens ?? 0\n // Update web search count if present in delta\n if (chunk.usage.server_tool_use?.web_search_requests !== undefined) {\n usage.webSearchCount = chunk.usage.server_tool_use.web_search_requests\n }\n }\n\n if (chunk.type === 'message_delta' && 'delta' in chunk) {\n const delta = chunk.delta\n if ('stop_reason' in delta && typeof delta.stop_reason === 'string' && delta.stop_reason) {\n stopReason = delta.stop_reason\n }\n }\n }\n usage.rawUsage = lastRawUsage\n\n const latency = (Date.now() - startTime) / 1000\n const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined\n\n const availableTools = extractAvailableToolCalls('anthropic', anthropicParams)\n\n // Format output to match non-streaming version\n const formattedOutput: FormattedMessage[] =\n contentBlocks.length > 0\n ? [\n {\n role: 'assistant',\n content: contentBlocks,\n },\n ]\n : [\n {\n role: 'assistant',\n content: [{ type: 'text', text: accumulatedContent }],\n },\n ]\n\n await captureAiGeneration(this.phClient, {\n ...posthogParams,\n model: anthropicParams.model,\n provider: 'anthropic',\n input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),\n output: formattedOutput,\n latency,\n timeToFirstToken,\n baseURL: this.baseURL,\n modelParameters: getModelParams(body),\n httpStatus: 200,\n usage,\n stopReason,\n tools: availableTools,\n })\n } catch (error: unknown) {\n await captureAiGeneration(this.phClient, {\n ...posthogParams,\n model: anthropicParams.model,\n provider: 'anthropic',\n input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),\n output: [],\n latency: 0,\n baseURL: this.baseURL,\n modelParameters: getModelParams(body),\n usage: {\n inputTokens: 0,\n outputTokens: 0,\n },\n error: error,\n })\n throw error\n }\n })()\n\n // Return the other stream to the user\n return stream2\n }\n return value\n }) as APIPromise<Stream<RawMessageStreamEvent>>\n } else {\n const wrappedPromise = parentPromise.then(\n async (result) => {\n if ('content' in result) {\n const latency = (Date.now() - startTime) / 1000\n\n const availableTools = extractAvailableToolCalls('anthropic', anthropicParams)\n\n await captureAiGeneration(this.phClient, {\n ...posthogParams,\n model: anthropicParams.model,\n provider: 'anthropic',\n input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),\n output: formatResponseAnthropic(result),\n latency,\n baseURL: this.baseURL,\n modelParameters: getModelParams(body),\n httpStatus: 200,\n usage: {\n inputTokens: result.usage.input_tokens ?? 0,\n outputTokens: result.usage.output_tokens ?? 0,\n cacheCreationInputTokens: result.usage.cache_creation_input_tokens ?? 0,\n cacheReadInputTokens: result.usage.cache_read_input_tokens ?? 0,\n webSearchCount: result.usage.server_tool_use?.web_search_requests ?? 0,\n rawUsage: result.usage,\n },\n stopReason: result.stop_reason ?? undefined,\n tools: availableTools,\n })\n }\n return result\n },\n async (error: any) => {\n await captureAiGeneration(this.phClient, {\n ...posthogParams,\n model: anthropicParams.model,\n provider: 'anthropic',\n input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),\n output: [],\n latency: 0,\n baseURL: this.baseURL,\n modelParameters: getModelParams(body),\n httpStatus: error?.status ? error.status : 500,\n usage: {\n inputTokens: 0,\n outputTokens: 0,\n },\n error: error,\n })\n throw error\n }\n ) as APIPromise<Message>\n\n return wrappedPromise\n }\n }\n}\n\nexport default PostHogAnthropic\n\nexport { PostHogAnthropic as Anthropic }\n"],"names":["DATA_URL_PREFIX_RE","BASE64_ALPHABET_RE","Base64Recognizer","recognize","value","minLength","dataUrl","exec","kind","mediaType","length","confidencePrefix","slice","test","MIME_HINT_KEYS","STRONG_CONTEXT_KEYS","Set","STRONG_CONTEXT_TY