UNPKG

@posthog/ai

Version:
1 lines 76.4 kB
{"version":3,"file":"index.cjs","sources":["../../src/typeGuards.ts","../../src/sanitization.ts","../../src/utils.ts","../../src/gemini/index.ts"],"sourcesContent":["// Type guards for safer type checking\n\nexport const isString = (value: unknown): value is string => {\n return typeof value === 'string'\n}\n\nexport const isObject = (value: unknown): value is Record<string, unknown> => {\n return value !== null && typeof value === 'object' && !Array.isArray(value)\n}\n","import { isString, isObject } from './typeGuards'\n\nconst REDACTED_IMAGE_PLACEHOLDER = '[base64 image redacted]'\n\n// ============================================\n// Multimodal Feature Toggle\n// ============================================\n\nconst isMultimodalEnabled = (): boolean => {\n const val = process.env._INTERNAL_LLMA_MULTIMODAL || ''\n return val.toLowerCase() === 'true' || val === '1' || val.toLowerCase() === 'yes'\n}\n\n// ============================================\n// Base64 Detection Helpers\n// ============================================\n\nconst isBase64DataUrl = (str: string): boolean => {\n return /^data:([^;]+);base64,/.test(str)\n}\n\nconst isValidUrl = (str: string): boolean => {\n try {\n new URL(str)\n return true\n } catch {\n // Not an absolute URL, check if it's a relative URL or path\n return str.startsWith('/') || str.startsWith('./') || str.startsWith('../')\n }\n}\n\nconst isRawBase64 = (str: string): boolean => {\n // Skip if it's a valid URL or path\n if (isValidUrl(str)) {\n return false\n }\n\n // Check if it's a valid base64 string\n // Base64 images are typically at least a few hundred chars, but we'll be conservative\n return str.length > 20 && /^[A-Za-z0-9+/]+=*$/.test(str)\n}\n\nexport function redactBase64DataUrl(str: string): string\nexport function redactBase64DataUrl(str: unknown): unknown\nexport function redactBase64DataUrl(str: unknown): unknown {\n if (isMultimodalEnabled()) return str\n if (!isString(str)) return str\n\n // Check for data URL format\n if (isBase64DataUrl(str)) {\n return REDACTED_IMAGE_PLACEHOLDER\n }\n\n // Check for raw base64 (Vercel sends raw base64 for inline images)\n if (isRawBase64(str)) {\n return REDACTED_IMAGE_PLACEHOLDER\n }\n\n return str\n}\n\n// ============================================\n// Common Message Processing\n// ============================================\n\ntype ContentTransformer = (item: unknown) => unknown\n\nconst processMessages = (messages: unknown, transformContent: ContentTransformer): unknown => {\n if (!messages) return messages\n\n const processContent = (content: unknown): unknown => {\n if (typeof content === 'string') return content\n\n if (!content) return content\n\n if (Array.isArray(content)) {\n return content.map(transformContent)\n }\n\n // Handle single object content\n return transformContent(content)\n }\n\n const processMessage = (msg: unknown): unknown => {\n if (!isObject(msg) || !('content' in msg)) return msg\n return { ...msg, content: processContent(msg.content) }\n }\n\n // Handle both arrays and single messages\n if (Array.isArray(messages)) {\n return messages.map(processMessage)\n }\n\n return processMessage(messages)\n}\n\n// ============================================\n// Provider-Specific Image Sanitizers\n// ============================================\n\nconst sanitizeOpenAIImage = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // Handle image_url format\n if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {\n return {\n ...item,\n image_url: {\n ...item.image_url,\n url: redactBase64DataUrl(item.image_url.url),\n },\n }\n }\n\n // Handle audio format\n if (item.type === 'audio' && 'data' in item) {\n if (isMultimodalEnabled()) return item\n return { ...item, data: REDACTED_IMAGE_PLACEHOLDER }\n }\n\n return item\n}\n\nconst sanitizeOpenAIResponseImage = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // Handle input_image format\n if (item.type === 'input_image' && 'image_url' in item) {\n return {\n ...item,\n image_url: redactBase64DataUrl(item.image_url),\n }\n }\n\n return item\n}\n\nconst sanitizeAnthropicImage = (item: unknown): unknown => {\n if (isMultimodalEnabled()) return item\n if (!isObject(item)) return item\n\n // Handle Anthropic's image and document formats (same structure, different type field)\n if (\n (item.type === 'image' || item.type === 'document') &&\n 'source' in item &&\n isObject(item.source) &&\n item.source.type === 'base64' &&\n 'data' in item.source\n ) {\n return {\n ...item,\n source: {\n ...item.source,\n data: REDACTED_IMAGE_PLACEHOLDER,\n },\n }\n }\n\n return item\n}\n\nconst sanitizeGeminiPart = (part: unknown): unknown => {\n if (isMultimodalEnabled()) return part\n if (!isObject(part)) return part\n\n // Handle Gemini's inline data format (images, audio, PDFs all use inlineData)\n if ('inlineData' in part && isObject(part.inlineData) && 'data' in part.inlineData) {\n return {\n ...part,\n inlineData: {\n ...part.inlineData,\n data: REDACTED_IMAGE_PLACEHOLDER,\n },\n }\n }\n\n return part\n}\n\nconst processGeminiItem = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // If it has parts, process them\n if ('parts' in item && item.parts) {\n const parts = Array.isArray(item.parts) ? item.parts.map(sanitizeGeminiPart) : sanitizeGeminiPart(item.parts)\n\n return { ...item, parts }\n }\n\n return item\n}\n\nconst sanitizeLangChainImage = (item: unknown): unknown => {\n if (!isObject(item)) return item\n\n // OpenAI style\n if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {\n return {\n ...item,\n image_url: {\n ...item.image_url,\n url: redactBase64DataUrl(item.image_url.url),\n },\n }\n }\n\n // Direct image with data field\n if (item.type === 'image' && 'data' in item) {\n return { ...item, data: redactBase64DataUrl(item.data) }\n }\n\n // Anthropic style\n if (item.type === 'image' && 'source' in item && isObject(item.source) && 'data' in item.source) {\n if (isMultimodalEnabled()) return item\n return {\n ...item,\n source: {\n ...item.source,\n data: redactBase64DataUrl(item.source.data),\n },\n }\n }\n\n // Google style\n if (item.type === 'media' && 'data' in item) {\n return { ...item, data: redactBase64DataUrl(item.data) }\n }\n\n return item\n}\n\n// Export individual sanitizers for tree-shaking\nexport const sanitizeOpenAI = (data: unknown): unknown => {\n return processMessages(data, sanitizeOpenAIImage)\n}\n\nexport const sanitizeOpenAIResponse = (data: unknown): unknown => {\n return processMessages(data, sanitizeOpenAIResponseImage)\n}\n\nexport const sanitizeAnthropic = (data: unknown): unknown => {\n return processMessages(data, sanitizeAnthropicImage)\n}\n\nexport const sanitizeGemini = (data: unknown): unknown => {\n // Gemini has a different structure with 'parts' directly on items instead of 'content'\n // So we need custom processing instead of using processMessages\n if (!data) return data\n\n if (Array.isArray(data)) {\n return data.map(processGeminiItem)\n }\n\n return processGeminiItem(data)\n}\n\nexport const sanitizeLangChain = (data: unknown): unknown => {\n return processMessages(data, sanitizeLangChainImage)\n}\n","import { EventMessage, PostHog } from 'posthog-node'\nimport OpenAIOrignal from 'openai'\nimport AnthropicOriginal from '@anthropic-ai/sdk'\nimport type { ChatCompletionTool } from 'openai/resources/chat/completions'\nimport type { ResponseCreateParamsWithTools } from 'openai/lib/ResponsesParser'\nimport type { Tool as GeminiTool } from '@google/genai'\nimport type { FormattedMessage, FormattedContent, TokenUsage } from './types'\nimport { version } from '../package.json'\nimport { v4 as uuidv4 } from 'uuid'\nimport { isString } from './typeGuards'\nimport { uuidv7, ErrorTracking as CoreErrorTracking } from '@posthog/core'\nimport { redactBase64DataUrl } from './sanitization'\n\ntype ChatCompletionCreateParamsBase = OpenAIOrignal.Chat.Completions.ChatCompletionCreateParams\ntype MessageCreateParams = AnthropicOriginal.Messages.MessageCreateParams\ntype ResponseCreateParams = OpenAIOrignal.Responses.ResponseCreateParams\ntype EmbeddingCreateParams = OpenAIOrignal.EmbeddingCreateParams\ntype TranscriptionCreateParams = OpenAIOrignal.Audio.Transcriptions.TranscriptionCreateParams\ntype AnthropicTool = AnthropicOriginal.Tool\n\n// limit large outputs by truncating to 200kb (approx 200k bytes)\nexport const MAX_OUTPUT_SIZE = 200000\nconst STRING_FORMAT = 'utf8'\n\n/**\n * Safely converts content to a string, preserving structure for objects/arrays.\n * - If content is already a string, returns it as-is\n * - If content is an object or array, stringifies it with JSON.stringify to preserve structure\n * - Otherwise, converts to string with String()\n *\n * This prevents the \"[object Object]\" bug when objects are naively converted to strings.\n *\n * @param content - The content to convert to a string\n * @returns A string representation that preserves structure for complex types\n */\nexport function toContentString(content: unknown): string {\n if (typeof content === 'string') {\n return content\n }\n if (content !== undefined && content !== null && typeof content === 'object') {\n try {\n return JSON.stringify(content)\n } catch {\n // Fallback for circular refs, BigInt, or objects with throwing toJSON\n return String(content)\n }\n }\n return String(content)\n}\n\nexport interface MonitoringEventPropertiesWithDefaults {\n distinctId?: string\n traceId: string\n properties?: Record<string, any>\n privacyMode: boolean\n groups?: Record<string, any>\n modelOverride?: string\n providerOverride?: string\n costOverride?: CostOverride\n captureImmediate?: boolean\n}\n\nexport type MonitoringEventProperties = Partial<MonitoringEventPropertiesWithDefaults>\n\nexport type MonitoringParams = {\n [K in keyof MonitoringEventProperties as `posthog${Capitalize<string & K>}`]: MonitoringEventProperties[K]\n}\n\nexport interface CostOverride {\n inputCost: number\n outputCost: number\n}\n\nexport const getModelParams = (\n params:\n | ((\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams)\n | null\n): Record<string, any> => {\n if (!params) {\n return {}\n }\n const modelParams: Record<string, any> = {}\n const paramKeys = [\n 'temperature',\n 'max_tokens',\n 'max_completion_tokens',\n 'top_p',\n 'frequency_penalty',\n 'presence_penalty',\n 'n',\n 'stop',\n 'stream',\n 'streaming',\n 'language',\n 'response_format',\n 'timestamp_granularities',\n ] as const\n\n for (const key of paramKeys) {\n if (key in params && (params as any)[key] !== undefined) {\n modelParams[key] = (params as any)[key]\n }\n }\n return modelParams\n}\n\n/**\n * Helper to format responses (non-streaming) for consumption\n */\nexport const formatResponse = (response: any, provider: string): FormattedMessage[] => {\n if (!response) {\n return []\n }\n if (provider === 'anthropic') {\n return formatResponseAnthropic(response)\n } else if (provider === 'openai') {\n return formatResponseOpenAI(response)\n } else if (provider === 'gemini') {\n return formatResponseGemini(response)\n }\n return []\n}\n\nexport const formatResponseAnthropic = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n const content: FormattedContent = []\n\n for (const choice of response.content ?? []) {\n if (choice?.type === 'text' && choice?.text) {\n content.push({ type: 'text', text: choice.text })\n } else if (choice?.type === 'tool_use' && choice?.name && choice?.id) {\n content.push({\n type: 'function',\n id: choice.id,\n function: {\n name: choice.name,\n arguments: choice.input || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n\n return output\n}\n\nexport const formatResponseOpenAI = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.choices) {\n for (const choice of response.choices) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n if (choice.message) {\n if (choice.message.role) {\n role = choice.message.role\n }\n\n if (choice.message.content) {\n content.push({ type: 'text', text: choice.message.content })\n }\n\n if (choice.message.tool_calls) {\n for (const toolCall of choice.message.tool_calls) {\n content.push({\n type: 'function',\n id: toolCall.id,\n function: {\n name: toolCall.function.name,\n arguments: toolCall.function.arguments,\n },\n })\n }\n }\n\n // Handle audio output (gpt-4o-audio-preview)\n if (choice.message.audio) {\n content.push({\n type: 'audio',\n ...choice.message.audio,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n }\n\n // Handle Responses API format\n if (response.output) {\n const content: FormattedContent = []\n let role = 'assistant'\n\n for (const item of response.output) {\n if (item.type === 'message') {\n role = item.role\n\n if (item.content && Array.isArray(item.content)) {\n for (const contentItem of item.content) {\n if (contentItem.type === 'output_text' && contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.text) {\n content.push({ type: 'text', text: contentItem.text })\n } else if (contentItem.type === 'input_image' && contentItem.image_url) {\n content.push({\n type: 'image',\n image: contentItem.image_url,\n })\n }\n }\n } else if (item.content) {\n content.push({ type: 'text', text: String(item.content) })\n }\n } else if (item.type === 'function_call') {\n content.push({\n type: 'function',\n id: item.call_id || item.id || '',\n function: {\n name: item.name,\n arguments: item.arguments || {},\n },\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role,\n content,\n })\n }\n }\n\n return output\n}\n\nexport const formatResponseGemini = (response: any): FormattedMessage[] => {\n const output: FormattedMessage[] = []\n\n if (response.candidates && Array.isArray(response.candidates)) {\n for (const candidate of response.candidates) {\n if (candidate.content && candidate.content.parts) {\n const content: FormattedContent = []\n\n for (const part of candidate.content.parts) {\n if (part.text) {\n content.push({ type: 'text', text: part.text })\n } else if (part.functionCall) {\n content.push({\n type: 'function',\n function: {\n name: part.functionCall.name,\n arguments: part.functionCall.args,\n },\n })\n } else if (part.inlineData) {\n // Handle audio/media inline data\n const mimeType = part.inlineData.mimeType || 'audio/pcm'\n let data = part.inlineData.data\n\n // Handle binary data (Uint8Array/Buffer -> base64)\n if (data instanceof Uint8Array) {\n if (typeof Buffer !== 'undefined') {\n data = Buffer.from(data).toString('base64')\n } else {\n let binary = ''\n for (let i = 0; i < data.length; i++) {\n binary += String.fromCharCode(data[i])\n }\n data = btoa(binary)\n }\n }\n\n // Sanitize base64 data for images and other large inline data\n data = redactBase64DataUrl(data)\n\n content.push({\n type: 'audio',\n mime_type: mimeType,\n data: data,\n })\n }\n }\n\n if (content.length > 0) {\n output.push({\n role: 'assistant',\n content,\n })\n }\n } else if (candidate.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: candidate.text }],\n })\n }\n }\n } else if (response.text) {\n output.push({\n role: 'assistant',\n content: [{ type: 'text', text: response.text }],\n })\n }\n\n return output\n}\n\nexport const mergeSystemPrompt = (params: MessageCreateParams & MonitoringParams, provider: string): any => {\n if (provider == 'anthropic') {\n const messages = params.messages || []\n if (!(params as any).system) {\n return messages\n }\n const systemMessage = (params as any).system\n return [{ role: 'system', content: systemMessage }, ...messages]\n }\n return params.messages\n}\n\nexport const withPrivacyMode = (client: PostHog, privacyMode: boolean, input: any): any => {\n return (client as any).privacy_mode || privacyMode ? null : input\n}\n\nfunction toSafeString(input: unknown): string {\n if (input === undefined || input === null) {\n return ''\n }\n if (typeof input === 'string') {\n return input\n }\n try {\n return JSON.stringify(input)\n } catch {\n console.warn('Failed to stringify input', input)\n return ''\n }\n}\n\nexport const truncate = (input: unknown): string => {\n const str = toSafeString(input)\n if (str === '') {\n return ''\n }\n\n // Check if we need to truncate and ensure STRING_FORMAT is respected\n const encoder = new TextEncoder()\n const buffer = encoder.encode(str)\n if (buffer.length <= MAX_OUTPUT_SIZE) {\n // Ensure STRING_FORMAT is respected\n return new TextDecoder(STRING_FORMAT).decode(buffer)\n }\n\n // Truncate the buffer and ensure a valid string is returned\n const truncatedBuffer = buffer.slice(0, MAX_OUTPUT_SIZE)\n // fatal: false means we get U+FFFD at the end if truncation broke the encoding\n const decoder = new TextDecoder(STRING_FORMAT, { fatal: false })\n let truncatedStr = decoder.decode(truncatedBuffer)\n if (truncatedStr.endsWith('\\uFFFD')) {\n truncatedStr = truncatedStr.slice(0, -1)\n }\n return `${truncatedStr}... [truncated]`\n}\n\n/**\n * Calculate web search count from raw API response.\n *\n * Uses a two-tier detection strategy:\n * Priority 1 (Exact Count): Count actual web search calls when available\n * Priority 2 (Binary Detection): Return 1 if web search indicators are present, 0 otherwise\n *\n * @param result - Raw API response from any provider (OpenAI, Perplexity, OpenRouter, Gemini, etc.)\n * @returns Number of web searches performed (exact count or binary 1/0)\n */\nexport function calculateWebSearchCount(result: unknown): number {\n if (!result || typeof result !== 'object') {\n return 0\n }\n\n // Priority 1: Exact Count\n // Check for OpenAI Responses API web_search_call items\n if ('output' in result && Array.isArray(result.output)) {\n let count = 0\n\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'type' in item && item.type === 'web_search_call') {\n count++\n }\n }\n\n if (count > 0) {\n return count\n }\n }\n\n // Priority 2: Binary Detection (1 or 0)\n\n // Check for citations at root level (Perplexity)\n if ('citations' in result && Array.isArray(result.citations) && result.citations.length > 0) {\n return 1\n }\n\n // Check for search_results at root level (Perplexity via OpenRouter)\n if ('search_results' in result && Array.isArray(result.search_results) && result.search_results.length > 0) {\n return 1\n }\n\n // Check for usage.search_context_size (Perplexity via OpenRouter)\n if ('usage' in result && typeof result.usage === 'object' && result.usage !== null) {\n if ('search_context_size' in result.usage && result.usage.search_context_size) {\n return 1\n }\n }\n\n // Check for annotations with url_citation in choices[].message or choices[].delta (OpenAI/Perplexity)\n if ('choices' in result && Array.isArray(result.choices)) {\n for (const choice of result.choices) {\n if (typeof choice === 'object' && choice !== null) {\n // Check both message (non-streaming) and delta (streaming) for annotations\n const content = ('message' in choice ? choice.message : null) || ('delta' in choice ? choice.delta : null)\n\n if (typeof content === 'object' && content !== null && 'annotations' in content) {\n const annotations = content.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n\n // Check for annotations in output[].content[] (OpenAI Responses API)\n if ('output' in result && Array.isArray(result.output)) {\n for (const item of result.output) {\n if (typeof item === 'object' && item !== null && 'content' in item) {\n const content = item.content\n\n if (Array.isArray(content)) {\n for (const contentItem of content) {\n if (typeof contentItem === 'object' && contentItem !== null && 'annotations' in contentItem) {\n const annotations = contentItem.annotations\n\n if (Array.isArray(annotations)) {\n const hasUrlCitation = annotations.some((ann: unknown) => {\n return typeof ann === 'object' && ann !== null && 'type' in ann && ann.type === 'url_citation'\n })\n\n if (hasUrlCitation) {\n return 1\n }\n }\n }\n }\n }\n }\n }\n }\n\n // Check for grounding_metadata (Gemini)\n if ('candidates' in result && Array.isArray(result.candidates)) {\n for (const candidate of result.candidates) {\n if (\n typeof candidate === 'object' &&\n candidate !== null &&\n 'grounding_metadata' in candidate &&\n candidate.grounding_metadata\n ) {\n return 1\n }\n }\n }\n\n return 0\n}\n\n/**\n * Extract available tool calls from the request parameters.\n * These are the tools provided to the LLM, not the tool calls in the response.\n */\nexport const extractAvailableToolCalls = (\n provider: string,\n params: any\n): ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null => {\n if (provider === 'anthropic') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'gemini') {\n if (params.config && params.config.tools) {\n return params.config.tools\n }\n\n return null\n } else if (provider === 'openai') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n } else if (provider === 'vercel') {\n if (params.tools) {\n return params.tools\n }\n\n return null\n }\n\n return null\n}\n\nexport enum AIEvent {\n Generation = '$ai_generation',\n Embedding = '$ai_embedding',\n}\n\nexport type SendEventToPosthogParams = {\n client: PostHog\n eventType?: AIEvent\n distinctId?: string\n traceId: string\n model?: string\n provider: string\n input: any\n output: any\n latency: number\n timeToFirstToken?: number\n baseURL: string\n httpStatus: number\n usage?: TokenUsage\n params: (\n | ChatCompletionCreateParamsBase\n | MessageCreateParams\n | ResponseCreateParams\n | ResponseCreateParamsWithTools\n | EmbeddingCreateParams\n | TranscriptionCreateParams\n ) &\n MonitoringParams\n error?: unknown\n exceptionId?: string\n tools?: ChatCompletionTool[] | AnthropicTool[] | GeminiTool[] | null\n captureImmediate?: boolean\n}\n\nfunction sanitizeValues(obj: any): any {\n if (obj === undefined || obj === null) {\n return obj\n }\n const jsonSafe = JSON.parse(JSON.stringify(obj))\n if (typeof jsonSafe === 'string') {\n // Sanitize lone surrogates by round-tripping through UTF-8\n return new TextDecoder().decode(new TextEncoder().encode(jsonSafe))\n } else if (Array.isArray(jsonSafe)) {\n return jsonSafe.map(sanitizeValues)\n } else if (jsonSafe && typeof jsonSafe === 'object') {\n return Object.fromEntries(Object.entries(jsonSafe).map(([k, v]) => [k, sanitizeValues(v)]))\n }\n return jsonSafe\n}\n\nconst POSTHOG_PARAMS_MAP: Record<keyof MonitoringParams, string> = {\n posthogDistinctId: 'distinctId',\n posthogTraceId: 'traceId',\n posthogProperties: 'properties',\n posthogPrivacyMode: 'privacyMode',\n posthogGroups: 'groups',\n posthogModelOverride: 'modelOverride',\n posthogProviderOverride: 'providerOverride',\n posthogCostOverride: 'costOverride',\n posthogCaptureImmediate: 'captureImmediate',\n}\n\nexport function extractPosthogParams<T>(body: T & MonitoringParams): {\n providerParams: T\n posthogParams: MonitoringEventPropertiesWithDefaults\n} {\n const providerParams: Record<string, unknown> = {}\n const posthogParams: Record<string, unknown> = {}\n\n for (const [key, value] of Object.entries(body)) {\n if (POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]) {\n posthogParams[POSTHOG_PARAMS_MAP[key as keyof MonitoringParams]] = value\n } else if (key.startsWith('posthog')) {\n console.warn(`Unknown Posthog parameter ${key}`)\n } else {\n providerParams[key] = value\n }\n }\n\n return {\n providerParams: providerParams as T,\n posthogParams: addDefaults(posthogParams),\n }\n}\n\nfunction addDefaults(params: MonitoringEventProperties): MonitoringEventPropertiesWithDefaults {\n return {\n ...params,\n privacyMode: params.privacyMode ?? false,\n traceId: params.traceId ?? uuidv4(),\n }\n}\n\nexport const sendEventWithErrorToPosthog = async ({\n client,\n traceId,\n error,\n ...args\n}: Omit<SendEventToPosthogParams, 'error' | 'httpStatus'> &\n Required<Pick<SendEventToPosthogParams, 'error'>>): Promise<unknown> => {\n const httpStatus =\n error && typeof error === 'object' && 'status' in error ? ((error as { status?: number }).status ?? 500) : 500\n\n const properties = { client, traceId, httpStatus, error: JSON.stringify(error), ...args }\n const enrichedError = error as CoreErrorTracking.PreviouslyCapturedError\n\n if (client.options?.enableExceptionAutocapture) {\n // assign a uuid that can be used to link the trace and exception events\n const exceptionId = uuidv7()\n client.captureException(error, undefined, { $ai_trace_id: traceId }, exceptionId)\n enrichedError.__posthog_previously_captured_error = true\n properties.exceptionId = exceptionId\n }\n\n await sendEventToPosthog(properties)\n\n return enrichedError\n}\n\nexport const sendEventToPosthog = async ({\n client,\n eventType = AIEvent.Generation,\n distinctId,\n traceId,\n model,\n provider,\n input,\n output,\n latency,\n timeToFirstToken,\n baseURL,\n params,\n httpStatus = 200,\n usage = {},\n error,\n exceptionId,\n tools,\n captureImmediate = false,\n}: SendEventToPosthogParams): Promise<void> => {\n if (!client.capture) {\n return Promise.resolve()\n }\n // sanitize input and output for UTF-8 validity\n const safeInput = sanitizeValues(input)\n const safeOutput = sanitizeValues(output)\n const safeError = sanitizeValues(error)\n\n let errorData = {}\n if (error) {\n errorData = {\n $ai_is_error: true,\n $ai_error: safeError,\n $exception_event_id: exceptionId,\n }\n }\n let costOverrideData = {}\n if (params.posthogCostOverride) {\n const inputCostUSD = (params.posthogCostOverride.inputCost ?? 0) * (usage.inputTokens ?? 0)\n const outputCostUSD = (params.posthogCostOverride.outputCost ?? 0) * (usage.outputTokens ?? 0)\n costOverrideData = {\n $ai_input_cost_usd: inputCostUSD,\n $ai_output_cost_usd: outputCostUSD,\n $ai_total_cost_usd: inputCostUSD + outputCostUSD,\n }\n }\n\n const additionalTokenValues = {\n ...(usage.reasoningTokens ? { $ai_reasoning_tokens: usage.reasoningTokens } : {}),\n ...(usage.cacheReadInputTokens ? { $ai_cache_read_input_tokens: usage.cacheReadInputTokens } : {}),\n ...(usage.cacheCreationInputTokens ? { $ai_cache_creation_input_tokens: usage.cacheCreationInputTokens } : {}),\n ...(usage.webSearchCount ? { $ai_web_search_count: usage.webSearchCount } : {}),\n ...(usage.rawUsage ? { $ai_usage: usage.rawUsage } : {}),\n }\n\n const properties = {\n $ai_lib: 'posthog-ai',\n $ai_lib_version: version,\n $ai_provider: params.posthogProviderOverride ?? provider,\n $ai_model: params.posthogModelOverride ?? model,\n $ai_model_parameters: getModelParams(params),\n $ai_input: withPrivacyMode(client, params.posthogPrivacyMode ?? false, safeInput),\n $ai_output_choices: withPrivacyMode(client, params.posthogPrivacyMode ?? false, safeOutput),\n $ai_http_status: httpStatus,\n $ai_input_tokens: usage.inputTokens ?? 0,\n ...(usage.outputTokens !== undefined ? { $ai_output_tokens: usage.outputTokens } : {}),\n ...additionalTokenValues,\n $ai_latency: latency,\n ...(timeToFirstToken !== undefined ? { $ai_time_to_first_token: timeToFirstToken } : {}),\n $ai_trace_id: traceId,\n $ai_base_url: baseURL,\n ...params.posthogProperties,\n ...(distinctId ? {} : { $process_person_profile: false }),\n ...(tools ? { $ai_tools: tools } : {}),\n ...errorData,\n ...costOverrideData,\n }\n\n const event: EventMessage = {\n distinctId: distinctId ?? traceId,\n event: eventType,\n properties,\n groups: params.posthogGroups,\n }\n\n if (captureImmediate) {\n // await capture promise to send single event in serverless environments\n await client.captureImmediate(event)\n } else {\n client.capture(event)\n }\n\n return Promise.resolve()\n}\n\nexport function formatOpenAIResponsesInput(input: unknown, instructions?: string | null): FormattedMessage[] {\n const messages: FormattedMessage[] = []\n\n if (instructions) {\n messages.push({\n role: 'system',\n content: instructions,\n })\n }\n\n if (Array.isArray(input)) {\n for (const item of input) {\n if (typeof item === 'string') {\n messages.push({ role: 'user', content: item })\n } else if (item && typeof item === 'object') {\n const obj = item as Record<string, unknown>\n const role = isString(obj.role) ? obj.role : 'user'\n\n // Handle content properly - preserve structure for objects/arrays\n const content = obj.content ?? obj.text ?? item\n messages.push({ role, content: toContentString(content) })\n } else {\n messages.push({ role: 'user', content: toContentString(item) })\n }\n }\n } else if (typeof input === 'string') {\n messages.push({ role: 'user', content: input })\n } else if (input) {\n messages.push({ role: 'user', content: toContentString(input) })\n }\n\n return messages\n}\n","import {\n GoogleGenAI,\n GenerateContentResponse as GeminiResponse,\n GenerateContentParameters,\n Part,\n GenerateContentResponseUsageMetadata,\n} from '@google/genai'\nimport type { GoogleGenAIOptions } from '@google/genai'\nimport { PostHog } from 'posthog-node'\nimport {\n MonitoringParams,\n sendEventToPosthog,\n extractAvailableToolCalls,\n formatResponseGemini,\n extractPosthogParams,\n toContentString,\n sendEventWithErrorToPosthog,\n} from '../utils'\nimport { sanitizeGemini } from '../sanitization'\nimport type { TokenUsage, FormattedContent, FormattedContentItem, FormattedMessage } from '../types'\nimport { isString } from '../typeGuards'\n\ninterface MonitoringGeminiConfig extends GoogleGenAIOptions {\n posthog: PostHog\n}\n\nexport class PostHogGoogleGenAI {\n private readonly phClient: PostHog\n private readonly client: GoogleGenAI\n public models: WrappedModels\n\n constructor(config: MonitoringGeminiConfig) {\n const { posthog, ...geminiConfig } = config\n this.phClient = posthog\n this.client = new GoogleGenAI(geminiConfig)\n this.models = new WrappedModels(this.client, this.phClient)\n }\n}\n\nexport class WrappedModels {\n private readonly phClient: PostHog\n private readonly client: GoogleGenAI\n\n constructor(client: GoogleGenAI, phClient: PostHog) {\n this.client = client\n this.phClient = phClient\n }\n\n public async generateContent(params: GenerateContentParameters & MonitoringParams): Promise<GeminiResponse> {\n const { providerParams: geminiParams, posthogParams } = extractPosthogParams(params)\n const startTime = Date.now()\n\n try {\n const response = await this.client.models.generateContent(geminiParams as GenerateContentParameters)\n const latency = (Date.now() - startTime) / 1000\n\n const availableTools = extractAvailableToolCalls('gemini', geminiParams)\n\n const metadata = response.usageMetadata\n await sendEventToPosthog({\n client: this.phClient,\n ...posthogParams,\n model: geminiParams.model,\n provider: 'gemini',\n input: this.formatInputForPostHog(geminiParams),\n output: formatResponseGemini(response),\n latency,\n baseURL: 'https://generativelanguage.googleapis.com',\n params: params as GenerateContentParameters & MonitoringParams,\n httpStatus: 200,\n usage: {\n inputTokens: metadata?.promptTokenCount ?? 0,\n outputTokens: metadata?.candidatesTokenCount ?? 0,\n reasoningTokens:\n (metadata as GenerateContentResponseUsageMetadata & { thoughtsTokenCount?: number })?.thoughtsTokenCount ??\n 0,\n cacheReadInputTokens: metadata?.cachedContentTokenCount ?? 0,\n webSearchCount: calculateGoogleWebSearchCount(response),\n rawUsage: metadata,\n },\n tools: availableTools,\n })\n\n return response\n } catch (error: unknown) {\n const latency = (Date.now() - startTime) / 1000\n const enrichedError = await sendEventWithErrorToPosthog({\n client: this.phClient,\n ...posthogParams,\n model: geminiParams.model,\n provider: 'gemini',\n input: this.formatInputForPostHog(geminiParams),\n output: [],\n latency,\n baseURL: 'https://generativelanguage.googleapis.com',\n params: params as GenerateContentParameters & MonitoringParams,\n usage: {\n inputTokens: 0,\n outputTokens: 0,\n },\n error: error,\n })\n throw enrichedError\n }\n }\n\n public async *generateContentStream(\n params: GenerateContentParameters & MonitoringParams\n ): AsyncGenerator<GeminiResponse, void, unknown> {\n const { providerParams: geminiParams, posthogParams } = extractPosthogParams(params)\n const startTime = Date.now()\n const accumulatedContent: FormattedContent = []\n let firstTokenTime: number | undefined\n let usage: TokenUsage = {\n inputTokens: 0,\n outputTokens: 0,\n webSearchCount: 0,\n rawUsage: undefined,\n }\n\n try {\n const stream = await this.client.models.generateContentStream(geminiParams as GenerateContentParameters)\n\n for await (const chunk of stream) {\n // Track first token time when we get text content\n if (firstTokenTime === undefined && chunk.text) {\n firstTokenTime = Date.now()\n }\n const chunkWebSearchCount = calculateGoogleWebSearchCount(chunk)\n if (chunkWebSearchCount > 0 && chunkWebSearchCount > (usage.webSearchCount ?? 0)) {\n usage.webSearchCount = chunkWebSearchCount\n }\n\n // Handle text content\n if (chunk.text) {\n // Find if we already have a text item to append to\n let lastTextItem: FormattedContentItem | undefined\n for (let i = accumulatedContent.length - 1; i >= 0; i--) {\n if (accumulatedContent[i].type === 'text') {\n lastTextItem = accumulatedContent[i]\n break\n }\n }\n\n if (lastTextItem && lastTextItem.type === 'text') {\n lastTextItem.text += chunk.text\n } else {\n accumulatedContent.push({ type: 'text', text: chunk.text })\n }\n }\n\n // Handle function calls from candidates\n if (chunk.candidates && Array.isArray(chunk.candidates)) {\n for (const candidate of chunk.candidates) {\n if (candidate.content && candidate.content.parts) {\n for (const part of candidate.content.parts) {\n // Type-safe check for functionCall\n if ('functionCall' in part) {\n if (firstTokenTime === undefined) {\n firstTokenTime = Date.now()\n }\n const funcCall = (part as Part & { functionCall?: { name?: string; args?: unknown } }).functionCall\n if (funcCall?.name) {\n accumulatedContent.push({\n type: 'function',\n function: {\n name: funcCall.name,\n arguments: funcCall.args || {},\n },\n })\n }\n }\n }\n }\n }\n }\n\n // Update usage metadata - handle both old and new field names\n if (chunk.usageMetadata) {\n const metadata = chunk.usageMetadata as GenerateContentResponseUsageMetadata\n usage = {\n inputTokens: metadata.promptTokenCount ?? 0,\n outputTokens: metadata.candidatesTokenCount ?? 0,\n reasoningTokens:\n (metadata as GenerateContentResponseUsageMetadata & { thoughtsTokenCount?: number }).thoughtsTokenCount ??\n 0,\n cacheReadInputTokens: metadata.cachedContentTokenCount ?? 0,\n webSearchCount: usage.webSearchCount,\n rawUsage: metadata,\n }\n }\n yield chunk\n }\n\n const latency = (Date.now() - startTime) / 1000\n const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined\n\n const availableTools = extractAvailableToolCalls('gemini', geminiParams)\n\n // Format output similar to formatResponseGemini\n const output = accumulatedContent.length > 0 ? [{ role: 'assistant', content: accumulatedContent }] : []\n\n await sendEventToPosthog({\n client: this.phClient,\n ...posthogParams,\n model: geminiParams.model,\n provider: 'gemini',\n input: this.formatInputForPostHog(geminiParams),\n output,\n latency,\n timeToFirstToken,\n baseURL: 'https://generativelanguage.googleapis.com',\n params: params as GenerateContentParameters & MonitoringParams,\n httpStatus: 200,\n usage: {\n ...usage,\n webSearchCount: usage.webSearchCount,\n rawUsage: usage.rawUsage,\n },\n tools: availableTools,\n })\n } catch (error: unknown) {\n const latency = (Date.now() - startTime) / 1000\n const enrichedError = await sendEventWithErrorToPosthog({\n client: this.phClient,\n ...posthogParams,\n model: geminiParams.model,\n provider: 'gemini',\n input: this.formatInputForPostHog(geminiParams),\n output: [],\n latency,\n baseURL: 'https://generativelanguage.googleapis.com',\n params: params as GenerateContentParameters & MonitoringParams,\n usage: {\n inputTokens: 0,\n outputTokens: 0,\n },\n error: error,\n })\n throw enrichedError\n }\n }\n\n private formatPartsAsContentBlocks(parts: unknown[]): FormattedContent {\n const blocks: FormattedContent = []\n\n for (const part of parts) {\n // Handle dict/object with text field\n if (part && typeof part === 'object' && 'text' in part && part.text) {\n blocks.push({ type: 'text', text: String(part.text) })\n }\n // Handle string parts\n else if (typeof part === 'string') {\n blocks.push({ type: 'text', text: part })\n }\n // Handle inlineData (images, audio, PDFs)\n else if (part && typeof part === 'object' && 'inlineData' in part) {\n const inlineData = (part as any).inlineData\n const mimeType = inlineData.mimeType || inlineData.mime_type || ''\n const contentType = mimeType.startsWith('image/') ? 'image' : 'document'\n\n blocks.push({\n type: contentType,\n inline_data: {\n data: inlineData.data,\n mime_type: mimeType,\n },\n } as FormattedContentItem)\n }\n }\n\n return blocks\n }\n\n private formatInput(contents: unknown): FormattedMessage[] {\n if (typeof contents === 'string') {\n return [{ role: 'user', content: contents }]\n }\n\n if (Array.isArray(contents)) {\n return contents.map((item) => {\n if (typeof item === 'string') {\n return { role: 'user', content: item }\n }\n\n if (item && typeof item === 'object') {\n const obj = item as Record<string, unknown>\n if ('text' in obj && obj.text) {\n return { role: isString(obj.role) ? obj.role : 'user', content: obj.text }\n }\n\n if ('content' in obj && obj.content) {\n // If content is a list, format it as content blocks\n if (Array.isArray(obj.content)) {\n const contentBlocks = this.formatPartsAsContentBlocks(obj.content)\n return { role: isString(obj.role) ? obj.role : 'user', content: contentBlocks }\n }\n return { role: isString(obj.role) ? obj.role : 'user', content: obj.content }\n }\n\n if ('parts' in obj && Array.isArray(obj.parts)) {\n const contentBlocks = this.formatPartsAsContentBlocks(obj.parts)\n return {\n role: isString(obj.role) ? obj.role : 'user',\n content: contentBlocks,\n }\n }\n }\n\n return { role: 'user', content: toContentString(item) }\n })\n }\n\n if (contents && typeof contents === 'object') {\n const obj = contents as Record<string, unknown>\n if ('text' in obj && obj.text) {\n return [{ role: 'user', content: obj.text }]\n }\n\n if ('content' in obj && obj.content) {\n return [{ role: 'user', content: obj.content }]\n }\n }\n\n return [{ role: 'user', content: toContentString(contents) }]\n }\n\n private extractSystemInstruction(params: GenerateContentParameters): string | null {\n if (!params || typeof params !== 'object' || !params.config) {\n return null\n }\n const config = params.config as any\n if (!('systemInstruction' in config)) {\n return null\n }\n const systemInstruction = config.systemInstruction\n if (typeof systemInstruction === 'string') {\n return systemInstruction\n }\n if (systemInstruction && typeof systemInstruction === 'object' && 'text' in systemInstruction) {\n return systemInstruction.text\n }\n if (\n systemInstruction &&\n typeof systemInstruction === 'object' &&\n 'parts' in systemInstruction &&\n Array.isArray(systemInstruction.parts)\n ) {\n for (const part of systemInstruction.parts) {\n if (part && typeof part === 'object' && 'text' in part && typeof part.text === 'string') {\n return part.text\n }\n }\n }\n if (Array.isArray(systemInstruction)) {\n for (const part of systemInstruction) {\n if (typeof part === 'string') {\n return part\n }\n if (part && typeof part === 'object' && 'text' in part && typeof part.text === 'string') {\n return part.text\n }\n }\n }\n return null\n }\n\n private formatInputForPostHog(params: GenerateContentParameters): FormattedMessage[] {\n const sanitized = sanitizeGemini(params.contents)\n const messages = this.formatInput(sanitized)\n\n const systemInstruction = this.extractSystemInstruction(params)\n\n if (systemInstruction) {\n const hasSystemMessage = messages.some((msg: FormattedMessage) => msg.role === 'system')\n\n if (!hasSystemMessage) {\n return [{ role: 'system', content: systemInstruction }, ...messages]\n }\n }\n\n return messages\n }\n}\n\n/**\n * Detect if Google Search grounding was used in the response.\n * Gemini bills per request that uses grounding, not per individual query.\n * Returns 1 if grounding was used, 0 otherwise.\n */\nfunction calculateGoogleWebSearchCount(response: unknown): number {\n if (!response || typeof response !== 'object' || !('candidates' in response)) {\n return 0\n }\n\n const candidates = response.candidates\n\n if (!Array.isArray(candidates)) {\n return 0\n }\n\n const hasGrounding = candidates.some((candidate: unknown) => {\n if (!candidate || typeof candidate !== 'object') {\n return false\n }\n\n // Check for grounding metadata\n if ('groundingMetadata' in candidate && candidate.groundingMetadata) {\n const metadata = candidate.groundingMetadata as any\n\n if (typeof metadata === 'object') {\n // Check if web_search_queries exists and is non-empty\n if (\n 'webSearchQueries' in metadata &&\n Array.isArray(metadata.webSearchQueries) &&\n metadata.webSearchQueries.length > 0\n ) {\n return true\n }\n\n // Check if grounding_chunks exists and is non-empty\n if (\n 'groundingChunks' in metadata &&\n Array.isArray(metadata.groundingChunks) &&\n metadata.groundingChunks.length > 0\n ) {\n return true\n }\n }\n }\n\n // Check for google search in function calls\n if ('content' in candidate && candidate.content && typeof candidate.content === 'object') {\n const content = candidate.content\n\n if ('parts' in content && Array.isArray(content.parts)) {\n return content.parts.some((part: unknown) => {\n if (!part || typeof part !== 'object' || !('functionCall' in part)) {\n return false\n }\n\n const functionCall = part.functionCall\n\n if (\n functionCall &&\n typeof functionCall === 'object' &&\n 'name' in functionCall &&\n typeof functionCall.name === 'string'\n ) {\n return functionCall.name.includes('google_search') || functionCall.name.includes('grounding')\n }\n\n return false\n })\n }\n }\n\n return false\n })\n\n return hasGrounding ? 1 : 0\n}\n\nexport default PostHogGoogleGenAI\nexport { PostHogGoogleGenAI as Gemini }\n"],"names":["isString","value","isObject","Array","isArray","REDACTED_IMAGE_PLACEHOLDER","isMultimodalEnabled","val","process","env","_INTERNAL_LLMA_MULTIMODAL","toLowerCase","isBase64DataUrl","str","test","isValidUrl","URL","startsWith","isRawBase64","length","redactBase64DataUrl","sanitizeGeminiPart","part","inlineData","data","processGeminiItem","item","parts","map","sanitizeGemini","toContentString","content","undefined","JSON","stringify","String","getModelParams","params","modelParams","paramKeys","key","formatResponseGemini","response","output","candidates","candidate","text","push","type","functionCall","function","name","arguments","args","mimeType","Uint8Array","Buffer","from","toString","binary","i","fromCharCode","btoa","mime_type","role","withPrivacyMode","client","privacyMode","input","privacy_mode","extractAvailableToolCalls","provider","config","tools","AIEvent","sanitizeValues","obj","jsonSafe","parse","TextDecoder","decode","TextEncoder","encode","Object","fromEntries","entries","k","v","POSTHOG_PARAMS_MAP","posthogDistinctId","posthogTraceId","posthogProperties","posthogPrivacyMode","posthogGroups","posthogModelOverride","posthogProviderOverride","posthogCostOverride","posthogCaptureImmediate","extractPosthogParams","body","providerParams","posthogParams","console","warn","addDefaults","traceId","uuidv4","sendEventWithErrorToPosthog","error","httpStatus","status","properties","enrichedError","options","enableExceptionAutocapture","exceptionId","uuidv7","captureException","$ai_trace_id","__posthog_previously_captured_error","sendEventToPosthog","eventType","Generation","distinctId","model","latency","timeToFirstToken","baseURL","usage","captureImmediate","capture","Promise","resolve","safeInput","safeOutput","safeError","errorData","$ai_is_erro