UNPKG

@posthog/ai

Version:
1,262 lines (1,181 loc) 42.7 kB
'use strict';

var uuid = require('uuid');
require('@posthog/core');

/**
 * Bundler-generated CommonJS interop helper.
 *
 * Objects flagged with a truthy `__esModule` are returned untouched. Anything else is
 * wrapped in a frozen, prototype-less object that re-exposes every own enumerable key of
 * `e` (except `default`) and stores `e` itself under `default`.
 *
 * @param e - The value returned by `require()`.
 * @returns `e` itself, or a frozen namespace-like wrapper around it.
 */
function _interopNamespace(e) {
  if (e && e.__esModule) return e;
  var n = Object.create(null);
  if (e) {
    Object.keys(e).forEach(function (k) {
      if (k !== 'default') {
        var d = Object.getOwnPropertyDescriptor(e, k);
        // Reuse an existing accessor descriptor; otherwise expose a live getter so later
        // reassignments on `e` stay visible through the wrapper.
        Object.defineProperty(n, k, d.get ? d : {
          enumerable: true,
          get: function () { return e[k]; }
        });
      }
    });
  }
  n.default = e;
  return Object.freeze(n);
}

var uuid__namespace = /*#__PURE__*/_interopNamespace(uuid);

var version = "7.9.2";

// Type guards for safer type checking

/** True when `value` is a primitive string. */
const isString = value => {
  return typeof value === 'string';
};

/** True when `value` is a non-null, non-array object. */
const isObject = value => {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
};

const REDACTED_IMAGE_PLACEHOLDER = '[base64 image redacted]';

// ============================================
// Multimodal Feature Toggle
// ============================================

/**
 * Reports whether the `_INTERNAL_LLMA_MULTIMODAL` environment variable is set to
 * 'true' or 'yes' (case-insensitive) or to '1'.
 *
 * Returns false instead of throwing when no `process.env` exists, so the redaction
 * helpers below stay usable in runtimes without a Node-style `process` global.
 */
const isMultimodalEnabled = () => {
  const env = typeof process !== 'undefined' && process.env ? process.env : {};
  const val = env._INTERNAL_LLMA_MULTIMODAL || '';
  return val.toLowerCase() === 'true' || val === '1' || val.toLowerCase() === 'yes';
};

// ============================================
// Base64 Detection Helpers
// ============================================

/**
 * True when `str` is a `data:` URL whose payload is base64-encoded.
 *
 * The media type may be empty or carry parameters (`data:image/png;charset=utf-8;base64,...`),
 * so everything up to the first comma is accepted as long as it ends in `;base64`.
 * Matching is case-insensitive.
 */
const isBase64DataUrl = str => {
  return /^data:[^,]*;base64,/i.test(str);
};

/**
 * True when `str` parses as an absolute URL, or starts with '/', './' or '../'.
 */
const isValidUrl = str => {
  try {
    new URL(str);
    return true;
  } catch {
    // Not an absolute URL, check if it's a relative URL or path
    return str.startsWith('/') || str.startsWith('./') || str.startsWith('../');
  }
};

/**
 * True when `str` looks like a bare base64 payload: not a URL or path, longer than
 * 20 characters, and made only of base64 alphabet characters followed by optional '='.
 */
const isRawBase64 = str => {
  // Skip if it's a valid URL or path
  if (isValidUrl(str)) {
    return false;
  }
  // Check if it's a valid base64 string
  // Base64 images are typically at least a few hundred chars, but we'll be conservative
  return str.length > 20 && /^[A-Za-z0-9+/]+=*$/.test(str);
};

/**
 * Replaces inline base64 image data with a placeholder.
 *
 * @param str - Candidate value; non-strings are returned unchanged.
 * @returns `str` unchanged when multimodal capture is enabled or when it is not base64
 *   image data, otherwise `REDACTED_IMAGE_PLACEHOLDER`.
 */
function redactBase64DataUrl(str) {
  if (isMultimodalEnabled()) return str;
  if (!isString(str)) return str;
  // Check for data URL format
  if (isBase64DataUrl(str)) {
    return REDACTED_IMAGE_PLACEHOLDER;
  }
  // Check for raw base64 (Vercel sends raw base64 for inline images)
  if (isRawBase64(str)) {
    return REDACTED_IMAGE_PLACEHOLDER;
  }
  return str;
}

// ============================================
// Common Message Processing
// ============================================

/**
 * Applies `transformContent` to the `content` of one message or of every message in an array.
 *
 * String and falsy contents are passed through untouched; array contents are mapped item
 * by item; any other content is handed to `transformContent` as a single value. Messages
 * that are not plain objects or have no `content` key are returned as-is. Processed
 * messages are shallow copies, so the inputs are not mutated.
 *
 * @param messages - A message, an array of messages, or a falsy value (returned unchanged).
 * @param transformContent - Callback invoked for each content item.
 * @returns The processed message(s), in the same shape as the input.
 */
const processMessages = (messages, transformContent) => {
  if (!messages) return messages;
  const processContent = content => {
    if (typeof content === 'string') return content;
    if (!content) return content;
    if (Array.isArray(content)) {
      return content.map(transformContent);
    }
    // Handle single object content
    return transformContent(content);
  };
  const processMessage = msg => {
    if (!isObject(msg) || !('content' in msg)) return msg;
    return {
      ...msg,
      content: processContent(msg.content)
    };
  };
  // Handle both arrays and single messages
  if (Array.isArray(messages)) {
    return messages.map(processMessage);
  }
  return processMessage(messages);
};

/**
 * Redacts base64 image payloads from a single content item.
 *
 * Recognised shapes are checked in order: `image_url.url`, `image` with `data`,
 * `image` with `source.data`, and `media` with `data`. Anything else is returned unchanged.
 *
 * @param item - One content item.
 * @returns A shallow copy with the payload redacted, or `item` itself when no shape matches.
 */
const sanitizeLangChainImage = item => {
  if (!isObject(item)) return item;
  // OpenAI style
  if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {
    return {
      ...item,
      image_url: {
        ...item.image_url,
        url: redactBase64DataUrl(item.image_url.url)
      }
    };
  }
  // Direct image with data field
  if (item.type === 'image' && 'data' in item) {
    return {
      ...item,
      data: redactBase64DataUrl(item.data)
    };
  }
  // Anthropic style
  if (item.type === 'image' && 'source' in item && isObject(item.source) && 'data' in item.source) {
    if (isMultimodalEnabled()) return item;
    return {
      ...item,
      source: {
        ...item.source,
        data: redactBase64DataUrl(item.source.data)
      }
    };
  }
  // Google style
  if (item.type === 'media' && 'data' in item) {
    return {
      ...item,
      data: redactBase64DataUrl(item.data)
    };
  }
  return item;
};

/** Runs `sanitizeLangChainImage` over the content of every message in `data`. */
const sanitizeLangChain = data => {
  return processMessages(data, sanitizeLangChainImage);
};
/**
 * Safely converts content to a string, preserving structure for objects/arrays.
 * - If content is already a string, returns it as-is
 * - If content is an object or array, stringifies it with JSON.stringify to preserve structure
 * - Otherwise, converts to string with String()
 *
 * This prevents the "[object Object]" bug when objects are naively converted to strings.
 *
 * A string is always returned: when JSON.stringify throws, or produces `undefined`
 * (for example a `toJSON()` that returns `undefined`), the result falls back to String().
 *
 * @param content - The content to convert to a string
 * @returns A string representation that preserves structure for complex types
 */
function toContentString(content) {
  if (typeof content === 'string') {
    return content;
  }
  if (content !== undefined && content !== null && typeof content === 'object') {
    try {
      const serialized = JSON.stringify(content);
      // JSON.stringify may legitimately return undefined; never leak that to callers.
      if (typeof serialized === 'string') {
        return serialized;
      }
    } catch {
      // Fallback for circular refs, BigInt, or objects with throwing toJSON
    }
    return String(content);
  }
  return String(content);
}

/**
 * Picks the known model parameters out of an invocation-parameter object.
 *
 * @param params - Source object; a falsy value yields an empty object.
 * @returns A new object holding only the allow-listed keys whose value is not `undefined`.
 */
const getModelParams = params => {
  if (!params) {
    return {};
  }
  const modelParams = {};
  const paramKeys = ['temperature', 'max_tokens', 'max_completion_tokens', 'top_p', 'frequency_penalty', 'presence_penalty', 'n', 'stop', 'stream', 'streaming', 'language', 'response_format', 'timestamp_granularities'];
  for (const key of paramKeys) {
    if (key in params && params[key] !== undefined) {
      modelParams[key] = params[key];
    }
  }
  return modelParams;
};

/**
 * Returns `null` when privacy mode is on for the client (`client.privacy_mode`) or for the
 * call (`privacyMode`), otherwise returns `input` unchanged.
 */
const withPrivacyMode = (client, privacyMode, input) => {
  return client.privacy_mode || privacyMode ? null : input;
};

/**
 * Bundler helper: returns `x.default` for objects flagged `__esModule` that own a
 * `default` property, otherwise returns `x` itself.
 */
function getDefaultExportFromCjs (x) {
  return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
}

var decamelize;
var hasRequiredDecamelize;

/**
 * Lazy initialiser for the vendored `decamelize` function (runs its body once).
 *
 * `decamelize(str, sep)` inserts `sep` (default '_') at lower/digit-to-upper boundaries and
 * between an uppercase run and a following capitalised word, then lower-cases the result.
 * It throws a TypeError when `str` is not a string.
 */
function requireDecamelize () {
  if (hasRequiredDecamelize) return decamelize;
  hasRequiredDecamelize = 1;
  decamelize = function (str, sep) {
    if (typeof str !== 'string') {
      throw new TypeError('Expected a string');
    }
    sep = typeof sep === 'undefined' ? '_' : sep;
    return str
      .replace(/([a-z\d])([A-Z])/g, '$1' + sep + '$2')
      .replace(/([A-Z]+)([A-Z][a-z\d]+)/g, '$1' + sep + '$2')
      .toLowerCase();
  };
  return decamelize;
}

var decamelizeExports = requireDecamelize();
var snakeCase = /*@__PURE__*/getDefaultExportFromCjs(decamelizeExports);

var camelcase = {exports: {}};
var hasRequiredCamelcase;

/**
 * Lazy initialiser for the vendored `camelcase` module (runs its body once and caches the
 * result on `camelcase.exports`). The vendored logic is reproduced unchanged.
 */
function requireCamelcase () {
  if (hasRequiredCamelcase) return camelcase.exports;
  hasRequiredCamelcase = 1;
  const UPPERCASE = /[\p{Lu}]/u;
  const LOWERCASE = /[\p{Ll}]/u;
  const LEADING_CAPITAL = /^[\p{Lu}](?![\p{Lu}])/gu;
  const IDENTIFIER = /([\p{Alpha}\p{N}_]|$)/u;
  const SEPARATORS = /[_.\- ]+/;
  const LEADING_SEPARATORS = new RegExp('^' + SEPARATORS.source);
  const SEPARATORS_AND_IDENTIFIER = new RegExp(SEPARATORS.source + IDENTIFIER.source, 'gu');
  const NUMBERS_AND_IDENTIFIER = new RegExp('\\d+' + IDENTIFIER.source, 'gu');

  // Inserts '-' at case boundaries; the string grows while it is being scanned, which is
  // why the first branch also advances `i`.
  const preserveCamelCase = (string, toLowerCase, toUpperCase) => {
    let isLastCharLower = false;
    let isLastCharUpper = false;
    let isLastLastCharUpper = false;
    for (let i = 0; i < string.length; i++) {
      const character = string[i];
      if (isLastCharLower && UPPERCASE.test(character)) {
        string = string.slice(0, i) + '-' + string.slice(i);
        isLastCharLower = false;
        isLastLastCharUpper = isLastCharUpper;
        isLastCharUpper = true;
        i++;
      } else if (isLastCharUpper && isLastLastCharUpper && LOWERCASE.test(character)) {
        string = string.slice(0, i - 1) + '-' + string.slice(i - 1);
        isLastLastCharUpper = isLastCharUpper;
        isLastCharUpper = false;
        isLastCharLower = true;
      } else {
        isLastCharLower = toLowerCase(character) === character && toUpperCase(character) !== character;
        isLastLastCharUpper = isLastCharUpper;
        isLastCharUpper = toUpperCase(character) === character && toLowerCase(character) !== character;
      }
    }
    return string;
  };

  const preserveConsecutiveUppercase = (input, toLowerCase) => {
    // The regex carries the `g` flag, so its cursor is reset before each use.
    LEADING_CAPITAL.lastIndex = 0;
    return input.replace(LEADING_CAPITAL, m1 => toLowerCase(m1));
  };

  const postProcess = (input, toUpperCase) => {
    // Both regexes carry the `g` flag, so their cursors are reset before each use.
    SEPARATORS_AND_IDENTIFIER.lastIndex = 0;
    NUMBERS_AND_IDENTIFIER.lastIndex = 0;
    return input.replace(SEPARATORS_AND_IDENTIFIER, (_, identifier) => toUpperCase(identifier))
      .replace(NUMBERS_AND_IDENTIFIER, m => toUpperCase(m));
  };

  const camelCase = (input, options) => {
    if (!(typeof input === 'string' || Array.isArray(input))) {
      throw new TypeError('Expected the input to be `string | string[]`');
    }
    options = {
      pascalCase: false,
      preserveConsecutiveUppercase: false,
      ...options
    };
    if (Array.isArray(input)) {
      input = input.map(x => x.trim())
        .filter(x => x.length)
        .join('-');
    } else {
      input = input.trim();
    }
    if (input.length === 0) {
      return '';
    }
    const toLowerCase = options.locale === false ?
      string => string.toLowerCase() :
      string => string.toLocaleLowerCase(options.locale);
    const toUpperCase = options.locale === false ?
      string => string.toUpperCase() :
      string => string.toLocaleUpperCase(options.locale);
    if (input.length === 1) {
      return options.pascalCase ? toUpperCase(input) : toLowerCase(input);
    }
    const hasUpperCase = input !== toLowerCase(input);
    if (hasUpperCase) {
      input = preserveCamelCase(input, toLowerCase, toUpperCase);
    }
    input = input.replace(LEADING_SEPARATORS, '');
    if (options.preserveConsecutiveUppercase) {
      input = preserveConsecutiveUppercase(input, toLowerCase);
    } else {
      input = toLowerCase(input);
    }
    if (options.pascalCase) {
      input = toUpperCase(input.charAt(0)) + input.slice(1);
    }
    return postProcess(input, toUpperCase);
  };

  camelcase.exports = camelCase;
  // TODO: Remove this for the next major release
  camelcase.exports.default = camelCase;
  return camelcase.exports;
}

requireCamelcase();

//#region src/load/map_keys.ts

/**
 * Resolves the serialized name of `key`: the truthy alias in `map` if there is one,
 * otherwise the snake_case form of `key`.
 */
function keyToJson(key, map) {
  return map?.[key] || snakeCase(key);
}

/**
 * Builds a new object whose keys are `mapper(key, map)` for every own enumerable key of
 * `fields`; values are copied by reference.
 */
function mapKeys(fields, mapper, map) {
  const mapped = {};
  for (const key in fields) if (Object.hasOwn(fields, key)) mapped[mapper(key, map)] = fields[key];
  return mapped;
}

//#region src/load/validation.ts
/**
 * Sentinel key used to mark escaped user objects during serialization.
 *
 * When a plain object contains 'lc' key (which could be confused with LC objects),
 * we wrap it as `{"__lc_escaped__": {...original...}}`.
 */
const LC_ESCAPED_KEY = "__lc_escaped__";

/**
 * Decide whether a plain object must be wrapped before serialization.
 *
 * Wrapping is required when the object:
 * 1. exposes an `'lc'` key (it would look like the LC serialization format), or
 * 2. has exactly one own key and exposes the escape key (it would look already escaped).
 */
function needsEscaping(obj) {
  if ("lc" in obj) {
    return true;
  }
  const ownKeys = Object.keys(obj);
  return ownKeys.length === 1 && LC_ESCAPED_KEY in obj;
}

/**
 * Put an object under the escape marker.
 *
 * @example
 * ```typescript
 * {"key": "value"} // becomes {"__lc_escaped__": {"key": "value"}}
 * ```
 */
function escapeObject(obj) {
  const wrapper = {};
  wrapper[LC_ESCAPED_KEY] = obj;
  return wrapper;
}

/**
 * Duck-typed test for a Serializable instance: a non-null object that exposes
 * `lc_serializable` and has a callable `toJSON`.
 */
function isSerializableLike(obj) {
  if (obj === null || typeof obj !== "object") {
    return false;
  }
  return "lc_serializable" in obj && typeof obj.toJSON === "function";
}

/**
 * Build the "not_implemented" placeholder used for values that cannot be serialized.
 *
 * The `id` is the object's `lc_id` array when it has one, else a one-element array with
 * its constructor name (or "Object"); for non-objects it is `[typeof obj]`.
 */
function createNotImplemented(obj) {
  const isObjectValue = obj !== null && typeof obj === "object";
  let id;
  if (!isObjectValue) {
    id = [typeof obj];
  } else if ("lc_id" in obj && Array.isArray(obj.lc_id)) {
    id = obj.lc_id;
  } else {
    id = [obj.constructor?.name ?? "Object"];
  }
  return {
    lc: 1,
    type: "not_implemented",
    id
  };
}
/**
 * Escape a value if it needs escaping (contains `lc` key).
 *
 * This is a simpler version of `serializeValue` that doesn't handle Serializable
 * objects - it's meant to be called on kwargs values that have already been
 * processed by `toJSON()`.
 *
 * @param value - The value to potentially escape.
 * @param pathSet - WeakSet to track ancestor objects in the current path to detect circular references.
 *   Objects are removed after processing to allow shared references (same object in
 *   multiple places) while still detecting true circular references (ancestor in descendant).
 * @returns The value with any `lc`-containing objects wrapped in escape markers.
 */
function escapeIfNeeded(value, pathSet = /* @__PURE__ */ new WeakSet()) {
  if (value !== null && typeof value === "object" && !Array.isArray(value)) {
    // An object already on the current path is a cycle: emit a placeholder instead of recursing.
    if (pathSet.has(value)) return createNotImplemented(value);
    // Serializable-looking objects are returned untouched.
    if (isSerializableLike(value)) return value;
    pathSet.add(value);
    const record = value;
    if (needsEscaping(record)) {
      pathSet.delete(value);
      // The whole object is wrapped as-is; its children are not visited.
      return escapeObject(record);
    }
    const result = {};
    for (const [key, val] of Object.entries(record)) result[key] = escapeIfNeeded(val, pathSet);
    pathSet.delete(value);
    return result;
  }
  // NOTE(review): arrays are mapped element by element but are never added to `pathSet`,
  // so a cycle that passes only through arrays is not detected here.
  if (Array.isArray(value)) return value.map((item) => escapeIfNeeded(item, pathSet));
  return value;
}

/** Returns a one-level copy of `obj`: a new array for arrays, a new plain object otherwise. */
function shallowCopy(obj) {
  return Array.isArray(obj) ? [...obj] : { ...obj };
}

/**
 * Returns a copy of `root` in which the value at each dotted path in `secretsMap`
 * is replaced by a `{ lc: 1, type: "secret", id: [secretId] }` marker.
 *
 * Every container along a path is shallow-copied before it is descended into, so the
 * containers reachable from the original `root` are not written to.
 *
 * @param root - Object (or array) to copy and rewrite.
 * @param secretsMap - Map from dotted path (e.g. "foo.bar.baz") to secret id.
 * @returns The rewritten shallow copy.
 */
function replaceSecrets(root, secretsMap) {
  const result = shallowCopy(root);
  for (const [path, secretId] of Object.entries(secretsMap)) {
    // `last` is the final path segment; `partsReverse` holds the parents, innermost first.
    const [last, ...partsReverse] = path.split(".").reverse();
    let current = result;
    for (const part of partsReverse.reverse()) {
      if (current[part] === void 0) break;
      current[part] = shallowCopy(current[part]);
      current = current[part];
    }
    // NOTE(review): after an early `break` this still tests `last` on the deepest container
    // that was reached, not on the intended parent.
    if (current[last] !== void 0) current[last] = {
      lc: 1,
      type: "secret",
      id: [secretId]
    };
  }
  return result;
}

/**
 * Get a unique name for the module, rather than parent class implementations.
 * Should not be subclassed, subclass lc_name above instead.
 */
function get_lc_unique_name(serializableClass) {
  const parentClass = Object.getPrototypeOf(serializableClass);
  // Use lc_name() only when the parent has no lc_name() or returns a different value;
  // otherwise fall back to the class's own `name`.
  if (typeof serializableClass.lc_name === "function" && (typeof parentClass.lc_name !== "function" || serializableClass.lc_name() !== parentClass.lc_name())) return serializableClass.lc_name();
  else return serializableClass.name;
}

var Serializable = class Serializable {
  lc_serializable = false;
  lc_kwargs;
  /**
   * The name of the serializable. Override to provide an alias or
   * to preserve the serialized module name in minified environments.
   *
   * Implemented as a static method to support loading logic.
   */
  static lc_name() {
    return this.name;
  }
  /**
   * The final serialized identifier for the module.
   */
  get lc_id() {
    return [...this.lc_namespace, get_lc_unique_name(this.constructor)];
  }
  /**
   * A map of secrets, which will be omitted from serialization.
   * Keys are paths to the secret in constructor args, e.g. "foo.bar.baz".
   * Values are the secret ids, which will be used when deserializing.
   */
  get lc_secrets() {}
  /**
   * A map of additional attributes to merge with constructor args.
   * Keys are the attribute names, e.g. "foo".
   * Values are the attribute values, which will be serialized.
   * These attributes need to be accepted by the constructor as arguments.
   */
  get lc_attributes() {}
  /**
   * A map of aliases for constructor args.
   * Keys are the attribute names, e.g. "foo".
   * Values are the alias that will replace the key in serialization.
   * This is used to eg. make argument names match Python.
   */
  get lc_aliases() {}
  /**
   * A manual list of keys that should be serialized.
   * If not overridden, all fields passed into the constructor will be serialized.
   */
  get lc_serializable_keys() {}
  /**
   * Stores the constructor arguments in `lc_kwargs`, filtered down to
   * `lc_serializable_keys` when that getter returns a value.
   */
  constructor(kwargs, ..._args) {
    if (this.lc_serializable_keys !== void 0) this.lc_kwargs = Object.fromEntries(Object.entries(kwargs || {}).filter(([key]) => this.lc_serializable_keys?.includes(key)));
    else this.lc_kwargs = kwargs ?? {};
  }
  /**
   * Serializes the instance to `{ lc: 1, type: "constructor", id, kwargs }`, or to the
   * "not_implemented" form when `lc_serializable` is falsy or `lc_kwargs` is not a plain
   * kwargs object.
   */
  toJSON() {
    if (!this.lc_serializable) return this.toJSONNotImplemented();
    if (this.lc_kwargs instanceof Serializable || typeof this.lc_kwargs !== "object" || Array.isArray(this.lc_kwargs)) return this.toJSONNotImplemented();
    const aliases = {};
    const secrets = {};
    // For every constructor kwarg, prefer the instance's current value of that property.
    const kwargs = Object.keys(this.lc_kwargs).reduce((acc, key) => {
      acc[key] = key in this ? this[key] : this.lc_kwargs[key];
      return acc;
    }, {});
    // Walk the prototype chain and merge each level's getters, evaluated with `this`
    // as the receiver. Later (more distant) prototypes overwrite earlier ones.
    for (let current = Object.getPrototypeOf(this); current; current = Object.getPrototypeOf(current)) {
      Object.assign(aliases, Reflect.get(current, "lc_aliases", this));
      Object.assign(secrets, Reflect.get(current, "lc_secrets", this));
      Object.assign(kwargs, Reflect.get(current, "lc_attributes", this));
    }
    // Copy each secret's current value from the instance into `kwargs`, creating the
    // intermediate containers that are missing there.
    Object.keys(secrets).forEach((keyPath) => {
      let read = this;
      let write = kwargs;
      const [last, ...partsReverse] = keyPath.split(".").reverse();
      for (const key of partsReverse.reverse()) {
        if (!(key in read) || read[key] === void 0) return;
        if (!(key in write) || write[key] === void 0) {
          // NOTE(review): the `Array.isArray` branch cannot be reached, because arrays
          // already satisfy the `typeof ... === "object"` test on the line above it.
          if (typeof read[key] === "object" && read[key] != null) write[key] = {};
          else if (Array.isArray(read[key])) write[key] = [];
        }
        read = read[key];
        write = write[key];
      }
      if (last in read && read[last] !== void 0) write[last] = write[last] || read[last];
    });
    const escapedKwargs = {};
    const pathSet = /* @__PURE__ */ new WeakSet();
    // Seed the path with this instance so a kwarg pointing back at it is treated as a cycle.
    pathSet.add(this);
    for (const [key, value] of Object.entries(kwargs)) escapedKwargs[key] = escapeIfNeeded(value, pathSet);
    // Secrets are replaced (when any exist) before the keys are renamed via aliases/snake_case.
    const processedKwargs = mapKeys(Object.keys(secrets).length ? replaceSecrets(escapedKwargs, secrets) : escapedKwargs, keyToJson, aliases);
    return {
      lc: 1,
      type: "constructor",
      id: this.lc_id,
      kwargs: processedKwargs
    };
  }
  /** Returns the `{ lc: 1, type: "not_implemented", id }` form for this instance. */
  toJSONNotImplemented() {
    return {
      lc: 1,
      type: "not_implemented",
      id: this.lc_id
    };
  }
};

/** True when a global `Deno` binding exists. */
const isDeno = () => typeof Deno !== "undefined";

/**
 * Reads an environment variable from `process.env` when `process` exists, else from
 * `Deno.env` when running under Deno.
 *
 * @param name - Variable name.
 * @returns The value, or `undefined` when no environment is available or the lookup throws.
 */
function getEnvironmentVariable(name) {
  try {
    if (typeof process !== "undefined") return process.env?.[name];
    else if (isDeno()) return Deno?.env.get(name);
    else return;
  } catch {
    return;
  }
}

/**
 * Abstract class that provides a set of optional methods that can be
 * overridden in derived classes to handle various events during the
 * execution of a LangChain application.
 */
var BaseCallbackHandlerMethodsClass = class {};

/**
 * Abstract base class for creating callback handlers in the LangChain
 * framework.
It provides a set of optional methods that can be overridden * in derived classes to handle various events during the execution of a * LangChain application. */ var BaseCallbackHandler = class extends BaseCallbackHandlerMethodsClass { lc_serializable = false; get lc_namespace() { return [ "langchain_core", "callbacks", this.name ]; } get lc_secrets() {} get lc_attributes() {} get lc_aliases() {} get lc_serializable_keys() {} /** * The name of the serializable. Override to provide an alias or * to preserve the serialized module name in minified environments. * * Implemented as a static method to support loading logic. */ static lc_name() { return this.name; } /** * The final serialized identifier for the module. */ get lc_id() { return [...this.lc_namespace, get_lc_unique_name(this.constructor)]; } lc_kwargs; ignoreLLM = false; ignoreChain = false; ignoreAgent = false; ignoreRetriever = false; ignoreCustomEvent = false; raiseError = false; awaitHandlers = getEnvironmentVariable("LANGCHAIN_CALLBACKS_BACKGROUND") === "false"; constructor(input) { super(); this.lc_kwargs = input || {}; if (input) { this.ignoreLLM = input.ignoreLLM ?? this.ignoreLLM; this.ignoreChain = input.ignoreChain ?? this.ignoreChain; this.ignoreAgent = input.ignoreAgent ?? this.ignoreAgent; this.ignoreRetriever = input.ignoreRetriever ?? this.ignoreRetriever; this.ignoreCustomEvent = input.ignoreCustomEvent ?? this.ignoreCustomEvent; this.raiseError = input.raiseError ?? this.raiseError; this.awaitHandlers = this.raiseError || (input._awaitHandler ?? 
this.awaitHandlers); } } copy() { return new this.constructor(this); } toJSON() { return Serializable.prototype.toJSON.call(this); } toJSONNotImplemented() { return Serializable.prototype.toJSONNotImplemented.call(this); } static fromMethods(methods) { class Handler extends BaseCallbackHandler { name = uuid__namespace.v7(); constructor() { super(); Object.assign(this, methods); } } return new Handler(); } }; /** A run may either be a Span or a Generation */ /** Storage for run metadata */ class LangChainCallbackHandler extends BaseCallbackHandler { name = 'PosthogCallbackHandler'; runs = {}; parentTree = {}; constructor(options) { if (!options.client) { throw new Error('PostHog client is required'); } super(); this.client = options.client; this.distinctId = options.distinctId; this.traceId = options.traceId; this.properties = options.properties || {}; this.privacyMode = options.privacyMode || false; this.groups = options.groups || {}; this.debug = options.debug || false; } // ===== CALLBACK METHODS ===== handleChainStart(chain, inputs, runId, parentRunId, tags, metadata, _runType, runName) { this._logDebugEvent('on_chain_start', runId, parentRunId, { inputs, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(chain, inputs, runId, parentRunId, metadata, tags, runName); } handleChainEnd(outputs, runId, parentRunId, tags, _kwargs) { this._logDebugEvent('on_chain_end', runId, parentRunId, { outputs, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, outputs); } handleChainError(error, runId, parentRunId, tags, _kwargs) { this._logDebugEvent('on_chain_error', runId, parentRunId, { error, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, error); } handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, runName) { this._logDebugEvent('on_chat_model_start', runId, parentRunId, { messages, tags }); this._setParentOfRun(runId, parentRunId); // Flatten the two-dimensional messages and 
convert each message to a plain object const input = messages.flat().map(m => this._convertMessageToDict(m)); this._setLLMMetadata(serialized, runId, input, metadata, extraParams, runName); } handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, runName) { this._logDebugEvent('on_llm_start', runId, parentRunId, { prompts, tags }); this._setParentOfRun(runId, parentRunId); this._setLLMMetadata(serialized, runId, prompts, metadata, extraParams, runName); } handleLLMEnd(output, runId, parentRunId, tags, _extraParams) { this._logDebugEvent('on_llm_end', runId, parentRunId, { output, tags }); this._popRunAndCaptureGeneration(runId, parentRunId, output); } handleLLMError(err, runId, parentRunId, tags, _extraParams) { this._logDebugEvent('on_llm_error', runId, parentRunId, { err, tags }); this._popRunAndCaptureGeneration(runId, parentRunId, err); } handleToolStart(tool, input, runId, parentRunId, tags, metadata, runName) { this._logDebugEvent('on_tool_start', runId, parentRunId, { input, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(tool, input, runId, parentRunId, metadata, tags, runName); } handleToolEnd(output, runId, parentRunId, tags) { this._logDebugEvent('on_tool_end', runId, parentRunId, { output, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, output); } handleToolError(err, runId, parentRunId, tags) { this._logDebugEvent('on_tool_error', runId, parentRunId, { err, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, err); } handleRetrieverStart(retriever, query, runId, parentRunId, tags, metadata, name) { this._logDebugEvent('on_retriever_start', runId, parentRunId, { query, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(retriever, query, runId, parentRunId, metadata, tags, name); } handleRetrieverEnd(documents, runId, parentRunId, tags) { this._logDebugEvent('on_retriever_end', runId, parentRunId, { documents, tags }); 
this._popRunAndCaptureTraceOrSpan(runId, parentRunId, documents); } handleRetrieverError(err, runId, parentRunId, tags) { this._logDebugEvent('on_retriever_error', runId, parentRunId, { err, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, err); } handleAgentAction(action, runId, parentRunId, tags) { this._logDebugEvent('on_agent_action', runId, parentRunId, { action, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(null, action, runId, parentRunId); } handleAgentEnd(action, runId, parentRunId, tags) { this._logDebugEvent('on_agent_finish', runId, parentRunId, { action, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, action); } // ===== PRIVATE HELPERS ===== _setParentOfRun(runId, parentRunId) { if (parentRunId) { this.parentTree[runId] = parentRunId; } } _popParentOfRun(runId) { delete this.parentTree[runId]; } _findRootRun(runId) { let id = runId; while (this.parentTree[id]) { id = this.parentTree[id]; } return id; } _setTraceOrSpanMetadata(serialized, input, runId, parentRunId, ...args) { // Use default names if not provided: if this is a top-level run, we mark it as a trace, otherwise as a span. const defaultName = parentRunId ? 
'span' : 'trace'; const runName = this._getLangchainRunName(serialized, ...args) || defaultName; this.runs[runId] = { name: runName, input, startTime: Date.now() }; } _setLLMMetadata(serialized, runId, messages, metadata, extraParams, runName) { const runNameFound = this._getLangchainRunName(serialized, { extraParams, runName }) || 'generation'; const generation = { name: runNameFound, input: sanitizeLangChain(messages), startTime: Date.now() }; if (extraParams) { generation.modelParams = getModelParams(extraParams.invocation_params); if (extraParams.invocation_params && extraParams.invocation_params.tools) { generation.tools = extraParams.invocation_params.tools; } } if (metadata) { if (metadata.ls_model_name) { generation.model = metadata.ls_model_name; } if (metadata.ls_provider) { generation.provider = metadata.ls_provider; } } if (serialized && 'kwargs' in serialized && serialized.kwargs.openai_api_base) { generation.baseUrl = serialized.kwargs.openai_api_base; } this.runs[runId] = generation; } _popRunMetadata(runId) { const endTime = Date.now(); const run = this.runs[runId]; if (!run) { console.warn(`No run metadata found for run ${runId}`); return undefined; } run.endTime = endTime; delete this.runs[runId]; return run; } _getTraceId(runId) { return this.traceId ? String(this.traceId) : this._findRootRun(runId); } _getParentRunId(traceId, _runId, parentRunId) { // Replace the parent-run if not found in our stored parent tree. 
if (parentRunId && !this.parentTree[parentRunId]) { return traceId; } return parentRunId; } _popRunAndCaptureTraceOrSpan(runId, parentRunId, outputs) { const traceId = this._getTraceId(runId); this._popParentOfRun(runId); const run = this._popRunMetadata(runId); if (!run) { return; } if ('modelParams' in run) { console.warn(`Run ${runId} is a generation, but attempted to be captured as a trace/span.`); return; } const actualParentRunId = this._getParentRunId(traceId, runId, parentRunId); this._captureTraceOrSpan(traceId, runId, run, outputs, actualParentRunId); } _captureTraceOrSpan(traceId, runId, run, outputs, parentRunId) { const eventName = parentRunId ? '$ai_span' : '$ai_trace'; const latency = run.endTime ? (run.endTime - run.startTime) / 1000 : 0; const eventProperties = { $ai_lib: 'posthog-ai', $ai_lib_version: version, $ai_trace_id: traceId, $ai_input_state: withPrivacyMode(this.client, this.privacyMode, run.input), $ai_latency: latency, $ai_span_name: run.name, $ai_span_id: runId, $ai_framework: 'langchain' }; if (parentRunId) { eventProperties['$ai_parent_id'] = parentRunId; } Object.assign(eventProperties, this.properties); if (!this.distinctId) { eventProperties['$process_person_profile'] = false; } if (outputs instanceof Error) { eventProperties['$ai_error'] = outputs.toString(); eventProperties['$ai_is_error'] = true; } else if (outputs !== undefined) { eventProperties['$ai_output_state'] = withPrivacyMode(this.client, this.privacyMode, outputs); } this.client.capture({ distinctId: this.distinctId ? 
this.distinctId.toString() : runId, event: eventName, properties: eventProperties, groups: this.groups }); } _popRunAndCaptureGeneration(runId, parentRunId, response) { const traceId = this._getTraceId(runId); this._popParentOfRun(runId); const run = this._popRunMetadata(runId); if (!run || typeof run !== 'object' || !('modelParams' in run)) { console.warn(`Run ${runId} is not a generation, but attempted to be captured as such.`); return; } const actualParentRunId = this._getParentRunId(traceId, runId, parentRunId); this._captureGeneration(traceId, runId, run, response, actualParentRunId); } _captureGeneration(traceId, runId, run, output, parentRunId) { const latency = run.endTime ? (run.endTime - run.startTime) / 1000 : 0; const eventProperties = { $ai_lib: 'posthog-ai', $ai_lib_version: version, $ai_trace_id: traceId, $ai_span_id: runId, $ai_span_name: run.name, $ai_parent_id: parentRunId, $ai_provider: run.provider, $ai_model: run.model, $ai_model_parameters: run.modelParams, $ai_input: withPrivacyMode(this.client, this.privacyMode, run.input), $ai_http_status: 200, $ai_latency: latency, $ai_base_url: run.baseUrl, $ai_framework: 'langchain' }; if (run.tools) { eventProperties['$ai_tools'] = run.tools; } if (output instanceof Error) { eventProperties['$ai_http_status'] = output.status || 500; eventProperties['$ai_error'] = output.toString(); eventProperties['$ai_is_error'] = true; } else { // Handle token usage const [inputTokens, outputTokens, additionalTokenData] = this.parseUsage(output, run.provider, run.model); eventProperties['$ai_input_tokens'] = inputTokens; eventProperties['$ai_output_tokens'] = outputTokens; // Add additional token data to properties if (additionalTokenData.cacheReadInputTokens) { eventProperties['$ai_cache_read_input_tokens'] = additionalTokenData.cacheReadInputTokens; } if (additionalTokenData.cacheWriteInputTokens) { eventProperties['$ai_cache_creation_input_tokens'] = additionalTokenData.cacheWriteInputTokens; } if 
(additionalTokenData.reasoningTokens) { eventProperties['$ai_reasoning_tokens'] = additionalTokenData.reasoningTokens; } if (additionalTokenData.webSearchCount !== undefined) { eventProperties['$ai_web_search_count'] = additionalTokenData.webSearchCount; } // Handle generations/completions let completions; if (output.generations && Array.isArray(output.generations)) { const lastGeneration = output.generations[output.generations.length - 1]; if (Array.isArray(lastGeneration) && lastGeneration.length > 0) { // Check if this is a ChatGeneration by looking at the first item const isChatGeneration = 'message' in lastGeneration[0] && lastGeneration[0].message; if (isChatGeneration) { // For ChatGeneration, convert messages to dict format completions = lastGeneration.map(gen => { return this._convertMessageToDict(gen.message); }); } else { // For non-ChatGeneration, extract raw response completions = lastGeneration.map(gen => { return this._extractRawResponse(gen); }); } } } if (completions) { eventProperties['$ai_output_choices'] = withPrivacyMode(this.client, this.privacyMode, completions); } } Object.assign(eventProperties, this.properties); if (!this.distinctId) { eventProperties['$process_person_profile'] = false; } this.client.capture({ distinctId: this.distinctId ? this.distinctId.toString() : traceId, event: '$ai_generation', properties: eventProperties, groups: this.groups }); } _logDebugEvent(eventName, runId, parentRunId, extra) { if (this.debug) { console.log(`Event: ${eventName}, runId: ${runId}, parentRunId: ${parentRunId}, extra:`, extra); } } _getLangchainRunName(serialized, ...args) { if (args && args.length > 0) { for (const arg of args) { if (arg && typeof arg === 'object' && 'name' in arg) { return arg.name; } else if (arg && typeof arg === 'object' && 'runName' in arg) { return arg.runName; } } } if (serialized && serialized.name) { return serialized.name; } if (serialized && serialized.id) { return Array.isArray(serialized.id) ? 
serialized.id[serialized.id.length - 1] : serialized.id; } return undefined; } _convertLcToolCallsToOai(toolCalls) { return toolCalls.map(toolCall => ({ type: 'function', id: toolCall.id, function: { name: toolCall.name, arguments: JSON.stringify(toolCall.args) } })); } _extractRawResponse(generation) { // Extract the response from the last response of the LLM call // We return the text of the response if not empty if (generation.text != null && generation.text.trim() !== '') { return generation.text.trim(); } else if (generation.message) { // Additional kwargs contains the response in case of tool usage return generation.message.additional_kwargs || generation.message.additionalKwargs || {}; } else { // Not tool usage, some LLM responses can be simply empty return ''; } } _convertMessageToDict(message) { let messageDict = {}; const messageType = message.getType(); switch (messageType) { case 'human': messageDict = { role: 'user', content: message.content }; break; case 'ai': messageDict = { role: 'assistant', content: message.content }; if (message.tool_calls) { messageDict.tool_calls = this._convertLcToolCallsToOai(message.tool_calls); } break; case 'system': messageDict = { role: 'system', content: message.content }; break; case 'tool': messageDict = { role: 'tool', content: message.content }; break; case 'function': messageDict = { role: 'function', content: message.content }; break; default: messageDict = { role: messageType, content: toContentString(message.content) }; break; } if (message.additional_kwargs) { messageDict = { ...messageDict, ...message.additional_kwargs }; } // Sanitize the message content to redact base64 images return sanitizeLangChain(messageDict); } _parseUsageModel(usage, provider, model) { const conversionList = [['promptTokens', 'input'], ['completionTokens', 'output'], ['input_tokens', 'input'], ['output_tokens', 'output'], ['prompt_token_count', 'input'], ['candidates_token_count', 'output'], ['inputTokenCount', 'input'], 
['outputTokenCount', 'output'], ['input_token_count', 'input'], ['generated_token_count', 'output']]; const parsedUsage = conversionList.reduce((acc, [modelKey, typeKey]) => { const value = usage[modelKey]; if (value != null) { const finalCount = Array.isArray(value) ? value.reduce((sum, tokenCount) => sum + tokenCount, 0) : value; acc[typeKey] = finalCount; } return acc; }, { input: 0, output: 0 }); // Extract additional token details like cached tokens and reasoning tokens const additionalTokenData = {}; // Check for cached tokens in various formats if (usage.prompt_tokens_details?.cached_tokens != null) { additionalTokenData.cacheReadInputTokens = usage.prompt_tokens_details.cached_tokens; } else if (usage.input_token_details?.cache_read != null) { additionalTokenData.cacheReadInputTokens = usage.input_token_details.cache_read; } else if (usage.cachedPromptTokens != null) { additionalTokenData.cacheReadInputTokens = usage.cachedPromptTokens; } else if (usage.cache_read_input_tokens != null) { additionalTokenData.cacheReadInputTokens = usage.cache_read_input_tokens; } // Check for cache write/creation tokens in various formats if (usage.cache_creation_input_tokens != null) { additionalTokenData.cacheWriteInputTokens = usage.cache_creation_input_tokens; } else if (usage.input_token_details?.cache_creation != null) { additionalTokenData.cacheWriteInputTokens = usage.input_token_details.cache_creation; } // Check for reasoning tokens in various formats if (usage.completion_tokens_details?.reasoning_tokens != null) { additionalTokenData.reasoningTokens = usage.completion_tokens_details.reasoning_tokens; } else if (usage.output_token_details?.reasoning != null) { additionalTokenData.reasoningTokens = usage.output_token_details.reasoning; } else if (usage.reasoningTokens != null) { additionalTokenData.reasoningTokens = usage.reasoningTokens; } // Extract web search counts from various provider formats let webSearchCount; // Priority 1: Exact Count // Check Anthropic 
format (server_tool_use.web_search_requests) if (usage.server_tool_use?.web_search_requests !== undefined) { webSearchCount = usage.server_tool_use.web_search_requests; } // Priority 2: Binary Detection (1 or 0) // Check for citations array (Perplexity) else if (usage.citations && Array.isArray(usage.citations) && usage.citations.length > 0) { webSearchCount = 1; } // Check for search_results array (Perplexity via OpenRouter) else if (usage.search_results && Array.isArray(usage.search_results) && usage.search_results.length > 0) { webSearchCount = 1; } // Check for search_context_size (Perplexity via OpenRouter) else if (usage.search_context_size) { webSearchCount = 1; } // Check for annotations with url_citation type else if (usage.annotations && Array.isArray(usage.annotations)) { const hasUrlCitation = usage.annotations.some(ann => { return ann && typeof ann === 'object' && 'type' in ann && ann.type === 'url_citation'; }); if (hasUrlCitation) { webSearchCount = 1; } } // Check Gemini format (grounding metadata - binary 0 or 1) else if (usage.grounding_metadata?.grounding_support !== undefined || usage.grounding_metadata?.web_search_queries !== undefined) { webSearchCount = 1; } if (webSearchCount !== undefined) { additionalTokenData.webSearchCount = webSearchCount; } // For Anthropic providers, LangChain reports input_tokens as the sum of all input tokens. // Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens. // Both cache_read and cache_write tokens should be subtracted since Anthropic's raw API // reports input_tokens as tokens NOT read from or used to create a cache. // For other providers (OpenAI, etc.), input_tokens already excludes cache tokens as expected. 
// Match logic consistent with plugin-server: exact match on provider OR substring match on model let isAnthropic = false; if (provider && provider.toLowerCase() === 'anthropic') { isAnthropic = true; } else if (model && model.toLowerCase().includes('anthropic')) { isAnthropic = true; } if (isAnthropic && parsedUsage.input) { const cacheTokens = (additionalTokenData.cacheReadInputTokens || 0) + (additionalTokenData.cacheWriteInputTokens || 0); if (cacheTokens > 0) { parsedUsage.input = Math.max(parsedUsage.input - cacheTokens, 0); } } return [parsedUsage.input, parsedUsage.output, additionalTokenData]; } parseUsage(response, provider, model) { let llmUsage = [0, 0, {}]; const llmUsageKeys = ['token_usage', 'usage', 'tokenUsage']; if (response.llmOutput != null) { const key = llmUsageKeys.find(k => response.llmOutput?.[k] != null); if (key) { llmUsage = this._parseUsageModel(response.llmOutput[key], provider, model); } } // If top-level usage info was not found, try checking the generations. if (llmUsage[0] === 0 && llmUsage[1] === 0 && response.generations) { for (const generation of response.generations) { for (const genChunk of generation) { // Check other paths for usage information if (genChunk.generationInfo?.usage_metadata) { llmUsage = this._parseUsageModel(genChunk.generationInfo.usage_metadata, provider, model); return llmUsage; } const messageChunk = genChunk.generationInfo ?? {}; const responseMetadata = messageChunk.response_metadata ?? {}; const chunkUsage = responseMetadata['usage'] ?? responseMetadata['amazon-bedrock-invocationMetrics'] ?? messageChunk.usage_metadata; if (chunkUsage) { llmUsage = this._parseUsageModel(chunkUsage, provider, model); return llmUsage; } } } } return llmUsage; } } exports.LangChainCallbackHandler = LangChainCallbackHandler; //# sourceMappingURL=index.cjs.map