UNPKG

@posthog/ai

Version:
1,360 lines (1,287 loc) 47.3 kB
import * as uuid from 'uuid'; const DATA_URL_PREFIX_RE = /^data:([^;,\s]+)(?:;[^;,\s]+)*;base64,/i; const BASE64_ALPHABET_RE = /^[A-Za-z0-9+/_=-]+$/; class Base64Recognizer { recognize(value, minLength) { const dataUrl = DATA_URL_PREFIX_RE.exec(value); if (dataUrl) return { kind: 'data-url', mediaType: dataUrl[1] }; if (value.length < minLength) return { kind: 'none' }; const confidencePrefix = value.slice(0, minLength); if (BASE64_ALPHABET_RE.test(confidencePrefix)) { return { kind: 'raw' }; } else { return { kind: 'none' }; } } } const MIME_HINT_KEYS = ['mediaType', 'media_type', 'mimeType', 'mime_type']; const STRONG_CONTEXT_KEYS = new Set(['data', 'file_data', 'fileData', 'image_url', 'imageUrl', 'video_url', 'videoUrl', 'audio', 'audio_data', 'audioData', 'inline_data', 'inlineData', 'source', 'result']); const STRONG_CONTEXT_TYPES = new Set(['image', 'image_url', 'input_image', 'audio', 'input_audio', 'video', 'video_url', 'file', 'input_file', 'document', 'media', 'file-data']); const FILE_FAMILY_TYPES = new Set(['file', 'input_file', 'document', 'media', 'file-data']); const KNOWN_AUDIO_FORMATS = new Set(['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'webm']); class MediaTypeContext { static EMPTY = new MediaTypeContext(undefined, undefined); constructor(parent, key) { this.parent = parent; this.key = key; } inferMediaType() { return this.inferFromSiblingMime() ?? this.inferFromSiblingFormat() ?? this.inferFromParentType() ?? this.inferFromKey(); } inferFromSiblingMime() { if (!this.parent) return undefined; for (const hint of MIME_HINT_KEYS) { const v = this.parent[hint]; if (typeof v === 'string') return v; } return undefined; } inferFromSiblingFormat() { if (!this.parent) return undefined; const fmt = this.parent.format; if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) { return `audio/${fmt.toLowerCase()}`; } return undefined; } inferFromParentType() { if (!this.parent) return undefined; const t = this.parent.type; if (typeof t !== 'string') return undefined; if (t === 'image' || t === 'image_url' || t === 'input_image') return 'image'; if (t === 'audio' || t === 'input_audio') return 'audio'; if (t === 'video' || t === 'video_url') return 'video'; if (FILE_FAMILY_TYPES.has(t)) return 'application/octet-stream'; return undefined; } inferFromKey() { if (!this.key) return undefined; const key = this.key.toLowerCase(); if (key.includes('audio')) return 'audio'; if (key.includes('video')) return 'video'; if (key.includes('image')) return 'image'; if (key.includes('file') || key.includes('document')) return 'application/octet-stream'; return undefined; } signalsBinary() { if (this.parent) { for (const hint of MIME_HINT_KEYS) { if (typeof this.parent[hint] === 'string') return true; } const fmt = this.parent.format; if (typeof fmt === 'string' && KNOWN_AUDIO_FORMATS.has(fmt.toLowerCase())) return true; const t = this.parent.type; if (typeof t === 'string' && STRONG_CONTEXT_TYPES.has(t)) return true; } if (this.key && STRONG_CONTEXT_KEYS.has(this.key)) return true; return false; } } const STRONG_CONTEXT_MIN_LENGTH = 64; const WEAK_CONTEXT_MIN_LENGTH = 1024; class BinaryContentRedactor { visited = new WeakSet(); constructor(recognizer = new Base64Recognizer()) { this.recognizer = recognizer; } redact(value) { if (this.isMultimodalEnabled()) return value; this.visited = new WeakSet(); return this.walk(value, MediaTypeContext.EMPTY); } walk(value, ctx) { if (value === null || value === undefined) return value; if (typeof value === 'string') return this.redactString(value, ctx); if (typeof value !== 'object') return value; // Buffer extends Uint8Array, so this branch catches both. if (typeof Uint8Array !== 'undefined' && value instanceof Uint8Array) { return this.placeholderFor(ctx.inferMediaType()); } if (this.visited.has(value)) return null; this.visited.add(value); if (Array.isArray(value)) { return value.map(item => this.walk(item, ctx)); } const obj = value; const out = {}; for (const k of Object.keys(obj)) { out[k] = this.walk(obj[k], new MediaTypeContext(obj, k)); } return out; } redactString(value, ctx) { const minLength = ctx.signalsBinary() ? STRONG_CONTEXT_MIN_LENGTH : WEAK_CONTEXT_MIN_LENGTH; const recognition = this.recognizer.recognize(value, minLength); switch (recognition.kind) { case 'data-url': return this.placeholderFor(recognition.mediaType); case 'raw': return this.placeholderFor(ctx.inferMediaType()); case 'none': return value; } } placeholderFor(mediaType) { if (!mediaType) return '[base64 redacted]'; if (mediaType === 'application/octet-stream') return '[base64 file redacted]'; return `[base64 ${mediaType} redacted]`; } isMultimodalEnabled() { const val = process.env._INTERNAL_LLMA_MULTIMODAL || ''; return val.toLowerCase() === 'true' || val === '1' || val.toLowerCase() === 'yes'; } } const redactor = new BinaryContentRedactor(); const sanitizeLangChain = data => redactor.redact(data); const STRING_FORMAT = 'utf8'; // Reused across calls to avoid per-invocation allocation; truncate() runs // hundreds of times for prompts with many parts. new TextEncoder(); new TextDecoder(STRING_FORMAT, { fatal: false }); /** * Safely converts content to a string, preserving structure for objects/arrays. * - If content is already a string, returns it as-is * - If content is an object or array, stringifies it with JSON.stringify to preserve structure * - Otherwise, converts to string with String() * * This prevents the "[object Object]" bug when objects are naively converted to strings. * * @param content - The content to convert to a string * @returns A string representation that preserves structure for complex types */ function toContentString(content) { if (typeof content === 'string') { return content; } if (content !== undefined && content !== null && typeof content === 'object') { try { return JSON.stringify(content); } catch { // Fallback for circular refs, BigInt, or objects with throwing toJSON return String(content); } } return String(content); } const getModelParams = params => { if (!params) { return {}; } const modelParams = {}; const paramKeys = ['temperature', 'max_tokens', 'max_completion_tokens', 'top_p', 'frequency_penalty', 'presence_penalty', 'n', 'stop', 'stream', 'streaming', 'language', 'response_format', 'timestamp_granularities']; for (const key of paramKeys) { if (key in params && params[key] !== undefined) { modelParams[key] = params[key]; } } return modelParams; }; const withPrivacyMode = (client, privacyMode, input) => { return client.privacy_mode || privacyMode ? null : input; }; function sanitizeValues(obj) { if (obj === undefined || obj === null) { return obj; } const jsonSafe = JSON.parse(JSON.stringify(obj)); if (typeof jsonSafe === 'string') { // Sanitize lone surrogates by round-tripping through UTF-8 return new TextDecoder().decode(new TextEncoder().encode(jsonSafe)); } else if (Array.isArray(jsonSafe)) { return jsonSafe.map(sanitizeValues); } else if (jsonSafe && typeof jsonSafe === 'object') { return Object.fromEntries(Object.entries(jsonSafe).map(([k, v]) => [k, sanitizeValues(v)])); } return jsonSafe; } function getDefaultExportFromCjs (x) { return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x; } var decamelize; var hasRequiredDecamelize; function requireDecamelize () { if (hasRequiredDecamelize) return decamelize; hasRequiredDecamelize = 1; decamelize = function (str, sep) { if (typeof str !== 'string') { throw new TypeError('Expected a string'); } sep = typeof sep === 'undefined' ? '_' : sep; return str .replace(/([a-z\d])([A-Z])/g, '$1' + sep + '$2') .replace(/([A-Z]+)([A-Z][a-z\d]+)/g, '$1' + sep + '$2') .toLowerCase(); }; return decamelize; } var decamelizeExports = requireDecamelize(); var snakeCase = /*@__PURE__*/getDefaultExportFromCjs(decamelizeExports); var camelcase = {exports: {}}; var hasRequiredCamelcase; function requireCamelcase () { if (hasRequiredCamelcase) return camelcase.exports; hasRequiredCamelcase = 1; const UPPERCASE = /[\p{Lu}]/u; const LOWERCASE = /[\p{Ll}]/u; const LEADING_CAPITAL = /^[\p{Lu}](?![\p{Lu}])/gu; const IDENTIFIER = /([\p{Alpha}\p{N}_]|$)/u; const SEPARATORS = /[_.\- ]+/; const LEADING_SEPARATORS = new RegExp('^' + SEPARATORS.source); const SEPARATORS_AND_IDENTIFIER = new RegExp(SEPARATORS.source + IDENTIFIER.source, 'gu'); const NUMBERS_AND_IDENTIFIER = new RegExp('\\d+' + IDENTIFIER.source, 'gu'); const preserveCamelCase = (string, toLowerCase, toUpperCase) => { let isLastCharLower = false; let isLastCharUpper = false; let isLastLastCharUpper = false; for (let i = 0; i < string.length; i++) { const character = string[i]; if (isLastCharLower && UPPERCASE.test(character)) { string = string.slice(0, i) + '-' + string.slice(i); isLastCharLower = false; isLastLastCharUpper = isLastCharUpper; isLastCharUpper = true; i++; } else if (isLastCharUpper && isLastLastCharUpper && LOWERCASE.test(character)) { string = string.slice(0, i - 1) + '-' + string.slice(i - 1); isLastLastCharUpper = isLastCharUpper; isLastCharUpper = false; isLastCharLower = true; } else { isLastCharLower = toLowerCase(character) === character && toUpperCase(character) !== character; isLastLastCharUpper = isLastCharUpper; isLastCharUpper = toUpperCase(character) === character && toLowerCase(character) !== character; } } return string; }; const preserveConsecutiveUppercase = (input, toLowerCase) => { LEADING_CAPITAL.lastIndex = 0; return input.replace(LEADING_CAPITAL, m1 => toLowerCase(m1)); }; const postProcess = (input, toUpperCase) => { SEPARATORS_AND_IDENTIFIER.lastIndex = 0; NUMBERS_AND_IDENTIFIER.lastIndex = 0; return input.replace(SEPARATORS_AND_IDENTIFIER, (_, identifier) => toUpperCase(identifier)) .replace(NUMBERS_AND_IDENTIFIER, m => toUpperCase(m)); }; const camelCase = (input, options) => { if (!(typeof input === 'string' || Array.isArray(input))) { throw new TypeError('Expected the input to be `string | string[]`'); } options = { pascalCase: false, preserveConsecutiveUppercase: false, ...options }; if (Array.isArray(input)) { input = input.map(x => x.trim()) .filter(x => x.length) .join('-'); } else { input = input.trim(); } if (input.length === 0) { return ''; } const toLowerCase = options.locale === false ? string => string.toLowerCase() : string => string.toLocaleLowerCase(options.locale); const toUpperCase = options.locale === false ? string => string.toUpperCase() : string => string.toLocaleUpperCase(options.locale); if (input.length === 1) { return options.pascalCase ? toUpperCase(input) : toLowerCase(input); } const hasUpperCase = input !== toLowerCase(input); if (hasUpperCase) { input = preserveCamelCase(input, toLowerCase, toUpperCase); } input = input.replace(LEADING_SEPARATORS, ''); if (options.preserveConsecutiveUppercase) { input = preserveConsecutiveUppercase(input, toLowerCase); } else { input = toLowerCase(input); } if (options.pascalCase) { input = toUpperCase(input.charAt(0)) + input.slice(1); } return postProcess(input, toUpperCase); }; camelcase.exports = camelCase; // TODO: Remove this for the next major release camelcase.exports.default = camelCase; return camelcase.exports; } requireCamelcase(); //#region src/load/map_keys.ts function keyToJson(key, map) { return map?.[key] || snakeCase(key); } function mapKeys(fields, mapper, map) { const mapped = {}; for (const key in fields) if (Object.hasOwn(fields, key)) mapped[mapper(key, map)] = fields[key]; return mapped; } //#region src/load/validation.ts /** * Sentinel key used to mark escaped user objects during serialization. * * When a plain object contains 'lc' key (which could be confused with LC objects), * we wrap it as `{"__lc_escaped__": {...original...}}`. */ const LC_ESCAPED_KEY = "__lc_escaped__"; /** * Check if an object needs escaping to prevent confusion with LC objects. * * An object needs escaping if: * 1. It has an `'lc'` key (could be confused with LC serialization format) * 2. It has only the escape key (would be mistaken for an escaped object) */ function needsEscaping(obj) { return "lc" in obj || Object.keys(obj).length === 1 && LC_ESCAPED_KEY in obj; } /** * Wrap an object in the escape marker. * * @example * ```typescript * {"key": "value"} // becomes {"__lc_escaped__": {"key": "value"}} * ``` */ function escapeObject(obj) { return { [LC_ESCAPED_KEY]: obj }; } /** * Check if an object looks like a Serializable instance (duck typing). */ function isSerializableLike(obj) { return obj !== null && typeof obj === "object" && "lc_serializable" in obj && typeof obj.toJSON === "function"; } /** * Create a "not_implemented" serialization result for objects that cannot be serialized. */ function createNotImplemented(obj) { let id; if (obj !== null && typeof obj === "object") if ("lc_id" in obj && Array.isArray(obj.lc_id)) id = obj.lc_id; else id = [obj.constructor?.name ?? "Object"]; else id = [typeof obj]; return { lc: 1, type: "not_implemented", id }; } /** * Escape a value if it needs escaping (contains `lc` key). * * This is a simpler version of `serializeValue` that doesn't handle Serializable * objects - it's meant to be called on kwargs values that have already been * processed by `toJSON()`. * * @param value - The value to potentially escape. * @param pathSet - WeakSet to track ancestor objects in the current path to detect circular references. * Objects are removed after processing to allow shared references (same object in * multiple places) while still detecting true circular references (ancestor in descendant). * @returns The value with any `lc`-containing objects wrapped in escape markers. */ function escapeIfNeeded(value, pathSet = /* @__PURE__ */ new WeakSet()) { if (value !== null && typeof value === "object" && !Array.isArray(value)) { if (pathSet.has(value)) return createNotImplemented(value); if (isSerializableLike(value)) return value; pathSet.add(value); const record = value; if (needsEscaping(record)) { pathSet.delete(value); return escapeObject(record); } const result = {}; for (const [key, val] of Object.entries(record)) result[key] = escapeIfNeeded(val, pathSet); pathSet.delete(value); return result; } if (Array.isArray(value)) return value.map((item) => escapeIfNeeded(item, pathSet)); return value; } function shallowCopy(obj) { return Array.isArray(obj) ? [...obj] : { ...obj }; } function replaceSecrets(root, secretsMap) { const result = shallowCopy(root); for (const [path, secretId] of Object.entries(secretsMap)) { const [last, ...partsReverse] = path.split(".").reverse(); let current = result; for (const part of partsReverse.reverse()) { if (current[part] === void 0) break; current[part] = shallowCopy(current[part]); current = current[part]; } if (current[last] !== void 0) current[last] = { lc: 1, type: "secret", id: [secretId] }; } return result; } /** * Get a unique name for the module, rather than parent class implementations. * Should not be subclassed, subclass lc_name above instead. */ function get_lc_unique_name(serializableClass) { const parentClass = Object.getPrototypeOf(serializableClass); if (typeof serializableClass.lc_name === "function" && (typeof parentClass.lc_name !== "function" || serializableClass.lc_name() !== parentClass.lc_name())) return serializableClass.lc_name(); else return serializableClass.name; } var Serializable = class Serializable { lc_serializable = false; lc_kwargs; /** * The name of the serializable. Override to provide an alias or * to preserve the serialized module name in minified environments. * * Implemented as a static method to support loading logic. */ static lc_name() { return this.name; } /** * The final serialized identifier for the module. */ get lc_id() { return [...this.lc_namespace, get_lc_unique_name(this.constructor)]; } /** * A map of secrets, which will be omitted from serialization. * Keys are paths to the secret in constructor args, e.g. "foo.bar.baz". * Values are the secret ids, which will be used when deserializing. */ get lc_secrets() {} /** * A map of additional attributes to merge with constructor args. * Keys are the attribute names, e.g. "foo". * Values are the attribute values, which will be serialized. * These attributes need to be accepted by the constructor as arguments. */ get lc_attributes() {} /** * A map of aliases for constructor args. * Keys are the attribute names, e.g. "foo". * Values are the alias that will replace the key in serialization. * This is used to eg. make argument names match Python. */ get lc_aliases() {} /** * A manual list of keys that should be serialized. * If not overridden, all fields passed into the constructor will be serialized. */ get lc_serializable_keys() {} constructor(kwargs, ..._args) { if (this.lc_serializable_keys !== void 0) this.lc_kwargs = Object.fromEntries(Object.entries(kwargs || {}).filter(([key]) => this.lc_serializable_keys?.includes(key))); else this.lc_kwargs = kwargs ?? {}; } toJSON() { if (!this.lc_serializable) return this.toJSONNotImplemented(); if (this.lc_kwargs instanceof Serializable || typeof this.lc_kwargs !== "object" || Array.isArray(this.lc_kwargs)) return this.toJSONNotImplemented(); const aliases = {}; const secrets = {}; const kwargs = Object.keys(this.lc_kwargs).reduce((acc, key) => { acc[key] = key in this ? this[key] : this.lc_kwargs[key]; return acc; }, {}); for (let current = Object.getPrototypeOf(this); current; current = Object.getPrototypeOf(current)) { Object.assign(aliases, Reflect.get(current, "lc_aliases", this)); Object.assign(secrets, Reflect.get(current, "lc_secrets", this)); Object.assign(kwargs, Reflect.get(current, "lc_attributes", this)); } Object.keys(secrets).forEach((keyPath) => { let read = this; let write = kwargs; const [last, ...partsReverse] = keyPath.split(".").reverse(); for (const key of partsReverse.reverse()) { if (!(key in read) || read[key] === void 0) return; if (!(key in write) || write[key] === void 0) { if (typeof read[key] === "object" && read[key] != null) write[key] = {}; else if (Array.isArray(read[key])) write[key] = []; } read = read[key]; write = write[key]; } if (last in read && read[last] !== void 0) write[last] = write[last] || read[last]; }); const escapedKwargs = {}; const pathSet = /* @__PURE__ */ new WeakSet(); pathSet.add(this); for (const [key, value] of Object.entries(kwargs)) escapedKwargs[key] = escapeIfNeeded(value, pathSet); const processedKwargs = mapKeys(Object.keys(secrets).length ? replaceSecrets(escapedKwargs, secrets) : escapedKwargs, keyToJson, aliases); return { lc: 1, type: "constructor", id: this.lc_id, kwargs: processedKwargs }; } toJSONNotImplemented() { return { lc: 1, type: "not_implemented", id: this.lc_id }; } }; const isDeno = () => typeof Deno !== "undefined"; function getEnvironmentVariable(name) { try { if (typeof process !== "undefined") return process.env?.[name]; else if (isDeno()) return Deno?.env.get(name); else return; } catch { return; } } /** * Abstract class that provides a set of optional methods that can be * overridden in derived classes to handle various events during the * execution of a LangChain application. */ var BaseCallbackHandlerMethodsClass = class {}; /** * Abstract base class for creating callback handlers in the LangChain * framework. It provides a set of optional methods that can be overridden * in derived classes to handle various events during the execution of a * LangChain application. */ var BaseCallbackHandler = class extends BaseCallbackHandlerMethodsClass { lc_serializable = false; get lc_namespace() { return [ "langchain_core", "callbacks", this.name ]; } get lc_secrets() {} get lc_attributes() {} get lc_aliases() {} get lc_serializable_keys() {} /** * The name of the serializable. Override to provide an alias or * to preserve the serialized module name in minified environments. * * Implemented as a static method to support loading logic. */ static lc_name() { return this.name; } /** * The final serialized identifier for the module. */ get lc_id() { return [...this.lc_namespace, get_lc_unique_name(this.constructor)]; } lc_kwargs; ignoreLLM = false; ignoreChain = false; ignoreAgent = false; ignoreRetriever = false; ignoreCustomEvent = false; raiseError = false; awaitHandlers = getEnvironmentVariable("LANGCHAIN_CALLBACKS_BACKGROUND") === "false"; constructor(input) { super(); this.lc_kwargs = input || {}; if (input) { this.ignoreLLM = input.ignoreLLM ?? this.ignoreLLM; this.ignoreChain = input.ignoreChain ?? this.ignoreChain; this.ignoreAgent = input.ignoreAgent ?? this.ignoreAgent; this.ignoreRetriever = input.ignoreRetriever ?? this.ignoreRetriever; this.ignoreCustomEvent = input.ignoreCustomEvent ?? this.ignoreCustomEvent; this.raiseError = input.raiseError ?? this.raiseError; this.awaitHandlers = this.raiseError || (input._awaitHandler ?? this.awaitHandlers); } } copy() { return new this.constructor(this); } toJSON() { return Serializable.prototype.toJSON.call(this); } toJSONNotImplemented() { return Serializable.prototype.toJSONNotImplemented.call(this); } static fromMethods(methods) { class Handler extends BaseCallbackHandler { name = uuid.v7(); constructor() { super(); Object.assign(this, methods); } } return new Handler(); } }; var version = "7.19.5"; const DEFAULT_MAX_DEPTH = 3; const MAX_STACK_LINES = 20; function serializeError(value, depth = DEFAULT_MAX_DEPTH) { if (depth < 0 || value === null || typeof value !== 'object') { return value; } if (value instanceof Error) { const out = { name: value.name, message: value.message, stack: truncateStack(value.stack) }; for (const key of Object.keys(value)) { out[key] = serializeError(value[key], depth - 1); } if (value.cause !== undefined) { out.cause = serializeError(value.cause, depth - 1); } return out; } if (Array.isArray(value)) { return value.map(item => serializeError(item, depth - 1)); } return value; } function stringifyError(error) { try { return JSON.stringify(sanitizeValues(serializeError(error))); } catch { if (error instanceof Error) { return JSON.stringify({ name: error.name, message: error.message }); } return JSON.stringify({ message: String(error) }); } } function truncateStack(stack) { if (!stack) { return stack; } const lines = stack.split('\n'); if (lines.length <= MAX_STACK_LINES) { return stack; } return [...lines.slice(0, MAX_STACK_LINES), '... (truncated)'].join('\n'); } /** A run may either be a Span or a Generation */ /** Storage for run metadata */ class LangChainCallbackHandler extends BaseCallbackHandler { name = 'PosthogCallbackHandler'; runs = {}; parentTree = {}; constructor(options) { if (!options.client) { throw new Error('PostHog client is required'); } super(); this.client = options.client; this.distinctId = options.distinctId; this.traceId = options.traceId; this.properties = options.properties || {}; this.privacyMode = options.privacyMode || false; this.groups = options.groups || {}; this.debug = options.debug || false; } // ===== CALLBACK METHODS ===== handleChainStart(chain, inputs, runId, parentRunId, tags, metadata, _runType, runName) { this._logDebugEvent('on_chain_start', runId, parentRunId, { inputs, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(chain, inputs, runId, parentRunId, metadata, tags, runName); } handleChainEnd(outputs, runId, parentRunId, tags, _kwargs) { this._logDebugEvent('on_chain_end', runId, parentRunId, { outputs, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, outputs); } handleChainError(error, runId, parentRunId, tags, _kwargs) { this._logDebugEvent('on_chain_error', runId, parentRunId, { error, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, error); } handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, runName) { this._logDebugEvent('on_chat_model_start', runId, parentRunId, { messages, tags }); this._setParentOfRun(runId, parentRunId); // Flatten the two-dimensional messages and convert each message to a plain object const input = messages.flat().map(m => this._convertMessageToDict(m)); this._setLLMMetadata(serialized, runId, input, metadata, extraParams, runName); } handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, runName) { this._logDebugEvent('on_llm_start', runId, parentRunId, { prompts, tags }); this._setParentOfRun(runId, parentRunId); this._setLLMMetadata(serialized, runId, prompts, metadata, extraParams, runName); } handleLLMEnd(output, runId, parentRunId, tags, _extraParams) { this._logDebugEvent('on_llm_end', runId, parentRunId, { output, tags }); this._popRunAndCaptureGeneration(runId, parentRunId, output); } handleLLMError(err, runId, parentRunId, tags, _extraParams) { this._logDebugEvent('on_llm_error', runId, parentRunId, { err, tags }); this._popRunAndCaptureGeneration(runId, parentRunId, err); } handleToolStart(tool, input, runId, parentRunId, tags, metadata, runName) { this._logDebugEvent('on_tool_start', runId, parentRunId, { input, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(tool, input, runId, parentRunId, metadata, tags, runName); } handleToolEnd(output, runId, parentRunId, tags) { this._logDebugEvent('on_tool_end', runId, parentRunId, { output, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, output); } handleToolError(err, runId, parentRunId, tags) { this._logDebugEvent('on_tool_error', runId, parentRunId, { err, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, err); } handleRetrieverStart(retriever, query, runId, parentRunId, tags, metadata, name) { this._logDebugEvent('on_retriever_start', runId, parentRunId, { query, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(retriever, query, runId, parentRunId, metadata, tags, name); } handleRetrieverEnd(documents, runId, parentRunId, tags) { this._logDebugEvent('on_retriever_end', runId, parentRunId, { documents, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, documents); } handleRetrieverError(err, runId, parentRunId, tags) { this._logDebugEvent('on_retriever_error', runId, parentRunId, { err, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, err); } handleAgentAction(action, runId, parentRunId, tags) { this._logDebugEvent('on_agent_action', runId, parentRunId, { action, tags }); this._setParentOfRun(runId, parentRunId); this._setTraceOrSpanMetadata(null, action, runId, parentRunId); } handleAgentEnd(action, runId, parentRunId, tags) { this._logDebugEvent('on_agent_finish', runId, parentRunId, { action, tags }); this._popRunAndCaptureTraceOrSpan(runId, parentRunId, action); } // ===== PRIVATE HELPERS ===== _setParentOfRun(runId, parentRunId) { if (parentRunId) { this.parentTree[runId] = parentRunId; } } _popParentOfRun(runId) { delete this.parentTree[runId]; } _findRootRun(runId) { let id = runId; while (this.parentTree[id]) { id = this.parentTree[id]; } return id; } _setTraceOrSpanMetadata(serialized, input, runId, parentRunId, ...args) { // Use default names if not provided: if this is a top-level run, we mark it as a trace, otherwise as a span. const defaultName = parentRunId ? 'span' : 'trace'; const runName = this._getLangchainRunName(serialized, ...args) || defaultName; this.runs[runId] = { name: runName, input, startTime: Date.now() }; } _setLLMMetadata(serialized, runId, messages, metadata, extraParams, runName) { const runNameFound = this._getLangchainRunName(serialized, { extraParams, runName }) || 'generation'; const generation = { name: runNameFound, input: sanitizeLangChain(messages), startTime: Date.now() }; if (extraParams) { generation.modelParams = getModelParams(extraParams.invocation_params); if (extraParams.invocation_params && extraParams.invocation_params.tools) { generation.tools = extraParams.invocation_params.tools; } } if (metadata) { if (metadata.ls_model_name) { generation.model = metadata.ls_model_name; } if (metadata.ls_provider) { generation.provider = metadata.ls_provider; } } if (serialized && 'kwargs' in serialized && serialized.kwargs.openai_api_base) { generation.baseUrl = serialized.kwargs.openai_api_base; } this.runs[runId] = generation; } _popRunMetadata(runId) { const endTime = Date.now(); const run = this.runs[runId]; if (!run) { console.warn(`No run metadata found for run ${runId}`); return undefined; } run.endTime = endTime; delete this.runs[runId]; return run; } _getTraceId(runId) { return this.traceId ? String(this.traceId) : this._findRootRun(runId); } _getParentRunId(traceId, _runId, parentRunId) { // Replace the parent-run if not found in our stored parent tree. if (parentRunId && !this.parentTree[parentRunId]) { return traceId; } return parentRunId; } _popRunAndCaptureTraceOrSpan(runId, parentRunId, outputs) { const traceId = this._getTraceId(runId); this._popParentOfRun(runId); const run = this._popRunMetadata(runId); if (!run) { return; } if ('modelParams' in run) { console.warn(`Run ${runId} is a generation, but attempted to be captured as a trace/span.`); return; } const actualParentRunId = this._getParentRunId(traceId, runId, parentRunId); this._captureTraceOrSpan(traceId, runId, run, outputs, actualParentRunId); } _captureTraceOrSpan(traceId, runId, run, outputs, parentRunId) { const eventName = parentRunId ? '$ai_span' : '$ai_trace'; const latency = run.endTime ? (run.endTime - run.startTime) / 1000 : 0; const eventProperties = { $ai_lib: 'posthog-ai', $ai_lib_version: version, $ai_trace_id: traceId, $ai_input_state: withPrivacyMode(this.client, this.privacyMode, run.input), $ai_latency: latency, $ai_span_name: run.name, $ai_span_id: runId, $ai_framework: 'langchain' }; if (parentRunId) { eventProperties['$ai_parent_id'] = parentRunId; } Object.assign(eventProperties, this.properties); if (!this.distinctId) { eventProperties['$process_person_profile'] = false; } if (outputs instanceof Error) { eventProperties['$ai_error'] = stringifyError(outputs); eventProperties['$ai_is_error'] = true; } else if (outputs !== undefined) { eventProperties['$ai_output_state'] = withPrivacyMode(this.client, this.privacyMode, outputs); } this.client.capture({ distinctId: this.distinctId ? this.distinctId.toString() : runId, event: eventName, properties: eventProperties, groups: this.groups }); } _popRunAndCaptureGeneration(runId, parentRunId, response) { const traceId = this._getTraceId(runId); this._popParentOfRun(runId); const run = this._popRunMetadata(runId); if (!run || typeof run !== 'object' || !('modelParams' in run)) { console.warn(`Run ${runId} is not a generation, but attempted to be captured as such.`); return; } const actualParentRunId = this._getParentRunId(traceId, runId, parentRunId); this._captureGeneration(traceId, runId, run, response, actualParentRunId); } _captureGeneration(traceId, runId, run, output, parentRunId) { const latency = run.endTime ? (run.endTime - run.startTime) / 1000 : 0; const eventProperties = { $ai_lib: 'posthog-ai', $ai_lib_version: version, $ai_trace_id: traceId, $ai_span_id: runId, $ai_span_name: run.name, $ai_parent_id: parentRunId, $ai_provider: run.provider, $ai_model: run.model, $ai_model_parameters: run.modelParams, $ai_input: withPrivacyMode(this.client, this.privacyMode, run.input), $ai_http_status: 200, $ai_latency: latency, $ai_base_url: run.baseUrl, $ai_framework: 'langchain' }; if (run.tools) { eventProperties['$ai_tools'] = run.tools; } if (output instanceof Error) { eventProperties['$ai_http_status'] = output.status || 500; eventProperties['$ai_error'] = stringifyError(output); eventProperties['$ai_is_error'] = true; } else { // Handle token usage const [inputTokens, outputTokens, additionalTokenData] = this.parseUsage(output, run.provider, run.model); eventProperties['$ai_input_tokens'] = inputTokens; eventProperties['$ai_output_tokens'] = outputTokens; // Add additional token data to properties if (additionalTokenData.cacheReadInputTokens) { eventProperties['$ai_cache_read_input_tokens'] = additionalTokenData.cacheReadInputTokens; } if (additionalTokenData.cacheWriteInputTokens) { eventProperties['$ai_cache_creation_input_tokens'] = additionalTokenData.cacheWriteInputTokens; } if (additionalTokenData.reasoningTokens) { eventProperties['$ai_reasoning_tokens'] = additionalTokenData.reasoningTokens; } if (additionalTokenData.webSearchCount !== undefined) { eventProperties['$ai_web_search_count'] = additionalTokenData.webSearchCount; } // Extract stop reason from generation info const stopReason = this._extractStopReason(output); if (stopReason) { eventProperties['$ai_stop_reason'] = stopReason; } // Handle generations/completions let completions; if (output.generations && Array.isArray(output.generations)) { const lastGeneration = output.generations[output.generations.length - 1]; if (Array.isArray(lastGeneration) && lastGeneration.length > 0) { // Check if this is a ChatGeneration by looking at the first item const isChatGeneration = 'message' in lastGeneration[0] && lastGeneration[0].message; if (isChatGeneration) { // For ChatGeneration, convert messages to dict format completions = lastGeneration.map(gen => { return this._convertMessageToDict(gen.message); }); } else { // For non-ChatGeneration, extract raw response completions = lastGeneration.map(gen => { return this._extractRawResponse(gen); }); } } } if (completions) { eventProperties['$ai_output_choices'] = withPrivacyMode(this.client, this.privacyMode, completions); } } Object.assign(eventProperties, this.properties); if (!this.distinctId) { eventProperties['$process_person_profile'] = false; } this.client.capture({ distinctId: this.distinctId ? this.distinctId.toString() : traceId, event: '$ai_generation', properties: eventProperties, groups: this.groups }); } _logDebugEvent(eventName, runId, parentRunId, extra) { if (this.debug) { console.log(`Event: ${eventName}, runId: ${runId}, parentRunId: ${parentRunId}, extra:`, extra); } } _getLangchainRunName(serialized, ...args) { if (args && args.length > 0) { for (const arg of args) { if (arg && typeof arg === 'object' && 'name' in arg) { return arg.name; } else if (arg && typeof arg === 'object' && 'runName' in arg) { return arg.runName; } } } if (serialized && serialized.name) { return serialized.name; } if (serialized && serialized.id) { return Array.isArray(serialized.id) ? serialized.id[serialized.id.length - 1] : serialized.id; } return undefined; } _convertLcToolCallsToOai(toolCalls) { return toolCalls.map(toolCall => ({ type: 'function', id: toolCall.id, function: { name: toolCall.name, arguments: JSON.stringify(toolCall.args) } })); } _extractRawResponse(generation) { // Extract the response from the last response of the LLM call // We return the text of the response if not empty if (generation.text != null && generation.text.trim() !== '') { return generation.text.trim(); } else if (generation.message) { // Additional kwargs contains the response in case of tool usage return generation.message.additional_kwargs || generation.message.additionalKwargs || {}; } else { // Not tool usage, some LLM responses can be simply empty return ''; } } _convertMessageToDict(message) { let messageDict = {}; const messageType = message.getType(); switch (messageType) { case 'human': messageDict = { role: 'user', content: message.content }; break; case 'ai': messageDict = { role: 'assistant', content: message.content }; if (message.tool_calls) { messageDict.tool_calls = this._convertLcToolCallsToOai(message.tool_calls); } break; case 'system': messageDict = { role: 'system', content: message.content }; break; case 'tool': messageDict = { role: 'tool', content: message.content }; break; case 'function': messageDict = { role: 'function', content: message.content }; break; default: messageDict = { role: messageType, content: toContentString(message.content) }; break; } if (message.additional_kwargs) { messageDict = { ...messageDict, ...message.additional_kwargs }; } // Sanitize the message content to redact base64 images return sanitizeLangChain(messageDict); } _extractStopReason(output) { if (!output.generations || !Array.isArray(output.generations)) { return undefined; } const lastGeneration = output.generations[output.generations.length - 1]; if (!Array.isArray(lastGeneration) || lastGeneration.length === 0) { return undefined; } const gen = lastGeneration[0]; // Check generationInfo for finish_reason (OpenAI format) if (gen.generationInfo?.finish_reason) { return String(gen.generationInfo.finish_reason); } // Check generationInfo for response_metadata.stop_reason (Anthropic format) if (gen.generationInfo?.response_metadata?.stop_reason) { return String(gen.generationInfo.response_metadata.stop_reason); } // Check message response_metadata for finish_reason (common LangChain format) if (gen.generationInfo?.response_metadata?.finish_reason) { return String(gen.generationInfo.response_metadata.finish_reason); } // Check for stop_reason directly in generationInfo if (gen.generationInfo?.stop_reason) { return String(gen.generationInfo.stop_reason); } return undefined; } _parseUsageModel(usage, provider, model) { const conversionList = [['promptTokens', 'input'], ['completionTokens', 'output'], ['input_tokens', 'input'], ['output_tokens', 'output'], ['prompt_token_count', 'input'], ['candidates_token_count', 'output'], ['inputTokenCount', 'input'], ['outputTokenCount', 'output'], ['input_token_count', 'input'], ['generated_token_count', 'output']]; const parsedUsage = conversionList.reduce((acc, [modelKey, typeKey]) => { const value = usage[modelKey]; if (value != null) { const finalCount = Array.isArray(value) ? value.reduce((sum, tokenCount) => sum + tokenCount, 0) : value; acc[typeKey] = finalCount; } return acc; }, { input: 0, output: 0 }); // Extract additional token details like cached tokens and reasoning tokens const additionalTokenData = {}; // Check for cached tokens in various formats if (usage.prompt_tokens_details?.cached_tokens != null) { additionalTokenData.cacheReadInputTokens = usage.prompt_tokens_details.cached_tokens; } else if (usage.input_token_details?.cache_read != null) { additionalTokenData.cacheReadInputTokens = usage.input_token_details.cache_read; } else if (usage.cachedPromptTokens != null) { additionalTokenData.cacheReadInputTokens = usage.cachedPromptTokens; } else if (usage.cache_read_input_tokens != null) { additionalTokenData.cacheReadInputTokens = usage.cache_read_input_tokens; } // Check for cache write/creation tokens in various formats if (usage.cache_creation_input_tokens != null) { additionalTokenData.cacheWriteInputTokens = usage.cache_creation_input_tokens; } else if (usage.input_token_details?.cache_creation != null) { additionalTokenData.cacheWriteInputTokens = usage.input_token_details.cache_creation; } // Check for reasoning tokens in various formats if (usage.completion_tokens_details?.reasoning_tokens != null) { additionalTokenData.reasoningTokens = usage.completion_tokens_details.reasoning_tokens; } else if (usage.output_token_details?.reasoning != null) { additionalTokenData.reasoningTokens = usage.output_token_details.reasoning; } else if (usage.reasoningTokens != null) { additionalTokenData.reasoningTokens = usage.reasoningTokens; } // Extract web search counts from various provider formats let webSearchCount; // Priority 1: Exact Count // Check Anthropic format (server_tool_use.web_search_requests) if (usage.server_tool_use?.web_search_requests !== undefined) { webSearchCount = usage.server_tool_use.web_search_requests; } // Priority 2: Binary Detection (1 or 0) // Check for citations array (Perplexity) else if (usage.citations && Array.isArray(usage.citations) && usage.citations.length > 0) { webSearchCount = 1; } // Check for search_results array (Perplexity via OpenRouter) else if (usage.search_results && Array.isArray(usage.search_results) && usage.search_results.length > 0) { webSearchCount = 1; } // Check for search_context_size (Perplexity via OpenRouter) else if (usage.search_context_size) { webSearchCount = 1; } // Check for annotations with url_citation type else if (usage.annotations && Array.isArray(usage.annotations)) { const hasUrlCitation = usage.annotations.some(ann => { return ann && typeof ann === 'object' && 'type' in ann && ann.type === 'url_citation'; }); if (hasUrlCitation) { webSearchCount = 1; } } // Check Gemini format (grounding metadata - binary 0 or 1) else if (usage.grounding_metadata?.grounding_support !== undefined || usage.grounding_metadata?.web_search_queries !== undefined) { webSearchCount = 1; } if (webSearchCount !== undefined) { additionalTokenData.webSearchCount = webSearchCount; } // For Anthropic providers, LangChain reports input_tokens as the sum of all input tokens. // Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens. // Both cache_read and cache_write tokens should be subtracted since Anthropic's raw API // reports input_tokens as tokens NOT read from or used to create a cache. // For other providers (OpenAI, etc.), input_tokens already excludes cache tokens as expected. // Match logic consistent with plugin-server: exact match on provider OR substring match on model let isAnthropic = false; if (provider && provider.toLowerCase() === 'anthropic') { isAnthropic = true; } else if (model && model.toLowerCase().includes('anthropic')) { isAnthropic = true; } if (isAnthropic && parsedUsage.input) { const cacheTokens = (additionalTokenData.cacheReadInputTokens || 0) + (additionalTokenData.cacheWriteInputTokens || 0); if (cacheTokens > 0) { parsedUsage.input = Math.max(parsedUsage.input - cacheTokens, 0); } } return [parsedUsage.input, parsedUsage.output, additionalTokenData]; } parseUsage(response, provider, model) { let llmUsage = [0, 0, {}]; const llmUsageKeys = ['token_usage', 'usage', 'tokenUsage']; if (response.llmOutput != null) { const key = llmUsageKeys.find(k => response.llmOutput?.[k] != null); if (key) { llmUsage = this._parseUsageModel(response.llmOutput[key], provider, model); } } // If top-level usage info was not found, try checking the generations. if (llmUsage[0] === 0 && llmUsage[1] === 0 && response.generations) { for (const generation of response.generations) { for (const genChunk of generation) { // Check other paths for usage information if (genChunk.generationInfo?.usage_metadata) { llmUsage = this._parseUsageModel(genChunk.generationInfo.usage_metadata, provider, model); return llmUsage; } const messageChunk = genChunk.generationInfo ?? {}; const responseMetadata = messageChunk.response_metadata ?? {}; const chunkUsage = responseMetadata['usage'] ?? responseMetadata['amazon-bedrock-invocationMetrics'] ?? messageChunk.usage_metadata; if (chunkUsage) { llmUsage = this._parseUsageModel(chunkUsage, provider, model); return llmUsage; } } } } return llmUsage; } } export { LangChainCallbackHandler }; //# sourceMappingURL=index.mjs.map