@llumiverse/core
Version:
Provides a universal API to LLMs. Support for existing LLMs can be added by writing a driver.
579 lines • 21.3 kB
JavaScript
"use strict";
/**
* Utilities for cleaning up conversation objects before storage.
*
* These functions strip binary data (Uint8Array) and large base64 strings
* from conversation objects to prevent JSON.stringify corruption and reduce
* storage bloat.
*
* IMPORTANT: These functions replace entire image/document/video BLOCKS with
* text placeholders, not just the data. This ensures the conversation remains
* valid for subsequent API calls.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.getConversationMeta = getConversationMeta;
exports.setConversationMeta = setConversationMeta;
exports.unwrapConversationArray = unwrapConversationArray;
exports.incrementConversationTurn = incrementConversationTurn;
exports.stripBinaryFromConversation = stripBinaryFromConversation;
exports.deserializeBinaryFromStorage = deserializeBinaryFromStorage;
exports.stripBase64ImagesFromConversation = stripBase64ImagesFromConversation;
exports.truncateLargeTextInConversation = truncateLargeTextInConversation;
exports.stripHeartbeatsFromConversation = stripHeartbeatsFromConversation;
/** Text that stands in for image content removed from a conversation. */
const IMAGE_PLACEHOLDER = '[Image removed from conversation history]';
/** Text that stands in for document content removed from a conversation. */
const DOCUMENT_PLACEHOLDER = '[Document removed from conversation history]';
/** Text that stands in for video content removed from a conversation. */
const VIDEO_PLACEHOLDER = '[Video removed from conversation history]';
/** Suffix appended to a string that was cut short by the text-truncation helper. */
const TEXT_TRUNCATED_MARKER = '\n\n[Content truncated - exceeded token limit]';
/** Metadata key used to store turn information in conversations */
const META_KEY = '_llumiverse_meta';
/**
 * Detect a Bedrock image block that still carries raw binary data,
 * i.e. `{ image: { source: { bytes: Uint8Array } } }`.
 *
 * @param obj Any value.
 * @returns `true` only when `obj.image.source.bytes` is a Uint8Array and
 *          every level above it is a non-null object.
 */
function isBedrockImageBlock(obj) {
    let node = obj;
    // Walk down to `image.source`, bailing out as soon as a level is not an object.
    for (const key of ['image', 'source']) {
        if (typeof node !== 'object' || node === null) {
            return false;
        }
        node = node[key];
    }
    return typeof node === 'object' && node !== null && node.bytes instanceof Uint8Array;
}
/**
 * Detect a Bedrock image block whose bytes are in the serialized storage
 * form: `{ image: { source: { bytes: { _base64: string } } } }`.
 *
 * @param obj Any value.
 * @returns `true` only when `obj.image.source.bytes._base64` is a string and
 *          every level above it is a non-null object.
 */
function isSerializedBedrockImageBlock(obj) {
    const isRecord = (value) => typeof value === 'object' && value !== null;
    return isRecord(obj)
        && isRecord(obj.image)
        && isRecord(obj.image.source)
        && isRecord(obj.image.source.bytes)
        && typeof obj.image.source.bytes._base64 === 'string';
}
/**
 * Detect a Bedrock document block that still carries raw binary data,
 * i.e. `{ document: { source: { bytes: Uint8Array } } }`.
 *
 * @param obj Any value.
 * @returns `true` only when `obj.document.source.bytes` is a Uint8Array and
 *          every level above it is a non-null object.
 */
function isBedrockDocumentBlock(obj) {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const { document: doc } = obj;
    if (doc === null || typeof doc !== 'object') {
        return false;
    }
    const { source } = doc;
    if (source === null || typeof source !== 'object') {
        return false;
    }
    return source.bytes instanceof Uint8Array;
}
/**
 * Detect a Bedrock document block whose bytes are in the serialized storage
 * form: `{ document: { source: { bytes: { _base64: string } } } }`.
 *
 * @param obj Any value.
 * @returns `true` only when `obj.document.source.bytes._base64` is a string
 *          and every level above it is a non-null object.
 */
function isSerializedBedrockDocumentBlock(obj) {
    // Step one level down; anything that is not a non-null object yields undefined,
    // which then propagates through the remaining steps.
    const descend = (node, key) => (typeof node === 'object' && node !== null ? node[key] : undefined);
    const payload = ['document', 'source', 'bytes'].reduce(descend, obj);
    return typeof payload === 'object'
        && payload !== null
        && typeof payload._base64 === 'string';
}
/**
 * Detect a Bedrock video block that still carries raw binary data,
 * i.e. `{ video: { source: { bytes: Uint8Array } } }`.
 *
 * @param obj Any value.
 * @returns `true` only when `obj.video.source.bytes` is a Uint8Array and
 *          every level above it is a non-null object.
 */
function isBedrockVideoBlock(obj) {
    const isRecord = (candidate) => candidate !== null && typeof candidate === 'object';
    if (!isRecord(obj)) {
        return false;
    }
    const clip = obj.video;
    if (!isRecord(clip)) {
        return false;
    }
    const origin = clip.source;
    return isRecord(origin) ? origin.bytes instanceof Uint8Array : false;
}
/**
 * Detect a Bedrock video block whose bytes are in the serialized storage
 * form: `{ video: { source: { bytes: { _base64: string } } } }`.
 *
 * @param obj Any value.
 * @returns `true` only when `obj.video.source.bytes._base64` is a string and
 *          every level above it is a non-null object.
 */
function isSerializedBedrockVideoBlock(obj) {
    let cursor = obj;
    // Descend video -> source -> bytes; each level must be a non-null object.
    for (const key of ['video', 'source', 'bytes']) {
        if (typeof cursor !== 'object' || cursor === null) {
            return false;
        }
        cursor = cursor[key];
    }
    if (typeof cursor !== 'object' || cursor === null) {
        return false;
    }
    return typeof cursor._base64 === 'string';
}
/**
 * Detect an OpenAI-style `image_url` block whose URL is an inline base64
 * image data URL: `{ type: "image_url", image_url: { url: "data:image/...;base64,..." } }`.
 *
 * @param obj Any value.
 * @returns `true` when the block has type `image_url` and its `image_url.url`
 *          is a string starting with `data:image/` and containing `;base64,`.
 */
function isOpenAIBase64ImageBlock(obj) {
    if (obj === null || typeof obj !== 'object' || obj.type !== 'image_url') {
        return false;
    }
    const payload = obj.image_url;
    if (payload === null || typeof payload !== 'object') {
        return false;
    }
    const { url } = payload;
    if (typeof url !== 'string') {
        return false;
    }
    return url.startsWith('data:image/') && url.includes(';base64,');
}
/**
 * Detect an Anthropic base64 image block:
 * `{ type: "image", source: { type: "base64", data: "...", media_type: "..." } }`.
 *
 * @param obj Any value.
 * @returns `true` when `obj.type` is `"image"`, `obj.source.type` is
 *          `"base64"` and `obj.source.data` is a string.
 */
function isAnthropicBase64ImageBlock(obj) {
    const isRecord = (value) => typeof value === 'object' && value !== null;
    if (!isRecord(obj) || obj.type !== 'image' || !isRecord(obj.source)) {
        return false;
    }
    const { type, data } = obj.source;
    return type === 'base64' && typeof data === 'string';
}
/**
 * Detect an Anthropic base64 document block:
 * `{ type: "document", source: { type: "base64", data: "...", media_type: "..." } }`.
 *
 * @param obj Any value.
 * @returns `true` when `obj.type` is `"document"`, `obj.source.type` is
 *          `"base64"` and `obj.source.data` is a string.
 */
function isAnthropicBase64DocumentBlock(obj) {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const { type: blockType, source: origin } = obj;
    if (blockType !== 'document') {
        return false;
    }
    if (origin === null || typeof origin !== 'object') {
        return false;
    }
    const hasBase64Encoding = origin.type === 'base64';
    const hasStringPayload = typeof origin.data === 'string';
    return hasBase64Encoding && hasStringPayload;
}
/**
 * Detect a Gemini `inlineData` block carrying a large string payload:
 * `{ inlineData: { data: "...", ... } }`.
 *
 * @param obj Any value.
 * @returns `true` when `obj.inlineData.data` is a string longer than 1000
 *          characters; shorter payloads are not treated as matches.
 */
function isGeminiInlineDataBlock(obj) {
    if (typeof obj !== 'object' || obj === null) {
        return false;
    }
    const payload = obj.inlineData;
    if (typeof payload !== 'object' || payload === null) {
        return false;
    }
    const encoded = payload.data;
    if (typeof encoded !== 'string') {
        return false;
    }
    return encoded.length > 1000;
}
/**
 * Encode a Uint8Array as a base64 string so it can be stored in JSON.
 *
 * The bytes are first mapped to a "binary string" (one character per byte)
 * and then passed to `btoa`.
 *
 * @param bytes The binary data to encode.
 * @returns The base64 representation of `bytes`.
 */
function uint8ArrayToBase64(bytes) {
    // Convert in slices so the argument list handed to String.fromCharCode stays small.
    const SLICE_SIZE = 0x8000;
    const pieces = [];
    for (let offset = 0; offset < bytes.byteLength; offset += SLICE_SIZE) {
        pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + SLICE_SIZE)));
    }
    return btoa(pieces.join(''));
}
/**
 * Decode a base64 string into a Uint8Array.
 *
 * `atob` yields a binary string with one character per byte; each
 * character's code becomes one element of the result.
 *
 * @param base64 The base64 text to decode.
 * @returns A new Uint8Array holding the decoded bytes.
 */
function base64ToUint8Array(base64) {
    const binaryString = atob(base64);
    return Uint8Array.from(binaryString, (character) => character.charCodeAt(0));
}
/**
 * Get metadata from a conversation object, or return defaults.
 *
 * The metadata lives under the `META_KEY` property of the conversation.
 * When it is missing, is not an object, or the conversation itself is not
 * an object, `{ turnNumber: 0 }` is returned.
 *
 * A stored metadata object whose `turnNumber` is not a usable number
 * (missing, non-numeric, or NaN — which becomes `null` after a JSON round
 * trip) is returned as a copy with `turnNumber` reset to 0. Without this,
 * incrementing the turn would yield NaN and turn comparisons such as
 * `currentTurn < keepForTurns` would always be false.
 *
 * @param conversation Any conversation value.
 * @returns The stored metadata object itself when its `turnNumber` is valid;
 *          otherwise a fresh object with `turnNumber: 0` (plus any other
 *          stored metadata fields).
 */
function getConversationMeta(conversation) {
    if (typeof conversation !== 'object' || conversation === null) {
        return { turnNumber: 0 };
    }
    const meta = conversation[META_KEY];
    if (!meta || typeof meta !== 'object') {
        return { turnNumber: 0 };
    }
    if (typeof meta.turnNumber === 'number' && !Number.isNaN(meta.turnNumber)) {
        // Well-formed metadata is handed back by reference, as before.
        return meta;
    }
    // Keep the other metadata fields but repair the turn counter.
    return { ...meta, turnNumber: 0 };
}
/** Property under which an array conversation is stored once it has been wrapped in an object. */
const ARRAY_WRAPPER_KEY = '_arrayConversation';
/**
 * Attach metadata to a conversation without mutating the input.
 *
 * - An array is wrapped as `{ [ARRAY_WRAPPER_KEY]: array, [META_KEY]: meta }`,
 *   because an array cannot carry the extra metadata property through JSON.
 * - Any other non-null object is shallow-copied with `META_KEY` set to `meta`.
 * - Every other value (primitives, null, undefined) is returned unchanged and
 *   the metadata is not attached.
 *
 * @param conversation The conversation value to annotate.
 * @param meta The metadata to store under `META_KEY`.
 * @returns A new object carrying the metadata, or `conversation` itself when
 *          it is not an object.
 */
function setConversationMeta(conversation, meta) {
    if (conversation === null || typeof conversation !== 'object') {
        return conversation;
    }
    const carrier = Array.isArray(conversation)
        ? { [ARRAY_WRAPPER_KEY]: conversation }
        : conversation;
    return { ...carrier, [META_KEY]: meta };
}
/**
 * Extract the message array from a conversation that was wrapped by
 * setConversationMeta.
 *
 * @param conversation Any conversation value.
 * @returns The array stored under `ARRAY_WRAPPER_KEY`, or `undefined` when
 *          the value is not an object or holds no array under that key.
 */
function unwrapConversationArray(conversation) {
    if (conversation === null || typeof conversation !== 'object') {
        return undefined;
    }
    const inner = conversation[ARRAY_WRAPPER_KEY];
    return Array.isArray(inner) ? inner : undefined;
}
/**
 * Advance the conversation's turn counter by one.
 *
 * Reads the current metadata with getConversationMeta, copies it with
 * `turnNumber` increased by 1, and stores the copy via setConversationMeta.
 *
 * @param conversation The conversation whose turn should advance.
 * @returns Whatever setConversationMeta returns for the updated metadata.
 */
function incrementConversationTurn(conversation) {
    const current = getConversationMeta(conversation);
    const advanced = { ...current, turnNumber: current.turnNumber + 1 };
    return setConversationMeta(conversation, advanced);
}
/**
 * Make binary data (Uint8Array) in a conversation safe for JSON storage.
 *
 * JSON.stringify turns a Uint8Array into an index-keyed object such as
 * { "0": 137, "1": 80, ... }, which later API calls cannot use as binary
 * data. Depending on the options this function therefore either:
 * - serializes the binary data to base64 while the conversation is still
 *   within the keep window (`keepForTurns > 0` and the current turn is below
 *   it; the default `keepForTurns` is Infinity), or
 * - strips the binary content (always when `keepForTurns` is 0, otherwise
 *   once the current turn reaches `keepForTurns`).
 *
 * The current turn comes from `options.currentTurn` when provided, otherwise
 * from the conversation's stored metadata.
 *
 * @param obj The conversation object to process
 * @param options Optional `{ keepForTurns, currentTurn }` settings
 * @returns The result of serializing or stripping, as selected above
 */
function stripBinaryFromConversation(obj, options) {
    const settings = options ?? {};
    const keepForTurns = settings.keepForTurns === undefined ? Infinity : settings.keepForTurns;
    const currentTurn = settings.currentTurn ?? getConversationMeta(obj).turnNumber;
    const withinKeepWindow = keepForTurns > 0 && currentTurn < keepForTurns;
    return withinKeepWindow
        ? serializeBinaryForStorage(obj)
        : stripBinaryFromConversationInternal(obj);
}
/**
 * Recursively copy a value, replacing every Uint8Array with
 * `{ _base64: "<base64 text>" }` so the surrounding structure survives JSON.
 *
 * Arrays are mapped element by element and objects are rebuilt from their
 * own enumerable entries; all other values are returned unchanged.
 *
 * @param obj The value to convert.
 * @returns A converted copy for arrays/objects, the wrapper for a
 *          Uint8Array, or `obj` itself otherwise.
 */
function serializeBinaryForStorage(obj) {
    if (obj instanceof Uint8Array) {
        return { _base64: uint8ArrayToBase64(obj) };
    }
    if (Array.isArray(obj)) {
        return obj.map((entry) => serializeBinaryForStorage(entry));
    }
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }
    const copy = {};
    for (const key of Object.keys(obj)) {
        copy[key] = serializeBinaryForStorage(obj[key]);
    }
    return copy;
}
/**
 * Recursively copy a value, turning every serialized binary wrapper back
 * into a Uint8Array.
 *
 * A wrapper is an object whose only own enumerable key is `_base64` and
 * whose `_base64` value is a string. Call this before sending a stored
 * conversation to an API if its images were preserved.
 *
 * @param obj The stored value to restore.
 * @returns A restored copy for arrays/objects, a Uint8Array for a wrapper,
 *          or `obj` itself for primitives, null and undefined.
 */
function deserializeBinaryFromStorage(obj) {
    if (typeof obj !== 'object' || obj === null) {
        return obj;
    }
    const ownKeys = Object.keys(obj);
    // The wrapper test runs before the array test, matching the storage format check order.
    if (typeof obj._base64 === 'string' && ownKeys.length === 1) {
        return base64ToUint8Array(obj._base64);
    }
    if (Array.isArray(obj)) {
        return obj.map((entry) => deserializeBinaryFromStorage(entry));
    }
    const restored = {};
    for (const key of ownKeys) {
        restored[key] = deserializeBinaryFromStorage(obj[key]);
    }
    return restored;
}
/**
 * Recursively copy a value with all binary media removed.
 *
 * - A Uint8Array, or a serialized wrapper (an object whose only key is a
 *   string `_base64`), becomes the image placeholder string.
 * - Inside arrays, whole Bedrock image/document/video blocks (raw or
 *   serialized) become `{ text: <matching placeholder> }`.
 * - The value stored under `META_KEY` is copied by reference, untouched.
 * - Primitives, null and undefined are returned as-is.
 *
 * @param obj The value to clean.
 * @returns The cleaned copy.
 */
function stripBinaryFromConversationInternal(obj) {
    if (typeof obj !== 'object' || obj === null) {
        return obj;
    }
    if (obj instanceof Uint8Array) {
        return IMAGE_PLACEHOLDER;
    }
    if (typeof obj._base64 === 'string' && Object.keys(obj).length === 1) {
        return IMAGE_PLACEHOLDER;
    }
    if (Array.isArray(obj)) {
        // Pick the placeholder text for a whole media block, or undefined when the item is not one.
        const placeholderFor = (item) => {
            if (isBedrockImageBlock(item) || isSerializedBedrockImageBlock(item)) {
                return IMAGE_PLACEHOLDER;
            }
            if (isBedrockDocumentBlock(item) || isSerializedBedrockDocumentBlock(item)) {
                return DOCUMENT_PLACEHOLDER;
            }
            if (isBedrockVideoBlock(item) || isSerializedBedrockVideoBlock(item)) {
                return VIDEO_PLACEHOLDER;
            }
            return undefined;
        };
        return obj.map((item) => {
            const placeholder = placeholderFor(item);
            return placeholder === undefined
                ? stripBinaryFromConversationInternal(item)
                : { text: placeholder };
        });
    }
    const cleaned = {};
    for (const key of Object.keys(obj)) {
        const value = obj[key];
        cleaned[key] = key === META_KEY ? value : stripBinaryFromConversationInternal(value);
    }
    return cleaned;
}
/**
 * Strip large base64 media from a conversation to reduce storage bloat.
 *
 * Base64 strings survive JSON.stringify (unlike Uint8Array) but can make
 * stored conversations very large. Once the keep window has passed, the
 * conversation is handed to the internal stripper, which replaces whole
 * media blocks with text placeholders, e.g.:
 * - OpenAI: { type: "image_url", image_url: { url: "data:..." } } → { type: "text", text: "[placeholder]" }
 * - Gemini: { inlineData: { data: "...", mimeType: "..." } } → { text: "[placeholder]" }
 * - Anthropic: { type: "image" | "document", source: { type: "base64", ... } } → { type: "text", text: "[placeholder]" }
 *
 * While `keepForTurns > 0` and the current turn is below it (the default
 * `keepForTurns` is Infinity), the input object is returned untouched. The
 * current turn comes from `options.currentTurn` when provided, otherwise
 * from the conversation's stored metadata.
 *
 * @param obj The conversation object to strip base64 media from
 * @param options Optional `{ keepForTurns, currentTurn }` settings
 * @returns `obj` itself inside the keep window, otherwise a stripped copy
 */
function stripBase64ImagesFromConversation(obj, options) {
    const settings = options ?? {};
    const keepForTurns = settings.keepForTurns === undefined ? Infinity : settings.keepForTurns;
    const currentTurn = settings.currentTurn ?? getConversationMeta(obj).turnNumber;
    const withinKeepWindow = keepForTurns > 0 && currentTurn < keepForTurns;
    // Base64 strings are already JSON-safe, so nothing needs converting while we keep them.
    return withinKeepWindow ? obj : stripBase64ImagesFromConversationInternal(obj);
}
/**
 * Recursively copy a value with inline base64 media removed.
 *
 * - A string that starts with `data:image/` and contains `;base64,` becomes
 *   the image placeholder string.
 * - Inside arrays, whole media blocks are replaced, checked in this order:
 *   OpenAI image_url → `{ type: 'text', text }`, Gemini inlineData →
 *   `{ text }`, Anthropic image → `{ type: 'text', text }`, Anthropic
 *   document → `{ type: 'text', text }` (document placeholder).
 * - The value stored under `META_KEY` is copied by reference, untouched.
 * - Other primitives, null and undefined are returned as-is.
 *
 * @param obj The value to clean.
 * @returns The cleaned copy.
 */
function stripBase64ImagesFromConversationInternal(obj) {
    if (obj === null || obj === undefined) {
        return obj;
    }
    if (typeof obj === 'string') {
        const isInlineImage = obj.startsWith('data:image/') && obj.includes(';base64,');
        return isInlineImage ? IMAGE_PLACEHOLDER : obj;
    }
    if (Array.isArray(obj)) {
        // Build the replacement block for a media item, or null when the item is not one.
        const replacementFor = (item) => {
            if (isOpenAIBase64ImageBlock(item)) {
                return { type: 'text', text: IMAGE_PLACEHOLDER };
            }
            if (isGeminiInlineDataBlock(item)) {
                return { text: IMAGE_PLACEHOLDER };
            }
            if (isAnthropicBase64ImageBlock(item)) {
                return { type: 'text', text: IMAGE_PLACEHOLDER };
            }
            if (isAnthropicBase64DocumentBlock(item)) {
                return { type: 'text', text: DOCUMENT_PLACEHOLDER };
            }
            return null;
        };
        return obj.map((item) => replacementFor(item) ?? stripBase64ImagesFromConversationInternal(item));
    }
    if (typeof obj !== 'object') {
        return obj;
    }
    const cleaned = {};
    for (const key of Object.keys(obj)) {
        const value = obj[key];
        cleaned[key] = key === META_KEY ? value : stripBase64ImagesFromConversationInternal(value);
    }
    return cleaned;
}
/** Rough number of characters assumed per token when converting a token limit to a character limit. */
const CHARS_PER_TOKEN = 4;
/**
 * Truncate large text content in a conversation to reduce storage and
 * context bloat.
 *
 * The token limit in `options.textMaxTokens` is converted to a character
 * budget (limit × CHARS_PER_TOKEN) and the conversation is handed to
 * truncateLargeTextInternal with that budget. When the limit is missing,
 * zero, or negative, no truncation happens and the input is returned as-is.
 *
 * @param obj The conversation object to truncate text in
 * @param options Options including `textMaxTokens`
 * @returns `obj` itself when truncation is disabled, otherwise the result
 *          of truncateLargeTextInternal
 */
function truncateLargeTextInConversation(obj, options) {
    const tokenBudget = options?.textMaxTokens;
    const truncationDisabled = !tokenBudget || tokenBudget <= 0;
    return truncationDisabled
        ? obj
        : truncateLargeTextInternal(obj, tokenBudget * CHARS_PER_TOKEN);
}
/**
 * Decide whether an object is a media payload that must be left intact.
 *
 * Returns `true` for:
 * - Bedrock image/document/video blocks, raw or serialized;
 * - the bare serialized wrapper (an object whose only key is a string `_base64`);
 * - OpenAI, Gemini and Anthropic base64 media blocks.
 *
 * @param obj Any value.
 * @returns `true` when one of the checks above matches; `false` otherwise,
 *          including for every non-object value.
 */
function shouldPreserveMediaPayload(obj) {
    if (obj === null || typeof obj !== 'object') {
        return false;
    }
    const bedrockChecks = [
        isBedrockImageBlock,
        isSerializedBedrockImageBlock,
        isBedrockDocumentBlock,
        isSerializedBedrockDocumentBlock,
        isBedrockVideoBlock,
        isSerializedBedrockVideoBlock,
    ];
    if (bedrockChecks.some((check) => check(obj))) {
        return true;
    }
    const isBareWrapper = typeof obj._base64 === 'string' && Object.keys(obj).length === 1;
    if (isBareWrapper) {
        return true;
    }
    const base64Checks = [
        isOpenAIBase64ImageBlock,
        isGeminiInlineDataBlock,
        isAnthropicBase64ImageBlock,
        isAnthropicBase64DocumentBlock,
    ];
    return base64Checks.some((check) => check(obj));
}
/**
 * Recursively copy a value, truncating every string longer than `maxChars`.
 *
 * - Strings that are inline base64 image data URLs (start with `data:image/`
 *   and contain `;base64,`) are never truncated.
 * - Any other string longer than `maxChars` is cut to at most `maxChars`
 *   UTF-16 code units and gets TEXT_TRUNCATED_MARKER appended. If the cut
 *   would land between the two halves of a surrogate pair, one more code
 *   unit is dropped so the result never ends in a lone high surrogate.
 * - Objects recognized by shouldPreserveMediaPayload are returned by
 *   reference, untouched.
 * - The value stored under `META_KEY` is copied by reference, untouched.
 * - Arrays and other objects are rebuilt with their contents processed.
 *
 * @param obj The value to process.
 * @param maxChars Maximum number of characters to keep per string.
 * @returns The processed copy (or `obj` itself for preserved/primitive values).
 */
function truncateLargeTextInternal(obj, maxChars) {
    if (obj === null || obj === undefined) {
        return obj;
    }
    if (typeof obj === 'string') {
        if (obj.startsWith('data:image/') && obj.includes(';base64,')) {
            return obj;
        }
        if (obj.length > maxChars) {
            // substring() truncates fractional limits toward zero; mirror that so the
            // surrogate check below inspects the real cut position.
            let cut = Math.max(0, Math.trunc(maxChars));
            if (cut > 0) {
                const before = obj.charCodeAt(cut - 1);
                const after = obj.charCodeAt(cut);
                const splitsPair = before >= 0xd800 && before <= 0xdbff
                    && after >= 0xdc00 && after <= 0xdfff;
                if (splitsPair) {
                    // Drop the orphaned high surrogate rather than emit malformed text.
                    cut -= 1;
                }
            }
            return obj.substring(0, cut) + TEXT_TRUNCATED_MARKER;
        }
        return obj;
    }
    if (Array.isArray(obj)) {
        return obj.map(item => truncateLargeTextInternal(item, maxChars));
    }
    if (typeof obj === 'object') {
        if (shouldPreserveMediaPayload(obj)) {
            return obj;
        }
        const result = {};
        for (const [key, value] of Object.entries(obj)) {
            // Preserve metadata without truncation
            if (key === META_KEY) {
                result[key] = value;
            }
            else {
                result[key] = truncateLargeTextInternal(value, maxChars);
            }
        }
        return result;
    }
    return obj;
}
/** Opening tag that a string must start with to count as a heartbeat message. */
const HEARTBEAT_OPEN_TAG = '<heartbeat>';
/** Closing tag that a string must end with to count as a heartbeat message. */
const HEARTBEAT_CLOSE_TAG = '</heartbeat>';
/** Text that replaces a heartbeat message removed from a conversation. */
const HEARTBEAT_PLACEHOLDER = '[Heartbeat removed from conversation history]';
/**
 * Strip heartbeat status messages from a conversation to reduce context bloat.
 *
 * Heartbeat messages are periodic workstream status updates injected by the
 * workstream management system, wrapped in `<heartbeat>...</heartbeat>` tags
 * at the point of injection.
 *
 * Heartbeats are kept (the input is returned untouched) when `keepForTurns`
 * is Infinity, or when `keepForTurns > 0` and the current turn is still
 * below it. Otherwise the conversation is passed to the internal stripper.
 * The default `keepForTurns` is 1; the current turn comes from
 * `options.currentTurn` when provided, otherwise from the conversation's
 * stored metadata.
 *
 * @param obj The conversation object to strip heartbeats from
 * @param options Optional `{ keepForTurns, currentTurn }` settings
 * @returns `obj` itself while heartbeats are kept, otherwise a stripped copy
 */
function stripHeartbeatsFromConversation(obj, options) {
    const settings = options ?? {};
    const keepForTurns = settings.keepForTurns === undefined ? 1 : settings.keepForTurns;
    const currentTurn = settings.currentTurn ?? getConversationMeta(obj).turnNumber;
    const keepForever = keepForTurns === Infinity;
    const withinKeepWindow = keepForTurns > 0 && currentTurn < keepForTurns;
    return keepForever || withinKeepWindow ? obj : stripHeartbeatsInternal(obj);
}
/**
 * Recursively copy a value, replacing heartbeat strings with a placeholder.
 *
 * A string counts as a heartbeat when it starts with HEARTBEAT_OPEN_TAG and
 * ends with HEARTBEAT_CLOSE_TAG; such strings become HEARTBEAT_PLACEHOLDER.
 * Arrays and objects are rebuilt with their contents processed, except that
 * the value stored under `META_KEY` is copied by reference, untouched.
 * Other primitives, null and undefined are returned as-is.
 *
 * @param obj The value to process.
 * @returns The processed copy.
 */
function stripHeartbeatsInternal(obj) {
    if (typeof obj === 'string') {
        const isHeartbeat = obj.startsWith(HEARTBEAT_OPEN_TAG) && obj.endsWith(HEARTBEAT_CLOSE_TAG);
        return isHeartbeat ? HEARTBEAT_PLACEHOLDER : obj;
    }
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }
    if (Array.isArray(obj)) {
        return obj.map((entry) => stripHeartbeatsInternal(entry));
    }
    const cleaned = {};
    for (const key of Object.keys(obj)) {
        const value = obj[key];
        cleaned[key] = key === META_KEY ? value : stripHeartbeatsInternal(value);
    }
    return cleaned;
}
//# sourceMappingURL=conversation-utils.js.map