UNPKG

@aj-archipelago/cortex

Version:

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.

github.com/aj-archipelago/cortex

aj-archipelago/cortex

378 lines (332 loc) • 12.6 kB

JavaScript

import logger from "./logger.js"; import subvibe from '@aj-archipelago/subvibe'; import { URL } from 'url'; import { v4 as uuidv4 } from 'uuid'; function getUniqueId(){ return uuidv4(); } function getSearchResultId() { const timestamp = Date.now().toString(36); // Base36 timestamp const random = Math.random().toString(36).substring(2, 5); // 3 random chars return `${timestamp}-${random}`; } // Helper function to extract citation title from URL function extractCitationTitle(url) { let title = 'Citation'; if (!url || typeof url !== 'string') return title; try { const urlObj = new URL(url); const hostname = urlObj.hostname.replace(/^www\./, ''); const pathname = urlObj.pathname; // Check if it's an X/Twitter URL first if (url.includes('x.com/') || url.includes('twitter.com/')) { // Extract handle and status ID from X/Twitter URL const handleMatch = url.match(/(?:x\.com|twitter\.com)\/([^\/\?]+)/); const statusMatch = url.match(/status\/(\d+)/); if (handleMatch && statusMatch) { const handle = handleMatch[1]; const statusId = statusMatch[1]; // Handle the /i/ internal redirect format - just show as "X Post" with ID // The /i/ path is X's internal redirect and doesn't indicate the real author if (handle === 'i') { title = `X Post ${statusId}`; } else { // Format as "X Post <number> from <username>" const cleanHandle = handle.startsWith('@') ? handle.substring(1) : handle; title = `X Post ${statusId} from @${cleanHandle}`; } } else if (handleMatch) { const handle = handleMatch[1]; if (handle === 'i') { title = 'X Post'; } else { const cleanHandle = handle.startsWith('@') ? handle.substring(1) : handle; title = `X Post from @${cleanHandle}`; } } else { title = 'X Post'; } } else { // Try to create a meaningful title from the URL if (pathname && pathname !== '/') { const lastPart = pathname.split('/').pop(); if (lastPart && lastPart.length > 3) { title = lastPart.replace(/[-_]/g, ' ').replace(/\.[^/.]+$/, ''); } else { title = hostname; } } else { title = hostname; } } } catch (error) { // If URL parsing fails, use the URL itself as title title = url; } return title; } function convertToSingleContentChatHistory(chatHistory){ for(let i=0; i<chatHistory.length; i++){ //if isarray make it single string if (Array.isArray(chatHistory[i]?.content)) { chatHistory[i].content = chatHistory[i].content.join("\n"); } } } //check if args has a type in chatHistory function chatArgsHasType(args, type){ const { chatHistory } = args; for(const ch of chatHistory){ // Handle both array and string content const contents = Array.isArray(ch.content) ? ch.content : [ch.content]; for(const content of contents){ try{ if((content?.type || JSON.parse(content).type) == type){ return true; } }catch(e){ continue; } } } return false; } //check if args has an image_url in chatHistory function chatArgsHasImageUrl(args){ return chatArgsHasType(args, 'image_url'); } // convert srt format to text function convertSrtToText(str) { return str .split('\n') .filter(line => !line.match(/^\d+$/) && !line.match(/^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$/) && line !== '') .join(' '); } function alignSubtitles(subtitles, format, offsets) { // Basic input validation if (!Array.isArray(subtitles) || !Array.isArray(offsets) || subtitles.length !== offsets.length) { throw new Error('Invalid input: subtitles and offsets must be arrays of equal length'); } if (subtitles.length === 0) { return ''; } const result = []; function shiftSubtitles(subtitle, shiftOffset) { // Skip non-string or empty subtitles if (typeof subtitle !== 'string' || subtitle.trim() === '') { return []; } try { const captions = subvibe.parse(subtitle); if (!captions?.cues) { return []; } return subvibe.resync(captions.cues, { offset: shiftOffset }); } catch (error) { logger.warn(`Failed to parse subtitle: ${error.message}`); return []; } } for (let i = 0; i < subtitles.length; i++) { const shiftedSubtitles = shiftSubtitles(subtitles[i], offsets[i] * 1000); if (shiftedSubtitles.length > 0) { result.push(...shiftedSubtitles); } } try { return subvibe.build(result, format || 'srt'); } catch (error) { throw new Error(`Failed to build subtitles: ${error.message}`); } } function removeOldImageAndFileContent(chatHistory) { if (!chatHistory || !Array.isArray(chatHistory) || chatHistory.length === 0) { return chatHistory; } // Find the index of the last user message with image or file content let lastImageOrFileIndex = -1; for (let i = chatHistory.length - 1; i >= 0; i--) { const message = chatHistory[i]; // Skip non-user messages if (message.role !== 'user') { continue; } // Check if this message has image or file content if (messageHasImageOrFile(message)) { lastImageOrFileIndex = i; break; } } // If no message with image or file found, return original if (lastImageOrFileIndex === -1) { return chatHistory; } // Create a deep copy of the chat history const modifiedChatHistory = JSON.parse(JSON.stringify(chatHistory)); // Process earlier messages to remove image and file content for (let i = 0; i < lastImageOrFileIndex; i++) { const message = modifiedChatHistory[i]; // Only process user messages if (message.role !== 'user') { continue; } // Remove image and file content modifiedChatHistory[i] = removeImageAndFileFromMessage(message); } return modifiedChatHistory; } // Helper function to check if a message has image or file content function messageHasImageOrFile(message) { if (!message || !message.content) { return false; } // Handle array content if (Array.isArray(message.content)) { for (const content of message.content) { try { const contentObj = typeof content === 'string' ? JSON.parse(content) : content; if (contentObj.type === 'image_url' || contentObj.type === 'file') { return true; } } catch (e) { // Not JSON or couldn't be parsed, continue continue; } } } // Handle string content else if (typeof message.content === 'string') { try { const contentObj = JSON.parse(message.content); if (contentObj.type === 'image_url' || contentObj.type === 'file') { return true; } } catch (e) { // Not JSON or couldn't be parsed return false; } } // Handle object content else if (typeof message.content === 'object') { return message.content.type === 'image_url' || message.content.type === 'file'; } return false; } // Helper function to remove image and file content from a message function removeImageAndFileFromMessage(message) { if (!message || !message.content) { return message; } const modifiedMessage = { ...message }; // Handle array content if (Array.isArray(message.content)) { modifiedMessage.content = message.content.filter(content => { try { const contentObj = typeof content === 'string' ? JSON.parse(content) : content; // Keep content that's not image or file return !(contentObj.type === 'image_url' || contentObj.type === 'file'); } catch (e) { // Not JSON or couldn't be parsed, keep it return true; } }); // If all content was removed, add an empty string if (modifiedMessage.content.length === 0) { modifiedMessage.content = [""]; } } // Handle string content else if (typeof message.content === 'string') { try { const contentObj = JSON.parse(message.content); if (contentObj.type === 'image_url' || contentObj.type === 'file') { modifiedMessage.content = ""; } } catch (e) { // Not JSON or couldn't be parsed, keep original } } // Handle object content else if (typeof message.content === 'object') { if (message.content.type === 'image_url' || message.content.type === 'file') { modifiedMessage.content = ""; } } return modifiedMessage; } /** * Recursively sanitizes base64 data in objects/arrays to prevent logging large base64 strings * Replaces base64 data with a placeholder string */ function sanitizeBase64(obj) { if (obj === null || obj === undefined) { return obj; } // Handle strings - check for base64 data URLs or long base64 strings if (typeof obj === 'string') { // Check if it's a data URL with base64 if (obj.startsWith('data:') && obj.includes('base64,')) { return '* base64 data truncated for log *'; } // Check if it's a long base64 string (likely base64 if > 100 chars and matches base64 pattern) if (obj.length > 100 && /^[A-Za-z0-9+/=]+$/.test(obj) && obj.length % 4 === 0) { return '* base64 data truncated for log *'; } return obj; } // Handle arrays if (Array.isArray(obj)) { return obj.map(item => sanitizeBase64(item)); } // Handle objects if (typeof obj === 'object') { const sanitized = {}; for (const [key, value] of Object.entries(obj)) { // Special handling for known base64 fields if (key === 'data' && typeof value === 'string' && value.length > 50) { // Check if it looks like base64 if (/^[A-Za-z0-9+/=]+$/.test(value) && value.length % 4 === 0) { sanitized[key] = '* base64 data truncated for log *'; continue; } } // Handle image_url.url with base64 if (key === 'url' && typeof value === 'string' && value.startsWith('data:') && value.includes('base64,')) { sanitized[key] = '* base64 data truncated for log *'; continue; } // Handle source.data (Claude format) if (key === 'source' && typeof value === 'object' && value?.type === 'base64' && value?.data) { sanitized[key] = { ...value, data: '* base64 data truncated for log *' }; continue; } // Handle inlineData.data (Gemini format) if (key === 'inlineData' && typeof value === 'object' && value?.data) { sanitized[key] = { ...value, data: '* base64 data truncated for log *' }; continue; } // Recursively sanitize nested objects sanitized[key] = sanitizeBase64(value); } return sanitized; } return obj; } export { getUniqueId, getSearchResultId, extractCitationTitle, convertToSingleContentChatHistory, chatArgsHasImageUrl, chatArgsHasType, convertSrtToText, alignSubtitles, removeOldImageAndFileContent, sanitizeBase64 };