@aj-archipelago/cortex

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.
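
Because the GraphQL schema is generated from whatever pathways are configured, the exact queries available depend on your deployment. As a rough sketch of what a client call can look like (the endpoint URL, the summary pathway, and the result field below are illustrative assumptions, not taken from this file):

// Hypothetical client call. The endpoint URL, pathway name, and result
// field are assumptions; the real schema depends on the configured pathways.
const response = await fetch('http://localhost:4000/graphql', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
        query: `query Summary($text: String!) {
            summary(text: $text) {
                result
            }
        }`,
        variables: { text: 'Long article text to summarize...' },
    }),
});

const { data } = await response.json();
console.log(data?.summary?.result);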

The PathwayResolver module source follows (736 lines, 629 loc, 30.6 kB):
import { ModelExecutor } from './modelExecutor.js';
import { modelEndpoints } from '../lib/requestExecutor.js';
import { v4 as uuidv4 } from 'uuid';
import { encode } from '../lib/encodeCache.js';
import { getFirstNToken, getLastNToken, getSemanticChunks } from './chunker.js';
import { PathwayResponseParser } from './pathwayResponseParser.js';
import { Prompt } from './prompt.js';
import { getv, setv } from '../lib/keyValueStorageClient.js';
import { getvWithDoubleDecryption, setvWithDoubleEncryption } from '../lib/keyValueStorageClient.js';
import { requestState } from './requestState.js';
import { callPathway, addCitationsToResolver } from '../lib/pathwayTools.js';
import logger from '../lib/logger.js';
import { publishRequestProgress } from '../lib/redisSubscription.js';
// eslint-disable-next-line import/no-extraneous-dependencies
import { createParser } from 'eventsource-parser';
import CortexResponse from '../lib/cortexResponse.js';

const modelTypesExcludedFromProgressUpdates = ['OPENAI-DALLE2', 'OPENAI-DALLE3'];

class PathwayResolver {
    // Optional endpoints override parameter is for testing purposes
    constructor({ config, pathway, args, endpoints }) {
        this.endpoints = endpoints || modelEndpoints;
        this.config = config;
        this.pathway = pathway;
        this.args = args;
        this.useInputChunking = pathway.useInputChunking;
        this.chunkMaxTokenLength = 0;
        this.warnings = [];
        this.errors = [];
        this.requestId = uuidv4();
        this.rootRequestId = null;
        this.responseParser = new PathwayResponseParser(pathway);
        this.pathwayResultData = {};

        this.modelName = [
            pathway.model,
            args?.model,
            pathway.inputParameters?.model,
            config.get('defaultModelName')
        ].find(modelName => modelName && Object.prototype.hasOwnProperty.call(this.endpoints, modelName));

        this.model = this.endpoints[this.modelName];

        if (!this.model) {
            throw new Error(`Model ${this.modelName} not found in config`);
        }

        const specifiedModelName = pathway.model || args?.model || pathway.inputParameters?.model;

        if (this.modelName !== (specifiedModelName)) {
            if (specifiedModelName) {
                this.logWarning(`Specified model ${specifiedModelName} not found in config, using ${this.modelName} instead.`);
            } else {
                this.logWarning(`No model specified in the pathway, using ${this.modelName}.`);
            }
        }

        this.previousResult = '';
        this.prompts = [];
        this.modelExecutor = new ModelExecutor(this.pathway, this.model);

        Object.defineProperty(this, 'pathwayPrompt', {
            get() {
                return this.prompts;
            },
            set(value) {
                if (!Array.isArray(value)) {
                    value = [value];
                }
                this.prompts = value.map(p => (p instanceof Prompt) ? p : new Prompt({ prompt: p }));
                this.chunkMaxTokenLength = this.getChunkMaxTokenLength();
            }
        });

        // set up initial prompt
        this.pathwayPrompt = pathway.prompt;
    }

    // Legacy 'tool' property is now stored in pathwayResultData
    get tool() {
        // Select fields to serialize for legacy compat, excluding undefined values
        const legacyFields = Object.fromEntries(
            Object.entries({
                hideFromModel: this.pathwayResultData.hideFromModel,
                toolCallbackName: this.pathwayResultData.toolCallbackName,
                title: this.pathwayResultData.title,
                search: this.pathwayResultData.search,
                coding: this.pathwayResultData.coding,
                codeRequestId: this.pathwayResultData.codeRequestId,
                toolCallbackId: this.pathwayResultData.toolCallbackId,
                toolUsed: this.pathwayResultData.toolUsed,
                citations: this.pathwayResultData.citations,
            }).filter(([_, value]) => value !== undefined)
        );
        return JSON.stringify(legacyFields);
    }

    set tool(value) {
        // Accepts a JSON string, parses, merges into pathwayResultData
        let parsed;
        try {
            parsed = (typeof value === 'string') ? JSON.parse(value) : value;
            this.pathwayResultData = this.mergeResultData(parsed);
        } catch (e) {
            // Optionally warn: invalid format or merge error
            console.warn('Invalid tool property assignment:', e);
        }
    }

    publishNestedRequestProgress(requestProgress) {
        if (this.rootRequestId) {
            // if this is a nested request, don't end the stream
            if (requestProgress.progress === 1) {
                delete requestProgress.progress;
            }
            publishRequestProgress(requestProgress);
        } else {
            // this is a root request, so we add the pathwayResultData to the info
            // and allow the end stream message to be sent
            if (requestProgress.progress === 1) {
                const infoObject = { ...this.pathwayResultData || {} };
                requestProgress.info = JSON.stringify(infoObject);
                requestProgress.error = this.errors.join(', ') || '';
            }
            publishRequestProgress(requestProgress);
        }
    }

    // This code handles async and streaming responses for either long-running
    // tasks or streaming model responses
    async asyncResolve(args) {
        let responseData = null;

        try {
            responseData = await this.executePathway(args);
        } catch (error) {
            this.errors.push(error.message || error.toString());
            publishRequestProgress({
                requestId: this.rootRequestId || this.requestId,
                progress: 1,
                data: '',
                info: '',
                error: this.errors.join(', ')
            });
            return;
        }

        if (!responseData) {
            publishRequestProgress({
                requestId: this.rootRequestId || this.requestId,
                progress: 1,
                data: '',
                info: '',
                error: this.errors.join(', ')
            });
            return;
        }

        // Handle CortexResponse objects - merge them into pathwayResultData
        if (responseData && typeof responseData === 'object' && responseData.constructor && responseData.constructor.name === 'CortexResponse') {
            this.pathwayResultData = this.mergeResultData(responseData);
        }

        // If the response is a stream, handle it as streaming response
        if (responseData && typeof responseData.on === 'function') {
            await this.handleStream(responseData);
        } else {
            const { completedCount = 1, totalCount = 1 } = requestState[this.requestId];
            requestState[this.requestId].data = responseData;

            // some models don't support progress updates
            if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
                const infoObject = { ...this.pathwayResultData || {} };
                this.publishNestedRequestProgress({
                    requestId: this.rootRequestId || this.requestId,
                    progress: Math.min(completedCount, totalCount) / totalCount,
                    // Clients expect these to be strings
                    data: JSON.stringify(responseData || ''),
                    info: JSON.stringify(infoObject) || '',
                    error: this.errors.join(', ') || ''
                });
            }
        }
    }

    mergeResolver(otherResolver) {
        if (otherResolver) {
            this.previousResult = otherResolver.previousResult ? otherResolver.previousResult : this.previousResult;
            this.warnings = [...this.warnings, ...otherResolver.warnings];
            this.errors = [...this.errors, ...otherResolver.errors];
            // Use the shared mergeResultData method
            this.pathwayResultData = this.mergeResultData(otherResolver.pathwayResultData);
        }
    }

    // Merge pathwayResultData with either another pathwayResultData object or a CortexResponse
    mergeResultData(newData) {
        if (!newData) return this.pathwayResultData;

        const currentData = this.pathwayResultData || {};

        // Handle CortexResponse objects
        if (newData.constructor && newData.constructor.name === 'CortexResponse') {
            const cortexResponse = newData;
            const cortexData = {
                citations: cortexResponse.citations,
                toolCalls: cortexResponse.toolCalls,
                functionCall: cortexResponse.functionCall,
                usage: cortexResponse.usage,
                finishReason: cortexResponse.finishReason,
                artifacts: cortexResponse.artifacts
            };
            newData = cortexData;
        }

        // Create merged result
        const merged = { ...currentData, ...newData };

        // Handle array fields that should be concatenated
        const arrayFields = ['citations', 'toolCalls', 'artifacts'];
        for (const field of arrayFields) {
            const currentArray = currentData[field] || [];
            const newArray = newData[field] || [];
            if (newArray.length > 0) {
                merged[field] = [...currentArray, ...newArray];
            } else if (currentArray.length > 0) {
                merged[field] = currentArray;
            }
        }

        // Handle usage and toolUsed data - convert to arrays with most recent first
        const createArrayFromData = (currentValue, newValue) => {
            if (!currentValue && !newValue) return null;

            const array = [];
            // Add new value first (most recent)
            if (newValue) {
                if (Array.isArray(newValue)) {
                    array.push(...newValue);
                } else {
                    array.push(newValue);
                }
            }
            // Add current value second (older)
            if (currentValue) {
                if (Array.isArray(currentValue)) {
                    array.push(...currentValue);
                } else {
                    array.push(currentValue);
                }
            }
            return array;
        };

        const usageArray = createArrayFromData(currentData.usage, newData.usage);
        if (usageArray) {
            merged.usage = usageArray;
        }

        const toolUsedArray = createArrayFromData(currentData.toolUsed, newData.toolUsed);
        if (toolUsedArray) {
            merged.toolUsed = toolUsedArray;
        }

        return merged;
    }

    async handleStream(response) {
        let streamErrorOccurred = false;

        if (response && typeof response.on === 'function') {
            try {
                const incomingMessage = response;
                let streamEnded = false;

                const onParse = (event) => {
                    let requestProgress = {
                        requestId: this.rootRequestId || this.requestId
                    };

                    logger.debug(`Received event: ${event.type}`);

                    if (event.type === 'event') {
                        logger.debug('Received event!');
                        logger.debug(`id: ${event.id || '<none>'}`);
                        logger.debug(`name: ${event.name || '<none>'}`);
                        logger.debug(`data: ${event.data}`);
                    } else if (event.type === 'reconnect-interval') {
                        logger.debug(`We should set reconnect interval to ${event.value} milliseconds`);
                    }

                    try {
                        requestProgress = this.modelExecutor.plugin.processStreamEvent(event, requestProgress);
                    } catch (error) {
                        streamErrorOccurred = true;
                        logger.error(`Stream error: ${error instanceof Error ? error.stack || error.message : JSON.stringify(error)}`);
                        incomingMessage.off('data', processStream);
                        return;
                    }

                    try {
                        if (!streamEnded && requestProgress.data) {
                            this.publishNestedRequestProgress(requestProgress);
                            streamEnded = requestProgress.progress === 1;
                        }
                    } catch (error) {
                        logger.error(`Could not publish the stream message: "${event.data}", ${error instanceof Error ? error.stack || error.message : JSON.stringify(error)}`);
                    }
                };

                const sseParser = createParser(onParse);

                const processStream = (data) => {
                    //logger.warn(`RECEIVED DATA: ${JSON.stringify(data.toString())}`);
                    sseParser.feed(data.toString());
                };

                if (incomingMessage) {
                    await new Promise((resolve, reject) => {
                        incomingMessage.on('data', processStream);
                        incomingMessage.on('end', resolve);
                        incomingMessage.on('error', reject);
                    });
                }
            } catch (error) {
                logger.error(`Could not subscribe to stream: ${error instanceof Error ? error.stack || error.message : JSON.stringify(error)}`);
            }

            if (streamErrorOccurred) {
                logger.error(`Stream read failed. Finishing stream...`);
                publishRequestProgress({
                    requestId: this.requestId,
                    progress: 1,
                    data: '',
                    info: '',
                    error: 'Stream read failed'
                });
            } else {
                return;
            }
        }
    }

    async resolve(args) {
        // Either we're dealing with an async request, stream, or regular request
        if (args.async || args.stream) {
            if (!requestState[this.requestId]) {
                requestState[this.requestId] = {};
            }
            this.rootRequestId = args.rootRequestId ?? null;
            requestState[this.requestId] = {
                ...requestState[this.requestId],
                args,
                resolver: this.asyncResolve.bind(this),
                pathwayResolver: this
            };
            return this.requestId;
        } else {
            // Synchronously process the request
            return await this.executePathway(args);
        }
    }

    async executePathway(args) {
        // Bidirectional context transformation for backward compatibility:
        // 1. If agentContext provided: extract contextId/contextKey for legacy pathways
        // 2. If contextId provided without agentContext: create agentContext for new pathways
        if (args.agentContext && Array.isArray(args.agentContext) && args.agentContext.length > 0) {
            const defaultCtx = args.agentContext.find(ctx => ctx.default) || args.agentContext[0];
            if (defaultCtx) {
                args.contextId = defaultCtx.contextId;
                args.contextKey = defaultCtx.contextKey || null;
            }
        } else if (args.contextId && !args.agentContext) {
            // Backward compat: create agentContext from legacy contextId/contextKey
            args.agentContext = [{
                contextId: args.contextId,
                contextKey: args.contextKey || null,
                default: true
            }];
        }

        if (this.pathway.executePathway && typeof this.pathway.executePathway === 'function') {
            return await this.pathway.executePathway({ args, runAllPrompts: this.promptAndParse.bind(this), resolver: this });
        } else {
            return await this.promptAndParse(args);
        }
    }

    async promptAndParse(args) {
        // Check if model is specified in args and swap if different from current model
        if (args.modelOverride && args.modelOverride !== this.modelName) {
            try {
                this.swapModel(args.modelOverride);
            } catch (error) {
                this.logError(`Failed to swap model to ${args.modelOverride}: ${error.message}`);
            }
        }

        // Get saved context from contextId or change contextId if needed
        const { contextId, useMemory } = args;
        this.savedContextId = contextId ? contextId : uuidv4();

        // Check if memory is enabled (default true for backward compatibility)
        const memoryEnabled = useMemory !== false;

        const loadMemory = async () => {
            try {
                // Always load savedContext (legacy feature)
                this.savedContext = (getvWithDoubleDecryption && await getvWithDoubleDecryption(this.savedContextId, this.args?.contextKey)) || {};
                this.initialState = { savedContext: this.savedContext };

                // Only load memory* sections if memory is enabled
                if (memoryEnabled) {
                    const [memorySelf, memoryDirectives, memoryTopics, memoryUser, memoryContext] = await Promise.all([
                        callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1, stripMetadata: true, contextKey: this.args?.contextKey }),
                        callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1, stripMetadata: true, contextKey: this.args?.contextKey }),
                        callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryTopics', priority: 0, numResults: 10, contextKey: this.args?.contextKey }),
                        callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryUser', priority: 1, stripMetadata: true, contextKey: this.args?.contextKey }),
                        callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryContext', priority: 0, contextKey: this.args?.contextKey }),
                    ]).catch(error => {
                        this.logError(`Failed to load memory: ${error.message}`);
                        return ['', '', '', '', ''];
                    });

                    this.memorySelf = memorySelf || '';
                    this.memoryDirectives = memoryDirectives || '';
                    this.memoryTopics = memoryTopics || '';
                    this.memoryUser = memoryUser || '';
                    this.memoryContext = memoryContext || '';
                } else {
                    this.memorySelf = '';
                    this.memoryDirectives = '';
                    this.memoryTopics = '';
                    this.memoryUser = '';
                    this.memoryContext = '';
                }
            } catch (error) {
                this.logError(`Error in loadMemory: ${error.message}`);
                this.savedContext = {};
                this.memorySelf = '';
                this.memoryDirectives = '';
                this.memoryTopics = '';
                this.memoryUser = '';
                this.memoryContext = '';
                this.initialState = { savedContext: {} };
            }
        };

        const saveChangedMemory = async () => {
            // Always save savedContext (legacy feature, not governed by useMemory)
            this.savedContextId = this.savedContextId || uuidv4();
            const currentState = {
                savedContext: this.savedContext,
            };
            if (currentState.savedContext !== this.initialState.savedContext) {
                setvWithDoubleEncryption && await setvWithDoubleEncryption(this.savedContextId, this.savedContext, this.args?.contextKey);
            }
        };

        const MAX_RETRIES = 3;
        let data = null;

        for (let retries = 0; retries < MAX_RETRIES; retries++) {
            await loadMemory(); // Reset memory state on each retry

            data = await this.processRequest(args);

            if (!data) {
                break;
            }

            // if data is a stream, handle it
            if (data && typeof data.on === 'function') {
                await this.handleStream(data);
                return data;
            }

            data = await this.responseParser.parse(data);

            if (data !== null) {
                break;
            }
            logger.warn(`Bad pathway result - retrying pathway. Attempt ${retries + 1} of ${MAX_RETRIES}`);
        }

        if (data !== null) {
            await saveChangedMemory();
        }

        addCitationsToResolver(this, data);
        return data;
    }

    // Add a warning and log it
    logWarning(warning) {
        this.warnings.push(warning);
        logger.warn(warning);
    }

    // Add an error and log it
    logError(error) {
        this.errors.push(error);
        logger.error(error);
    }

    // Here we choose how to handle long input - either summarize or chunk
    processInputText(text) {
        let chunkTokenLength = 0;
        if (this.pathway.inputChunkSize) {
            chunkTokenLength = this.pathway.inputChunkSize;
        } else {
            chunkTokenLength = this.chunkMaxTokenLength;
        }

        const encoded = text ? encode(text) : [];

        if (!this.useInputChunking) { // no chunking, return as is
            if (encoded.length > 0 && encoded.length >= chunkTokenLength) {
                const warnText = `Truncating long input text. Text length: ${text.length}`;
                this.logWarning(warnText);
                text = this.truncate(text, chunkTokenLength);
            }
            return [text];
        }

        // chunk the text and return the chunks with newline separators
        return getSemanticChunks(text, chunkTokenLength, this.pathway.inputFormat);
    }

    truncate(str, n) {
        if (this.modelExecutor.plugin.promptParameters.truncateFromFront) {
            return getFirstNToken(str, n);
        }
        return getLastNToken(str, n);
    }

    async summarizeIfEnabled({ text, ...parameters }) {
        if (this.pathway.useInputSummarization) {
            return await callPathway('summary', { ...this.args, ...parameters, targetLength: 0 });
        }
        return text;
    }

    // Calculate the maximum token length for a chunk
    getChunkMaxTokenLength() {
        // Skip expensive calculations if not using input chunking
        if (!this.useInputChunking) {
            return this.modelExecutor.plugin.getModelMaxPromptTokens();
        }

        // find the longest prompt
        const maxPromptTokenLength = Math.max(...this.prompts.map((promptData) => this.modelExecutor.plugin.getCompiledPrompt('', this.args, promptData).tokenLength));

        // find out if any prompts use both text input and previous result
        const hasBothProperties = this.prompts.some(prompt => prompt.usesTextInput && prompt.usesPreviousResult);

        let chunkMaxTokenLength = this.modelExecutor.plugin.getModelMaxPromptTokens() - maxPromptTokenLength - 1;

        // if we have to deal with prompts that have both text input
        // and previous result, we need to split the maxChunkToken in half
        chunkMaxTokenLength = hasBothProperties ? chunkMaxTokenLength / 2 : chunkMaxTokenLength;

        return chunkMaxTokenLength;
    }

    // Process the request and return the result
    async processRequest({ text, ...parameters }) {
        text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
        const chunks = text && this.processInputText(text) || [text];

        let anticipatedRequestCount = chunks.length * this.prompts.length;

        if ((requestState[this.requestId] || {}).canceled) {
            throw new Error('Request canceled');
        }

        // Store the request state
        requestState[this.requestId] = {
            ...requestState[this.requestId],
            totalCount: anticipatedRequestCount,
            completedCount: 0
        };

        if (chunks.length > 1) {
            // stream behaves as async if there are multiple chunks
            if (parameters.stream) {
                parameters.async = true;
                parameters.stream = false;
            }
        }

        // If pre information is needed, apply current prompt with previous prompt info, only parallelize current call
        if (this.pathway.useParallelChunkProcessing) {
            // Apply each prompt across all chunks in parallel
            // this.previousResult is not available at the object level as it is different for each chunk
            this.previousResult = '';
            const data = await Promise.all(chunks.map(chunk => this.applyPromptsSerially(chunk, parameters)));
            // Join the chunks with newlines
            return data.join(this.pathway.joinChunksWith || "\n\n");
        } else {
            // Apply prompts one by one, serially, across all chunks
            // This is the default processing mode and will make previousResult available at the object level
            let previousResult = '';
            let result = '';

            for (let i = 0; i < this.prompts.length; i++) {
                const currentParameters = { ...parameters, previousResult };

                if (currentParameters.stream) { // stream special flow
                    if (i < this.prompts.length - 1) {
                        currentParameters.stream = false; // if not the last prompt then don't stream
                    } else {
                        // use the stream parameter if not async
                        currentParameters.stream = currentParameters.async ? false : currentParameters.stream;
                    }
                }

                // If the prompt doesn't contain {{text}} then we can skip the chunking, and also give that token space to the previous result
                if (!this.prompts[i].usesTextInput) {
                    // Limit context to its N + text's characters
                    if (previousResult) {
                        previousResult = this.truncate(previousResult, 2 * this.chunkMaxTokenLength);
                    }
                    result = await this.applyPrompt(this.prompts[i], text, currentParameters);
                } else {
                    // Limit context to N characters
                    if (previousResult) {
                        previousResult = this.truncate(previousResult, this.chunkMaxTokenLength);
                    }
                    result = await Promise.all(chunks.map(chunk => this.applyPrompt(this.prompts[i], chunk, currentParameters)));
                    if (result.length === 1) {
                        result = result[0];
                    } else if (!currentParameters.stream) {
                        result = result.join(this.pathway.joinChunksWith || "\n\n");
                    }
                }

                // If this is any prompt other than the last, use the result as the previous context
                if (i < this.prompts.length - 1) {
                    previousResult = result;
                    if (result instanceof CortexResponse) {
                        previousResult = result.output_text;
                    }
                }
            }

            // store the previous result in the PathwayResolver
            this.previousResult = previousResult;

            return result;
        }
    }

    async applyPromptsSerially(text, parameters) {
        let previousResult = '';
        let result = '';
        for (const prompt of this.prompts) {
            previousResult = result;
            result = await this.applyPrompt(prompt, text, { ...parameters, previousResult });
        }
        return result;
    }

    /**
     * Swaps the model used by this PathwayResolver
     * @param {string} newModelName - The name of the new model to use
     * @throws {Error} If the new model is not found in the endpoints
     */
    swapModel(newModelName) {
        // Validate that the new model exists in endpoints
        if (!this.endpoints[newModelName]) {
            throw new Error(`Model ${newModelName} not found in config`);
        }

        // Update model references
        this.modelName = newModelName;
        this.model = this.endpoints[newModelName];

        // Create new ModelExecutor with the new model
        this.modelExecutor = new ModelExecutor(this.pathway, this.model);

        // Recalculate chunk max token length as it depends on the model
        this.chunkMaxTokenLength = this.getChunkMaxTokenLength();

        this.logWarning(`Model swapped to ${newModelName}`);
    }

    async applyPrompt(prompt, text, parameters) {
        if (requestState[this.requestId].canceled) {
            return;
        }
        let result = '';

        result = await this.modelExecutor.execute(text, {
            ...parameters,
            ...this.savedContext,
            memorySelf: this.memorySelf,
            memoryDirectives: this.memoryDirectives,
            memoryTopics: this.memoryTopics,
            memoryUser: this.memoryUser,
            memoryContext: this.memoryContext
        }, prompt, this);

        requestState[this.requestId].completedCount++;

        if (parameters.async) {
            const { completedCount, totalCount } = requestState[this.requestId];
            if (completedCount < totalCount) {
                await publishRequestProgress({
                    requestId: this.requestId,
                    progress: completedCount / totalCount,
                });
            }
        }

        // save the result to the context if requested and no errors
        if (prompt.saveResultTo && this.errors.length === 0) {
            // Update memory property if it matches a known type
            if (["memorySelf", "memoryUser", "memoryDirectives", "memoryTopics"].includes(prompt.saveResultTo)) {
                this[prompt.saveResultTo] = result;
            }
            this.savedContext[prompt.saveResultTo] = result;
        }

        return result;
    }
}

export { PathwayResolver };