UNPKG

@sap-ai-sdk/langchain

Version:

SAP Cloud SDK for AI is the official Software Development Kit (SDK) for **SAP AI Core**, **SAP Generative AI Hub**, and **Orchestration Service**.

325 lines • 15.2 kB
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { JsonOutputKeyToolsParser } from '@langchain/core/output_parsers/openai_tools';
import { JsonOutputParser, StructuredOutputParser } from '@langchain/core/output_parsers';
import { RunnablePassthrough, RunnableSequence } from '@langchain/core/runnables';
import { toJsonSchema } from '@langchain/core/utils/json_schema';
import { getSchemaDescription, isInteropZodSchema } from '@langchain/core/utils/types';
import { OrchestrationClient as OrchestrationClientBase } from '@sap-ai-sdk/orchestration';
import { ChatGenerationChunk } from '@langchain/core/outputs';
import { isTemplateRef, mapLangChainMessagesToOrchestrationMessages, mapOutputToChatResult, mapToolToChatCompletionTool, mapOrchestrationChunkToLangChainMessageChunk } from './util.js';
/**
 * Check whether an error was raised because the input filter rejected the request.
 * An error matches when its `cause` carries HTTP status 400 and the response
 * `location` mentions `'Input Filter'`.
 * @param error - The error passed to the failed-attempt handler.
 * @returns A truthy value when the error matches, a falsy value otherwise.
 */
function isInputFilteringError(error) {
    return (error.cause?.status === 400 &&
        error.cause?.response?.data?.location?.includes('Input Filter'));
}
/**
 * The Orchestration client.
 */
export class OrchestrationClient extends BaseChatModel {
    orchestrationConfig;
    langchainOptions;
    deploymentConfig;
    destination;
    streaming = false;
    /**
     * Create a new LangChain chat model backed by the orchestration service.
     * @param orchestrationConfig - A single orchestration config, or a list of configs (see `mergeOrchestrationConfigs`).
     * @param langchainOptions - LangChain model options. The object is copied, not modified.
     * @param deploymentConfig - Forwarded unchanged to the underlying orchestration client.
     * @param destination - Forwarded unchanged to the underlying orchestration client.
     */
    constructor(orchestrationConfig, langchainOptions = {}, deploymentConfig, destination) {
        // Avoid retry if the error is due to input filtering.
        // The handler is installed on a copy so that the caller's options object is left
        // untouched; reusing one options object for several clients must not nest wrappers.
        const { onFailedAttempt } = langchainOptions;
        const resolvedOptions = {
            ...langchainOptions,
            onFailedAttempt: error => {
                if (isInputFilteringError(error)) {
                    throw error;
                }
                onFailedAttempt?.(error);
            }
        };
        super(resolvedOptions);
        this.orchestrationConfig = orchestrationConfig;
        this.langchainOptions = resolvedOptions;
        this.deploymentConfig = deploymentConfig;
        this.destination = destination;
        // Initialize streaming flags with LangChain-compatible behavior:
        // - `streaming`: true enables auto-streaming in `invoke()` calls
        // - `disableStreaming`: true overrides streaming flag
        // - `streaming`: `false` causes `disableStreaming` to be set to `true` for framework compatibility
        this.disableStreaming = resolvedOptions.disableStreaming === true;
        // If streaming is explicitly false, streaming is disabled
        if (resolvedOptions.streaming === false) {
            this.disableStreaming = true;
        }
        // Enable streaming only when `streaming` is `true` (default `false`) and `disableStreaming` is not `true` (default `undefined`).
        this.streaming =
            resolvedOptions.streaming === true && this.disableStreaming !== true;
    }
    /**
     * Identifier of this model type.
     * @returns The string `'orchestration'`.
     */
    _llmType() {
        return 'orchestration';
    }
    /**
     * Create a new runnable sequence that runs each individual runnable in series,
     * piping the output of one runnable into another runnable or runnable-like.
     * @param coerceable - A runnable, function, or object whose values are functions or runnables.
     * @returns A new runnable sequence.
     */
    pipe(coerceable) {
        return super.pipe(coerceable);
    }
    /**
     * Produce a chat result for the given messages.
     * When `this.streaming` is set, the response is streamed and the chunks are
     * concatenated into a single generation; otherwise one chat completion request is sent.
     * @param messages - The messages to send to the model.
     * @param options - The call options (`signal`, `placeholderValues`, `customRequestConfig`, ...).
     * @param runManager - The callback manager for the run.
     * @returns The chat result.
     * @throws If the signal is already aborted, or if streaming yields no chunks.
     */
    async _generate(messages, options, runManager) {
        options.signal?.throwIfAborted();
        // Auto-streaming: transparently stream and concatenate when enabled
        if (this.streaming) {
            let generation;
            const stream = this._streamResponseChunks(messages, options, runManager);
            for await (const chunk of stream) {
                generation =
                    generation === undefined ? chunk : generation.concat(chunk);
            }
            if (generation === undefined) {
                throw new Error('No chunks were generated from the stream.');
            }
            return { generations: [generation] };
        }
        const { placeholderValues, customRequestConfig } = options;
        const allMessages = mapLangChainMessagesToOrchestrationMessages(messages);
        const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);
        const res = await this.caller.callWithOptions({ signal: options.signal }, () => {
            const orchestrationClient = new OrchestrationClientBase(mergedOrchestrationConfig, this.deploymentConfig, this.destination);
            return orchestrationClient.chatCompletion({ messages: allMessages, placeholderValues }, { ...customRequestConfig, signal: options.signal });
        });
        const content = res.getContent();
        // Non-string content is reported to the callbacks as an empty token.
        await runManager?.handleLLMNewToken(typeof content === 'string' ? content : '');
        return mapOutputToChatResult(res._data);
    }
    /**
     * Bind tools to the model.
     * @param tools - The tools to bind; each one is mapped with `mapToolToChatCompletionTool`.
     * @param kwargs - Additional call options; `kwargs.strict` is also passed to the tool mapping.
     * @returns The result of `withConfig()` with the mapped tools and `kwargs` applied.
     */
    bindTools(tools, kwargs) {
        const strict = kwargs?.strict;
        return this.withConfig({
            tools: tools.map(tool => mapToolToChatCompletionTool(tool, strict)),
            ...kwargs
        });
    }
    /**
     * Create a runnable that returns output structured according to `outputSchema`.
     * @param outputSchema - The output schema; it is converted with `toJsonSchema`.
     * @param config - Options: `method` (`'jsonSchema'` by default, `'functionCalling'` or `'jsonMode'`),
     * `name` (`'extract'` by default), `strict` and `includeRaw`.
     * @returns The model piped into the output parser, or, when `includeRaw` is truthy, a sequence
     * producing `{ raw, parsed }` where `parsed` falls back to `null` if parsing fails.
     * @throws If `strict` is set together with `'jsonMode'`, or if `method` is not supported.
     */
    withStructuredOutput(outputSchema, config) {
        // Extract config options
        const method = (config?.method ?? 'jsonSchema');
        const name = config?.name ?? 'extract';
        const description = getSchemaDescription(outputSchema);
        const strict = config?.strict;
        const includeRaw = config?.includeRaw;
        let llm;
        let outputParser;
        // Convert schema to JSON Schema format
        const jsonSchema = toJsonSchema(outputSchema);
        // Metadata for langsmith
        const lsStructuredOutputFormat = {
            ls_structured_output_format: {
                kwargs: { method },
                schema: jsonSchema
            }
        };
        if (method === 'functionCalling') {
            // functionCalling method: Provide tool for structured output construction.
            outputParser = new JsonOutputKeyToolsParser({
                returnSingle: true,
                keyName: name,
                ...(isInteropZodSchema(outputSchema) && { zodSchema: outputSchema })
            });
            llm = this.withConfig({
                // TODO: Set `tool_choice` if it becomes supported in Orchestration
                tools: [
                    {
                        type: 'function',
                        function: {
                            name,
                            description,
                            parameters: jsonSchema,
                            ...(strict !== undefined && { strict })
                        }
                    }
                ],
                ...lsStructuredOutputFormat
            });
        }
        else if (method === 'jsonMode') {
            // jsonMode method: Use orchestration's native JSON response format
            if (strict !== undefined) {
                throw new Error('The "strict" option is not supported with the "jsonMode" structured output method. Please use "jsonSchema" or "functionCalling" methods for strict output instead.');
            }
            outputParser = isInteropZodSchema(outputSchema)
                ? StructuredOutputParser.fromZodSchema(outputSchema)
                : new JsonOutputParser();
            llm = this.withConfig({
                responseFormat: { type: 'json_object' },
                ...lsStructuredOutputFormat
            });
        }
        else if (method === 'jsonSchema') {
            // jsonSchema method: Use orchestration's native JSON Schema response format
            outputParser = isInteropZodSchema(outputSchema)
                ? StructuredOutputParser.fromZodSchema(outputSchema)
                : new JsonOutputParser();
            llm = this.withConfig({
                responseFormat: {
                    type: 'json_schema',
                    json_schema: {
                        name,
                        description,
                        schema: jsonSchema,
                        ...(strict !== undefined && { strict })
                    }
                },
                ...lsStructuredOutputFormat
            });
        }
        else {
            throw new Error(`Unsupported structured output method: ${method}. Supported methods are 'jsonSchema', 'functionCalling', and 'jsonMode'.`);
        }
        if (!includeRaw) {
            return llm.pipe(outputParser);
        }
        const parserAssign = RunnablePassthrough.assign({
            parsed: (input, parserConfig) => outputParser.invoke(input.raw, parserConfig)
        });
        const parserNone = RunnablePassthrough.assign({
            parsed: () => null
        });
        const parsedWithFallback = parserAssign.withFallbacks({
            fallbacks: [parserNone]
        });
        return RunnableSequence.from([{ raw: llm }, parsedWithFallback]);
    }
    /**
     * Stream response chunks from the Orchestration client.
     * @param messages - The messages to send to the model.
     * @param options - The call options.
     * @param runManager - The callback manager for the run.
     * @returns An async generator of chat generation chunks.
     */
    async *_streamResponseChunks(messages, options, runManager) {
        options.signal?.throwIfAborted();
        const orchestrationMessages = mapLangChainMessagesToOrchestrationMessages(messages);
        const { placeholderValues, customRequestConfig } = options;
        const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);
        const orchestrationClient = new OrchestrationClientBase(mergedOrchestrationConfig, this.deploymentConfig, this.destination);
        const response = await this.caller.callWithOptions({ signal: options.signal }, () => orchestrationClient.stream({ messages: orchestrationMessages, placeholderValues }, options.signal, options.streamOptions, customRequestConfig));
        for await (const chunk of response.stream) {
            const orchestrationResult = chunk._data.final_result;
            // There can be only none or one choice inside a chunk
            const choice = orchestrationResult?.choices[0];
            // Map the chunk to a LangChain message chunk
            const messageChunk = mapOrchestrationChunkToLangChainMessageChunk(chunk);
            // Create initial generation info with token indices
            const newTokenIndices = {
                prompt: options.promptIndex ?? 0,
                completion: choice?.index ?? 0
            };
            const generationInfo = { ...newTokenIndices };
            // Process finish reason
            if (choice?.finish_reason && orchestrationResult) {
                generationInfo.finish_reason = choice.finish_reason;
                // Only include system fingerprint in the last chunk for now to avoid concatenation issues
                generationInfo.system_fingerprint =
                    orchestrationResult.system_fingerprint;
                generationInfo.model_name = orchestrationResult.model;
                generationInfo.id = orchestrationResult.id;
                generationInfo.created = orchestrationResult.created;
                generationInfo.request_id = chunk._data.request_id;
            }
            // Process token usage
            const tokenUsage = chunk.getTokenUsage();
            if (tokenUsage) {
                generationInfo.token_usage = tokenUsage;
                messageChunk.usage_metadata = {
                    input_tokens: tokenUsage.prompt_tokens,
                    output_tokens: tokenUsage.completion_tokens,
                    total_tokens: tokenUsage.total_tokens
                };
            }
            const content = chunk.getDeltaContent() ?? '';
            const generationChunk = new ChatGenerationChunk({
                message: messageChunk,
                text: content,
                generationInfo
            });
            // Notify the run manager about the new token
            // Some parameters(`_runId`, `_parentRunId`, `_tags`) are set as undefined as they are implicitly read from the context.
            await runManager?.handleLLMNewToken(content, newTokenIndices, undefined, undefined, undefined, { chunk: generationChunk });
            yield generationChunk;
        }
    }
    /**
     * Merge per-call options (`tools`, `stop`, `responseFormat`) into one orchestration config.
     * The given config is never modified: every object that receives a new value is copied first,
     * so tools and response formats do not leak from one call into the next.
     * @param orchestrationConfig - The base orchestration config.
     * @param options - The call options.
     * @returns A new config with the call options applied.
     * @throws If `responseFormat` is set while the prompt is a template reference.
     */
    mergeOrchestrationConfig(orchestrationConfig, options) {
        const { tools = [], stop = [], responseFormat } = options;
        const config = {
            ...orchestrationConfig,
            promptTemplating: {
                ...orchestrationConfig.promptTemplating,
                model: {
                    ...orchestrationConfig.promptTemplating.model,
                    params: {
                        ...orchestrationConfig.promptTemplating.model.params,
                        ...(stop.length && {
                            stop: [
                                ...(orchestrationConfig.promptTemplating.model.params?.stop || []),
                                ...stop
                            ]
                        })
                    }
                }
            }
        };
        if (tools.length) {
            if (!config.promptTemplating.prompt) {
                config.promptTemplating.prompt = {};
            }
            const prompt = config.promptTemplating.prompt;
            if (typeof prompt === 'object' && !isTemplateRef(prompt)) {
                // Build a new prompt object: `prompt` may still be the very object held by
                // `orchestrationConfig`, and writing to it would accumulate tools across calls.
                config.promptTemplating.prompt = {
                    ...prompt,
                    tools: [
                        // Preserve existing tools configured in the templating module
                        ...(prompt.tools || []),
                        // Add new tools set with LangChain `bindTools()` or `invoke()` methods
                        ...tools.map(t => mapToolToChatCompletionTool(t))
                    ]
                };
            }
        }
        // Handle responseFormat for structured output
        if (responseFormat) {
            // Ensure prompt object exists
            if (!config.promptTemplating.prompt) {
                config.promptTemplating.prompt = {};
            }
            const prompt = config.promptTemplating.prompt;
            // Check if prompt is a TemplateRef
            if (typeof prompt === 'object' && isTemplateRef(prompt)) {
                throw new Error('Cannot use withStructuredOutput with TemplateRef. ' +
                    'Structured output requires inline template definition to set responseFormat.');
            }
            // Add responseFormat to a copy of the prompt, leaving the shared prompt untouched
            if (typeof prompt === 'object') {
                config.promptTemplating.prompt = {
                    ...prompt,
                    response_format: responseFormat
                };
            }
        }
        return config;
    }
    /**
     * Apply `mergeOrchestrationConfig` to the configured orchestration config(s).
     * @param options - The call options.
     * @returns A single merged config, or a list of merged configs when the client was created with a list.
     * @throws If the client was created with an empty config list.
     */
    mergeOrchestrationConfigs(options) {
        if (!Array.isArray(this.orchestrationConfig)) {
            return this.mergeOrchestrationConfig(this.orchestrationConfig, options);
        }
        if (!this.orchestrationConfig.length) {
            throw new Error('Orchestration config list must not be empty for module fallback.');
        }
        return this.orchestrationConfig.map(config => this.mergeOrchestrationConfig(config, options));
    }
}
//# sourceMappingURL=client.js.map