@sap-ai-sdk/langchain
Version:
SAP Cloud SDK for AI is the official Software Development Kit (SDK) for **SAP AI Core**, **SAP Generative AI Hub**, and **Orchestration Service**.
325 lines • 15.2 kB
JavaScript
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { JsonOutputKeyToolsParser } from '@langchain/core/output_parsers/openai_tools';
import { JsonOutputParser, StructuredOutputParser } from '@langchain/core/output_parsers';
import { RunnablePassthrough, RunnableSequence } from '@langchain/core/runnables';
import { toJsonSchema } from '@langchain/core/utils/json_schema';
import { getSchemaDescription, isInteropZodSchema } from '@langchain/core/utils/types';
import { OrchestrationClient as OrchestrationClientBase } from '@sap-ai-sdk/orchestration';
import { ChatGenerationChunk } from '@langchain/core/outputs';
import { isTemplateRef, mapLangChainMessagesToOrchestrationMessages, mapOutputToChatResult, mapToolToChatCompletionTool, mapOrchestrationChunkToLangChainMessageChunk } from './util.js';
/**
 * Check whether a failed request was rejected by the input filter.
 *
 * An error counts as an input filtering error when its `cause` has HTTP
 * status 400 and the `location` field of the response data contains
 * `'Input Filter'`.
 *
 * Always returns a strict boolean and never throws, so it is safe to call
 * from a failure handler with arbitrary error values.
 * @param error - The error to inspect; may be nullish or lack a `cause`.
 * @returns `true` if the error is an input filtering error, `false` otherwise.
 */
function isInputFilteringError(error) {
  const cause = error?.cause;
  if (cause?.status !== 400) {
    return false;
  }
  const location = cause.response?.data?.location;
  // Only strings and arrays provide `includes`; any other shape cannot match.
  const isSearchable = typeof location === 'string' || Array.isArray(location);
  return isSearchable && location.includes('Input Filter');
}
/**
 * The Orchestration client.
 *
 * A LangChain chat model that sends requests through the orchestration client
 * from `@sap-ai-sdk/orchestration`. Supports tool binding, structured output
 * and streaming.
 */
export class OrchestrationClient extends BaseChatModel {
  orchestrationConfig;
  langchainOptions;
  deploymentConfig;
  destination;
  streaming = false;

  /**
   * Create a new client.
   * @param orchestrationConfig - A single orchestration module configuration, or a non-empty list of configurations.
   * @param langchainOptions - LangChain model options. The object is not modified; a shallow copy with a wrapped `onFailedAttempt` handler is stored on the instance.
   * @param deploymentConfig - Forwarded unchanged to the underlying orchestration client.
   * @param destination - Forwarded unchanged to the underlying orchestration client.
   */
  constructor(orchestrationConfig, langchainOptions = {}, deploymentConfig, destination) {
    const { onFailedAttempt } = langchainOptions;
    // Work on a copy so the caller's options object is never mutated.
    const options = {
      ...langchainOptions,
      // Avoid retry if the error is due to input filtering
      onFailedAttempt: error => {
        if (isInputFilteringError(error)) {
          throw error;
        }
        onFailedAttempt?.(error);
      }
    };
    super(options);
    this.orchestrationConfig = orchestrationConfig;
    this.langchainOptions = options;
    this.deploymentConfig = deploymentConfig;
    this.destination = destination;
    // Initialize streaming flags with LangChain-compatible behavior:
    // - `streaming`: true enables auto-streaming in `invoke()` calls
    // - `disableStreaming`: true overrides streaming flag
    // - `streaming`: `false` causes `disableStreaming` to be set to `true` for framework compatibility
    this.disableStreaming =
      options.disableStreaming === true || options.streaming === false;
    // Enable streaming only when `streaming` is `true` (default `false`) and `disableStreaming` is not `true`.
    this.streaming =
      options.streaming === true && this.disableStreaming !== true;
  }

  /**
   * Identifier of this model type.
   * @returns The string `'orchestration'`.
   */
  _llmType() {
    return 'orchestration';
  }

  /**
   * Create a new runnable sequence that runs each individual runnable in series,
   * piping the output of one runnable into another runnable or runnable-like.
   * @param coerceable - A runnable, function, or object whose values are functions or runnables.
   * @returns A new runnable sequence.
   */
  pipe(coerceable) {
    return super.pipe(coerceable);
  }

  /**
   * Generate a chat result for the given messages.
   *
   * When `streaming` is enabled, the response is streamed and the chunks are
   * concatenated into a single generation. Otherwise a single chat completion
   * request is sent.
   * @param messages - The messages to send to the model.
   * @param options - The call options.
   * @param runManager - The callback manager for the run.
   * @returns The chat result.
   * @throws If streaming is enabled and the stream yields no chunks.
   */
  async _generate(messages, options, runManager) {
    options.signal?.throwIfAborted();
    // Auto-streaming: transparently stream and concatenate when enabled
    if (this.streaming) {
      let generation;
      const stream = this._streamResponseChunks(messages, options, runManager);
      for await (const chunk of stream) {
        generation =
          generation === undefined ? chunk : generation.concat(chunk);
      }
      if (generation === undefined) {
        throw new Error('No chunks were generated from the stream.');
      }
      return { generations: [generation] };
    }
    const { placeholderValues, customRequestConfig } = options;
    const allMessages = mapLangChainMessagesToOrchestrationMessages(messages);
    const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);
    const res = await this.caller.callWithOptions(
      { signal: options.signal },
      () => {
        const orchestrationClient = new OrchestrationClientBase(
          mergedOrchestrationConfig,
          this.deploymentConfig,
          this.destination
        );
        return orchestrationClient.chatCompletion(
          { messages: allMessages, placeholderValues },
          { ...customRequestConfig, signal: options.signal }
        );
      }
    );
    const content = res.getContent();
    // Non-string content is reported to the run manager as an empty token.
    await runManager?.handleLLMNewToken(
      typeof content === 'string' ? content : ''
    );
    return mapOutputToChatResult(res._data);
  }

  /**
   * Bind tools to the model.
   * @param tools - The tools to bind; each is mapped to a chat completion tool.
   * @param kwargs - Additional call options. `strict`, if set, is passed to the tool mapping. All entries are merged into the bound config after `tools`.
   * @returns A runnable with the tools bound as call options.
   */
  bindTools(tools, kwargs) {
    const strict = kwargs?.strict;
    return this.withConfig({
      tools: tools.map(tool => mapToolToChatCompletionTool(tool, strict)),
      ...kwargs
    });
  }

  /**
   * Create a runnable that returns output matching the given schema.
   * @param outputSchema - The schema the output should conform to.
   * @param config - Optional settings: `method` (`'jsonSchema'` by default, `'functionCalling'` or `'jsonMode'`), `name` (`'extract'` by default), `strict` and `includeRaw`.
   * @returns A runnable producing the parsed output, or `{ raw, parsed }` when `includeRaw` is truthy (`parsed` is `null` if parsing fails).
   * @throws If `strict` is set together with the `'jsonMode'` method, or if the method is unsupported.
   */
  withStructuredOutput(outputSchema, config) {
    // Extract config options
    const method = config?.method ?? 'jsonSchema';
    const name = config?.name ?? 'extract';
    const description = getSchemaDescription(outputSchema);
    const strict = config?.strict;
    const includeRaw = config?.includeRaw;
    let llm;
    let outputParser;
    // Convert schema to JSON Schema format
    const jsonSchema = toJsonSchema(outputSchema);
    // Metadata for langsmith
    const lsStructuredOutputFormat = {
      ls_structured_output_format: {
        kwargs: { method },
        schema: jsonSchema
      }
    };
    // Parser shared by the native JSON response formats.
    const createJsonParser = () =>
      isInteropZodSchema(outputSchema)
        ? StructuredOutputParser.fromZodSchema(outputSchema)
        : new JsonOutputParser();

    if (method === 'functionCalling') {
      // functionCalling method: Provide tool for structured output construction.
      outputParser = new JsonOutputKeyToolsParser({
        returnSingle: true,
        keyName: name,
        ...(isInteropZodSchema(outputSchema) && {
          zodSchema: outputSchema
        })
      });
      llm = this.withConfig({
        // TODO: Set `tool_choice` if it becomes supported in Orchestration
        tools: [
          {
            type: 'function',
            function: {
              name,
              description,
              parameters: jsonSchema,
              ...(strict !== undefined && { strict })
            }
          }
        ],
        ...lsStructuredOutputFormat
      });
    } else if (method === 'jsonMode') {
      // jsonMode method: Use orchestration's native JSON response format
      if (strict !== undefined) {
        throw new Error('The "strict" option is not supported with the "jsonMode" structured output method. Please use "jsonSchema" or "functionCalling" methods for strict output instead.');
      }
      outputParser = createJsonParser();
      llm = this.withConfig({
        responseFormat: {
          type: 'json_object'
        },
        ...lsStructuredOutputFormat
      });
    } else if (method === 'jsonSchema') {
      // jsonSchema method: Use orchestration's native JSON Schema response format
      outputParser = createJsonParser();
      llm = this.withConfig({
        responseFormat: {
          type: 'json_schema',
          json_schema: {
            name,
            description,
            schema: jsonSchema,
            ...(strict !== undefined && { strict })
          }
        },
        ...lsStructuredOutputFormat
      });
    } else {
      throw new Error(`Unsupported structured output method: ${method}. Supported methods are 'jsonSchema', 'functionCalling', and 'jsonMode'.`);
    }

    if (!includeRaw) {
      return llm.pipe(outputParser);
    }
    const parserAssign = RunnablePassthrough.assign({
      parsed: (input, parserConfig) =>
        outputParser.invoke(input.raw, parserConfig)
    });
    // Fallback used when parsing fails: keep the raw output, report no parsed value.
    const parserNone = RunnablePassthrough.assign({
      parsed: () => null
    });
    const parsedWithFallback = parserAssign.withFallbacks({
      fallbacks: [parserNone]
    });
    return RunnableSequence.from([{ raw: llm }, parsedWithFallback]);
  }

  /**
   * Stream response chunks from the Orchestration client.
   * @param messages - The messages to send to the model.
   * @param options - The call options.
   * @param runManager - The callback manager for the run.
   * @returns An async generator of chat generation chunks.
   */
  async *_streamResponseChunks(messages, options, runManager) {
    options.signal?.throwIfAborted();
    const orchestrationMessages =
      mapLangChainMessagesToOrchestrationMessages(messages);
    const { placeholderValues, customRequestConfig } = options;
    const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);
    const orchestrationClient = new OrchestrationClientBase(
      mergedOrchestrationConfig,
      this.deploymentConfig,
      this.destination
    );
    const response = await this.caller.callWithOptions(
      { signal: options.signal },
      () =>
        orchestrationClient.stream(
          { messages: orchestrationMessages, placeholderValues },
          options.signal,
          options.streamOptions,
          customRequestConfig
        )
    );
    for await (const chunk of response.stream) {
      const orchestrationResult = chunk._data.final_result;
      // There can be only none or one choice inside a chunk.
      // `choices` itself may be absent, so guard the index access as well.
      const choice = orchestrationResult?.choices?.[0];
      // Map the chunk to a LangChain message chunk
      const messageChunk = mapOrchestrationChunkToLangChainMessageChunk(chunk);
      // Create initial generation info with token indices
      const newTokenIndices = {
        prompt: options.promptIndex ?? 0,
        completion: choice?.index ?? 0
      };
      const generationInfo = { ...newTokenIndices };
      // Process finish reason
      if (choice?.finish_reason && orchestrationResult) {
        generationInfo.finish_reason = choice.finish_reason;
        // Only include system fingerprint in the last chunk for now to avoid concatenation issues
        generationInfo.system_fingerprint =
          orchestrationResult.system_fingerprint;
        generationInfo.model_name = orchestrationResult.model;
        generationInfo.id = orchestrationResult.id;
        generationInfo.created = orchestrationResult.created;
        generationInfo.request_id = chunk._data.request_id;
      }
      // Process token usage
      const tokenUsage = chunk.getTokenUsage();
      if (tokenUsage) {
        generationInfo.token_usage = tokenUsage;
        messageChunk.usage_metadata = {
          input_tokens: tokenUsage.prompt_tokens,
          output_tokens: tokenUsage.completion_tokens,
          total_tokens: tokenUsage.total_tokens
        };
      }
      const content = chunk.getDeltaContent() ?? '';
      const generationChunk = new ChatGenerationChunk({
        message: messageChunk,
        text: content,
        generationInfo
      });
      // Notify the run manager about the new token
      // Some parameters(`_runId`, `_parentRunId`, `_tags`) are set as undefined as they are implicitly read from the context.
      await runManager?.handleLLMNewToken(
        content,
        newTokenIndices,
        undefined,
        undefined,
        undefined,
        { chunk: generationChunk }
      );
      yield generationChunk;
    }
  }

  /**
   * Merge per-call options into a single orchestration module configuration.
   *
   * The given configuration is never modified: every nested object that is
   * changed (`promptTemplating`, `model`, `params`, `prompt`) is copied first.
   * This keeps tools and response formats from one call out of later calls.
   * @param orchestrationConfig - The base orchestration module configuration.
   * @param options - The call options; `tools`, `stop` and `responseFormat` are merged.
   * @returns A new configuration with the call options applied.
   * @throws If `responseFormat` is set while the prompt is a template reference.
   */
  mergeOrchestrationConfig(orchestrationConfig, options) {
    const { tools = [], stop = [], responseFormat } = options;
    const { promptTemplating } = orchestrationConfig;
    const { model } = promptTemplating;
    const config = {
      ...orchestrationConfig,
      promptTemplating: {
        ...promptTemplating,
        model: {
          ...model,
          params: {
            ...model.params,
            // Append call-level stop sequences to the configured ones.
            ...(stop.length && {
              stop: [...(model.params?.stop || []), ...stop]
            })
          }
        }
      }
    };

    if (tools.length) {
      const prompt = config.promptTemplating.prompt || {};
      // Tools can only be attached to an inline prompt, not to a template reference.
      config.promptTemplating.prompt =
        typeof prompt === 'object' && !isTemplateRef(prompt)
          ? {
              ...prompt,
              tools: [
                // Preserve existing tools configured in the templating module
                ...(prompt.tools || []),
                // Add new tools set with LangChain `bindTools()` or `invoke()` methods
                ...tools.map(t => mapToolToChatCompletionTool(t))
              ]
            }
          : prompt;
    }

    // Handle responseFormat for structured output
    if (responseFormat) {
      // Ensure prompt object exists
      const prompt = config.promptTemplating.prompt || {};
      if (typeof prompt === 'object') {
        // Check if prompt is a TemplateRef
        if (isTemplateRef(prompt)) {
          throw new Error('Cannot use withStructuredOutput with TemplateRef. ' +
            'Structured output requires inline template definition to set responseFormat.');
        }
        // Add responseFormat to a copy of the prompt
        config.promptTemplating.prompt = {
          ...prompt,
          response_format: responseFormat
        };
      } else {
        config.promptTemplating.prompt = prompt;
      }
    }
    return config;
  }

  /**
   * Merge per-call options into the configured orchestration config(s).
   * @param options - The call options.
   * @returns A single merged configuration, or a list of merged configurations when the client was created with a list.
   * @throws If the client was created with an empty configuration list.
   */
  mergeOrchestrationConfigs(options) {
    if (!Array.isArray(this.orchestrationConfig)) {
      return this.mergeOrchestrationConfig(this.orchestrationConfig, options);
    }
    if (!this.orchestrationConfig.length) {
      throw new Error('Orchestration config list must not be empty for module fallback.');
    }
    return this.orchestrationConfig.map(config =>
      this.mergeOrchestrationConfig(config, options)
    );
  }
}
//# sourceMappingURL=client.js.map