UNPKG

langchain

Version:
130 lines (128 loc) 5.12 kB
const require_rolldown_runtime = require('../../_virtual/rolldown_runtime.cjs');
const require_middleware = require('../middleware.cjs');
const __langchain_core_messages = require_rolldown_runtime.__toESM(require("@langchain/core/messages"));
const zod_v3 = require_rolldown_runtime.__toESM(require("zod/v3"));

//#region src/agents/middleware/callLimit.ts
/**
 * Exit behavior applied when neither the runtime context nor the middleware
 * options specify one.
 */
const DEFAULT_EXIT_BEHAVIOR = "end";

/**
 * Schema of the per-invocation context values read by the middleware hook.
 * Every field is optional; a value present here takes precedence over the
 * matching field of the options passed to `modelCallLimitMiddleware`.
 */
const contextSchema = zod_v3.z.object({
	threadLimit: zod_v3.z.number().optional(),
	runLimit: zod_v3.z.number().optional(),
	exitBehavior: zod_v3.z.enum(["throw", "end"]).optional()
});

/**
 * Error describing that a model call limit has been reached.
 *
 * The message lists a thread-level hint only when both `threadLimit` and
 * `threadCount` are numbers, and a run-level hint only when both `runLimit`
 * and `runCount` are numbers. The values passed in are also kept on the
 * instance so callers can inspect them without parsing the message.
 *
 * @param threadLimit - The maximum number of model calls allowed per thread.
 * @param runLimit - The maximum number of model calls allowed per run.
 * @param threadCount - The number of model calls made at the thread level.
 * @param runCount - The number of model calls made at the run level.
 */
class ModelCallLimitMiddlewareError extends Error {
	constructor({ threadLimit, runLimit, threadCount, runCount } = {}) {
		const exceededHint = [];
		if (typeof threadLimit === "number" && typeof threadCount === "number") {
			exceededHint.push(`thread level call limit reached with ${threadCount} model calls`);
		}
		if (typeof runLimit === "number" && typeof runCount === "number") {
			exceededHint.push(`run level call limit reached with ${runCount} model calls`);
		}
		const details = exceededHint.length > 0 ? `: ${exceededHint.join(", ")}` : "";
		super(`Model call limits exceeded${details}`);
		this.name = "ModelCallLimitMiddlewareError";
		this.threadLimit = threadLimit;
		this.runLimit = runLimit;
		this.threadCount = threadCount;
		this.runCount = runCount;
	}
}

/**
 * Applies the configured exit behavior once a limit has been reached.
 *
 * @param error - The limit error describing which limit was hit.
 * @param exitBehavior - `"end"` to stop the agent gracefully; any other value rethrows.
 * @returns A state update that jumps to `"end"` and appends an `AIMessage`
 * carrying the error text (only when `exitBehavior` is `"end"`).
 * @throws {ModelCallLimitMiddlewareError} When `exitBehavior` is not `"end"`.
 */
function exitOnLimitReached(error, exitBehavior) {
	if (exitBehavior === "end") {
		return {
			jumpTo: "end",
			messages: [new __langchain_core_messages.AIMessage(error.message)]
		};
	}
	throw error;
}

/**
 * Creates a middleware to limit the number of model calls at both thread and run levels.
 *
 * This middleware helps prevent excessive model API calls by enforcing limits on how many
 * times the model can be invoked. It supports two types of limits:
 *
 * - **Thread-level limit**: Restricts the total number of model calls across an entire conversation thread
 * - **Run-level limit**: Restricts the number of model calls within a single agent run/invocation
 *
 * ## How It Works
 *
 * The middleware runs a `beforeModel` hook that compares the call counts reported by the
 * runtime (`runtime.threadLevelCallCount` and `runtime.runModelCallCount`) against the
 * configured limits. A limit counts as reached as soon as the count is greater than or
 * equal to it. The thread-level limit is checked first; the run-level limit is only
 * checked when the thread-level limit has not been reached.
 *
 * What happens when a limit is reached depends on `exitBehavior`:
 *
 * - `"end"` (default): the hook jumps to the end of the agent run and appends an
 *   `AIMessage` whose content is the limit error message. Nothing is thrown.
 * - `"throw"`: the hook throws a `ModelCallLimitMiddlewareError`.
 *
 * Each setting is resolved from the runtime context first, then from `middlewareOptions`.
 *
 * ## Use Cases
 *
 * - **Cost Control**: Prevent runaway costs from excessive model calls in production
 * - **Testing**: Ensure agents don't make too many calls during development/testing
 * - **Safety**: Limit potential infinite loops or recursive agent behaviors
 * - **Rate Limiting**: Enforce organizational policies on model usage per conversation
 *
 * @param middlewareOptions - Configuration options for the call limits
 * @param middlewareOptions.threadLimit - Maximum number of model calls allowed per thread (optional)
 * @param middlewareOptions.runLimit - Maximum number of model calls allowed per run (optional)
 * @param middlewareOptions.exitBehavior - `"end"` (default) or `"throw"`; what to do once a limit is reached (optional)
 *
 * @returns A middleware instance that can be passed to `createAgent`
 *
 * @throws {ModelCallLimitMiddlewareError} When either limit is reached and the resolved `exitBehavior` is `"throw"`
 *
 * @example
 * ```typescript
 * import { createAgent, modelCallLimitMiddleware } from "langchain";
 *
 * // Limit to 10 calls per thread and 3 calls per run
 * const agent = createAgent({
 *   model: "openai:gpt-4o-mini",
 *   tools: [myTool],
 *   middleware: [
 *     modelCallLimitMiddleware({
 *       threadLimit: 10,
 *       runLimit: 3
 *     })
 *   ]
 * });
 * ```
 *
 * @example
 * ```typescript
 * // Limits can also be configured at runtime via context
 * // NOTE(review): the hook reads overrides from `runtime.context`; confirm that
 * // `configurable` is the invoke option that populates it in this version.
 * const result = await agent.invoke(
 *   { messages: ["Hello"] },
 *   {
 *     configurable: {
 *       threadLimit: 5 // Override the default limit for this run
 *     }
 *   }
 * );
 * ```
 */
function modelCallLimitMiddleware(middlewareOptions) {
	return require_middleware.createMiddleware({
		name: "ModelCallLimitMiddleware",
		contextSchema,
		beforeModel: {
			canJumpTo: ["end"],
			hook: (state, runtime) => {
				// Every context field is optional, so tolerate a missing context object
				// instead of failing with a TypeError unrelated to call limits.
				const context = runtime.context ?? {};
				const exitBehavior = context.exitBehavior ?? middlewareOptions?.exitBehavior ?? DEFAULT_EXIT_BEHAVIOR;
				const threadLimit = context.threadLimit ?? middlewareOptions?.threadLimit;
				const runLimit = context.runLimit ?? middlewareOptions?.runLimit;

				// Thread-level check comes first; a limit of 0 is a valid number and
				// blocks the very first call.
				if (typeof threadLimit === "number" && threadLimit <= runtime.threadLevelCallCount) {
					const error = new ModelCallLimitMiddlewareError({
						threadLimit,
						threadCount: runtime.threadLevelCallCount
					});
					return exitOnLimitReached(error, exitBehavior);
				}

				if (typeof runLimit === "number" && runLimit <= runtime.runModelCallCount) {
					const error = new ModelCallLimitMiddlewareError({
						runLimit,
						runCount: runtime.runModelCallCount
					});
					return exitOnLimitReached(error, exitBehavior);
				}

				// No limit reached: hand the state back unchanged.
				return state;
			}
		}
	});
}

//#endregion
exports.modelCallLimitMiddleware = modelCallLimitMiddleware;
//# sourceMappingURL=callLimit.cjs.map