@ibm-cloud/watsonx-ai

/**
 * (C) Copyright IBM Corp. 2025-2026.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
import type { DefaultParams, JsonObject, CacheConfig } from "../../../types/common.js";
import type { StreamOptions } from "../gateway.js";
import type { ChatsMessage, ChatsTextContentPart } from "./messages.js";
import type { ChatsToolChoice, ChatsRequestTool, ChatsFunctionCall } from "./tools.js";

/** Constants for the `createChatCompletions` operation. */
export declare namespace CreateChatCompletionsConstants {
  /**
   * Constrains effort on reasoning for reasoning models. For OpenAI, currently supported by `o1`
   * models only. Reducing reasoning effort can result in faster responses and fewer tokens used on
   * reasoning in a response.
   *
   * Members of this enum are accepted by `CreateChatCompletionsParams.reasoningEffort`.
   */
  enum ReasoningEffort {
    LOW = "low",
    MEDIUM = "medium",
    HIGH = "high"
  }
}

/** Parameters for the chat completion operation. */
export interface CreateChatCompletionsParams extends DefaultParams {
  /**
   * A list of messages comprising the chat conversation so far. Depending on the model you use,
   * different message types (modalities) are supported, like `"text"`, `"images"`, and `"audio"`.
   */
  messages: ChatsMessage[];

  /** ID or alias of the model to forward the chat request to. */
  model: string;

  /**
   * Parameters for audio output. Only required when audio output is requested with modalities:
   * `["audio"]`.
   *
   * See: [OpenAI's Audio Guide](https://platform.openai.com/docs/guides/audio) for more
   * information.
   */
  audio?: JsonObject;

  /** Caching configuration for a request. Cache is only supported for non-streaming requests. */
  cache?: CacheConfig;

  /**
   * A number between `-2.0` and `2.0`. Positive values penalize new tokens based on their existing
   * frequency in the text so far, decreasing the model's likelihood to repeat the same line
   * verbatim.
   */
  frequencyPenalty?: number;

  /**
   * Controls which (if any) function is called by the model.
   *
   * - `"none"` means the model will not call a function and instead generates a message.
   * - `"auto"` means the model can pick between generating a message or calling a function.
   * - Specifying a particular function via `{"name": "my_function"}` forces the model to call that
   *   function.
   *
   * `"none"` is the default when no functions are present. `"auto"` is the default if functions are
   * present.
   *
   * Deprecated: `function_call` has been deprecated by OpenAI in favor of `tool_choice`.
   */
  functionCall?: string | ChatsFunctionCall;

  /**
   * A list of functions the model may generate JSON inputs for.
   *
   * Deprecated: `functions` has been deprecated by OpenAI in favor of `tools`.
   */
  functions?: JsonObject;

  /**
   * Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object
   * that maps tokens (specified by their token ID in the tokenizer) to an associated bias value
   * from `-100` to `100`. Mathematically, the bias is added to the logits generated by the model
   * prior to sampling. The exact effect will vary per model, but values between `-1` and `1` should
   * decrease or increase likelihood of selection; values like `-100` or `100` should result in a
   * ban or exclusive selection of the relevant token.
   */
  logitBias?: JsonObject;

  /**
   * Indicates whether to return log probabilities of the output tokens or not. If `true`, returns
   * the log probabilities of each output token returned in the content of message.
   */
  logprobs?: boolean;

  /**
   * Specifies an upper bound for the number of tokens that can be generated for a completion,
   * including visible output tokens and [reasoning tokens][reasoning tokens].
   *
   * [reasoning tokens]: https://platform.openai.com/docs/guides/reasoning
   */
  maxCompletionTokens?: number;

  /**
   * Specifies a maximum number of tokens that can be generated in the chat completion. This value
   * can be used to control costs for text generated via API.
   *
   * Deprecated: `max_tokens` has been deprecated by OpenAI in favor of `max_completion_tokens`, and
   * is not compatible with `o1` series models.
   */
  maxTokens?: number;

  /** Contains developer-defined tags and values used for filtering completions. */
  metadata?: JsonObject;

  /**
   * Specifies the output types that you would like the model to generate for this request. Most
   * models are capable of generating text, which is the default (`["text"]`). Some models can
   * generate audio. For OpenAI, the `gpt-4o-audio-preview` model can be used to [generate
   * audio][generate audio]. To request that this model generate both text and audio responses, you
   * can use `["text", "audio"]`.
   *
   * [generate audio]: https://platform.openai.com/docs/guides/audio
   */
  modalities?: string[];

  /**
   * Specifies how many chat completion choices to generate for each input message.
   *
   * Note: you will be charged based on the number of generated tokens across all choices, keep
   * `"n"` set to `1` to minimize costs.
   */
  n?: number;

  /** Specifies whether to enable parallel function calling during tool use. */
  parallelToolCalls?: boolean;

  /**
   * The configuration for a [Predicted Output][Predicted Output], which can greatly improve
   * response times when large parts of the model response are known ahead of time. This is most
   * common when you are regenerating a file with only minor changes to most of the content.
   *
   * [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
   */
  prediction?: ChatsPrediction;

  /**
   * A number between `-2.0` and `2.0`. Positive values penalize new tokens based on whether they
   * appear in the text so far, increasing the model's likelihood to talk about new topics.
   */
  presencePenalty?: number;

  /**
   * Constrains effort on reasoning for reasoning models. For OpenAI, currently supported by `o1`
   * models only. Reducing reasoning effort can result in faster responses and fewer tokens used on
   * reasoning in a response.
   *
   * Accepts either the literal strings or the matching
   * `CreateChatCompletionsConstants.ReasoningEffort` members. The enum is listed explicitly because
   * string-enum members are not assignable to string-literal types.
   */
  reasoningEffort?: CreateChatCompletionsConstants.ReasoningEffort | "low" | "medium" | "high";

  /**
   * An object specifying the format that the model must output.
   *
   * - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
   *   Outputs][Structured Outputs] which ensures the model will match your supplied JSON schema.
   * - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
   *   generates is valid JSON.
   *
   * Important: when using JSON mode, you must also instruct the model to produce JSON yourself via
   * a system or user message. Without this, the model may generate an unending stream of whitespace
   * until the generation reaches the token limit, resulting in a long-running and seemingly "stuck"
   * request. Also note that the message content may be partially cut off if `"finish_reason"` is
   * set to `"length"`, which indicates the generation exceeded `max_tokens` or the conversation
   * exceeded the max context length.
   *
   * [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
   */
  responseFormat?: ChatsResponseFormat;

  /** Specifies model routing configurations for the request. */
  router?: JsonObject;

  /**
   * The seed for the model request. For OpenAI, this feature is in Beta. If specified, OpenAI's
   * system will make a best effort attempt to sample deterministically, such that repeated requests
   * with the same seed and parameters should return the same result. Determinism is not guaranteed,
   * and you should refer to the `system_fingerprint` response parameter to monitor changes in the
   * backend.
   */
  seed?: number;

  /** The service tier used for processing a request. */
  serviceTier?: string;

  /** Specifies up to 4 sequences where the API will stop generating further tokens. */
  stop?: string[];

  /**
   * Indicates whether to store the output of this chat completion request for use in OpenAI's
   * [model distillation][model distillation] or [evals][evals] products.
   *
   * [model distillation]: https://platform.openai.com/docs/guides/distillation
   *
   * [evals]: https://platform.openai.com/docs/guides/evals
   */
  store?: boolean;

  /**
   * Indicates whether to stream the model response to the user. If set, partial message deltas will
   * be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become
   * available, with the stream terminated by a `data: [DONE]` message.
   */
  stream?: boolean;

  /** Options for streaming response. Only set this when you set `stream` to `true`. */
  streamOptions?: StreamOptions;

  /**
   * Specifies what sampling temperature to use. Higher values like `0.8` will make the output more
   * random, while lower values like `0.2` will make it more focused and deterministic.
   *
   * Note: OpenAI generally recommends altering this or `top_p` but not both.
   */
  temperature?: number;

  /**
   * Controls which (if any) tool is called by the model.
   *
   * - `"none"` means the model will not call any tool and instead generates a message.
   * - `"auto"` means the model can pick between generating a message or calling one or more tools.
   * - `"required"` means the model must call one or more tools.
   * - Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
   *   forces the model to call that tool.
   *
   * `"none"` is the default when no tools are present. `"auto"` is the default if tools are
   * present.
   */
  toolChoice?: ChatsToolChoice;

  /**
   * A list of tools the model may call. Currently, only functions are supported as tools. Use this
   * to provide a list of functions the model may generate JSON inputs for. A max of 128 functions
   * are supported.
   */
  tools?: ChatsRequestTool[];

  /**
   * An integer between `0` and `20` specifying the number of most likely tokens to return at each
   * token position, each with an associated log probability. `logprobs` must be set to `true` if
   * this parameter is used.
   */
  topLogprobs?: number;

  /**
   * An alternative to sampling with `temperature`, called nucleus sampling, where the model
   * considers the results of the tokens with `top_p` probability mass. Example: `0.1` means only
   * the tokens comprising the top 10% probability mass are considered.
   *
   * Note: OpenAI generally recommends altering this or `temperature` but not both.
   */
  topP?: number;

  /**
   * A unique identifier representing your end-user, which can help OpenAI to monitor and detect
   * abuse.
   */
  user?: string;
}

export declare namespace ChatsPrediction {
  namespace Constants {
    /** Type of predicted content you want to provide, should always be `"content"`. */
    enum Type {
      CONTENT = "content"
    }
  }
}

/**
 * The configuration for a [Predicted Output][Predicted Output], which can greatly improve response
 * times when large parts of the model response are known ahead of time. This is most common when
 * you are regenerating a file with only minor changes to most of the content.
 *
 * [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
 */
export interface ChatsPrediction {
  /**
   * Content that should be matched when generating a model response. If generated tokens would
   * match this content, the entire model response can be returned much more quickly.
   */
  content: string | ChatsTextContentPart[];

  /** Type of predicted content you want to provide, should always be `"content"`. */
  type: ChatsPrediction.Constants.Type | string;
}

/**
 * An object specifying the format that the model must output.
 *
 * - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
 *   Outputs][Structured Outputs] which ensures the model will match your supplied JSON schema.
 * - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
 *   generates is valid JSON.
 *
 * Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a
 * system or user message. Without this, the model may generate an unending stream of whitespace
 * until the generation reaches the token limit, resulting in a long-running and seemingly "stuck"
 * request. Also note that the message content may be partially cut off if `"finish_reason"` is set
 * to `"length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded
 * the max context length.
 *
 * [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
 */
export type ChatsResponseFormat =
  | ChatsResponseFormatJSON
  | ChatsResponseFormatJSONSchema
  | ChatsResponseFormatText;

/** Represents JSON format (JSON mode) for a chat response. */
export interface ChatsResponseFormatJSON {
  /**
   * Type of response format being defined. JSON mode is selected with `"json_object"`; pass that
   * literal string, since the declared `Constants.Type` enum does not contain it.
   */
  type: ChatsResponseFormatJSON.Constants.Type | string;
}

export declare namespace ChatsResponseFormatJSON {
  namespace Constants {
    /**
     * Type of response format being defined.
     *
     * NOTE(review): the only declared member is `JSON_SCHEMA = "json_schema"`, which does not match
     * the `"json_object"` value that enables JSON mode. This looks like a generator copy-paste from
     * `ChatsResponseFormatJSONSchema` — confirm against the runtime constants before relying on it.
     */
    enum Type {
      JSON_SCHEMA = "json_schema"
    }
  }
}

/** Custom schema used to generate structured JSON responses. */
export interface ChatsJSONSchema {
  /**
   * Description of what the response format is for. Used by the model to determine how to respond
   * in the format.
   */
  description?: string;

  /**
   * Name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a
   * maximum length of 64.
   */
  name: string;

  /**
   * Schema for the response format, described as a JSON Schema object.
   *
   * See [JSON Schema](https://json-schema.org/) for more information.
   */
  schema?: JsonObject;

  /**
   * Indicates whether to enable strict schema adherence when generating the output. If set to
   * `true`, the model will always follow the exact schema defined in the schema field. Only a
   * subset of JSON Schema is supported when `strict` is `true`.
   */
  strict?: boolean;
}

/** Represents custom JSON schema format for a chat response. */
export interface ChatsResponseFormatJSONSchema {
  /** Custom schema used to generate structured JSON responses. */
  json_schema: ChatsJSONSchema;

  /** Type of response format being defined, in this case should always be `"json_schema"`. */
  type: ChatsResponseFormatJSONSchema.Constants.Type | string;
}

export declare namespace ChatsResponseFormatJSONSchema {
  namespace Constants {
    /** Type of response format being defined, in this case should always be `"json_schema"`. */
    enum Type {
      JSON_SCHEMA = "json_schema"
    }
  }
}

/** Represents text format for a chat response. */
export interface ChatsResponseFormatText {
  /**
   * Type of response format being defined. Presumably `"text"` for this variant (TODO confirm
   * against the service API); pass the literal string, since the declared `Constants.Type` enum
   * does not contain it.
   */
  type: ChatsResponseFormatText.Constants.Type | string;
}

export declare namespace ChatsResponseFormatText {
  namespace Constants {
    /**
     * Type of response format being defined.
     *
     * NOTE(review): the only declared member is `JSON_SCHEMA = "json_schema"`, which does not
     * describe a plain-text response format. This looks like a generator copy-paste from
     * `ChatsResponseFormatJSONSchema` — confirm against the runtime constants before relying on it.
     */
    enum Type {
      JSON_SCHEMA = "json_schema"
    }
  }
}
//# sourceMappingURL=request.d.ts.map