// @ibm-cloud/watsonx-ai
// Version: (not captured)
// IBM watsonx.ai Node.js SDK
// 348 lines • 16.4 kB
// TypeScript
/**
* (C) Copyright IBM Corp. 2025-2026.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
import type { DefaultParams, JsonObject, CacheConfig } from "../../../types/common.js";
import type { StreamOptions } from "../gateway.js";
import type { ChatsMessage, ChatsTextContentPart } from "./messages.js";
import type { ChatsToolChoice, ChatsRequestTool, ChatsFunctionCall } from "./tools.js";
/**
* Constants for the `createChatCompletions` operation.
*
* This is an ambient (`declare`) namespace: it only describes values that are expected to exist
* at runtime and emits no code itself.
*/
export declare namespace CreateChatCompletionsConstants {
/**
* Constrains effort on reasoning for reasoning models. For OpenAI, currently supported by `o1`
* models only. Reducing reasoning effort can result in faster responses and fewer tokens used on
* reasoning in a response.
*
* The member values are the same three strings that
* `CreateChatCompletionsParams.reasoningEffort` accepts (`"low"`, `"medium"`, `"high"`).
*/
enum ReasoningEffort {
LOW = "low",
MEDIUM = "medium",
HIGH = "high"
}
}
/**
* Parameters for the chat completion operation.
*
* Only `messages` and `model` are required; every other field declared here is optional. Fields
* inherited from `DefaultParams` are declared in another module and are not described here.
*
* Property names are camelCase, while several descriptions below quote the snake_case names used
* by OpenAI (for example `tool_choice`, `max_completion_tokens`, `top_p`).
*/
export interface CreateChatCompletionsParams extends DefaultParams {
/**
* A list of messages comprising the chat conversation so far. Depending on the model you use,
* different message types (modalities) are supported, like `"text"`, `"images"`, and `"audio"`.
*/
messages: ChatsMessage[];
/** ID or alias of the model to forward the chat request to. */
model: string;
/**
* Parameters for audio output. Only required when audio output is requested with
* `modalities: ["audio"]`.
*
* See [OpenAI's Audio Guide](https://platform.openai.com/docs/guides/audio) for more
* information.
*/
audio?: JsonObject;
/** Caching configuration for a request. Cache is only supported for non-streaming requests. */
cache?: CacheConfig;
/**
* A number between `-2.0` and `2.0`. Positive values penalize new tokens based on their existing
* frequency in the text so far, decreasing the model's likelihood to repeat the same line
* verbatim.
*/
frequencyPenalty?: number;
/**
* Controls which (if any) function is called by the model.
*
* - `"none"` means the model will not call a function and instead generates a message.
* - `"auto"` means the model can pick between generating a message or calling a function.
* - Specifying a particular function via `{"name": "my_function"}` forces the model to call that
* function.
*
* `"none"` is the default when no functions are present. `"auto"` is the default if functions are
* present.
*
* @deprecated `function_call` has been deprecated by OpenAI in favor of `tool_choice`; see
* `toolChoice`.
*/
functionCall?: string | ChatsFunctionCall;
/**
* A list of functions the model may generate JSON inputs for.
*
* NOTE(review): described as a list but typed as a single `JsonObject` — confirm the expected
* shape against the service before relying on either.
*
* @deprecated `functions` has been deprecated by OpenAI in favor of `tools`; see `tools`.
*/
functions?: JsonObject;
/**
* Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object
* that maps tokens (specified by their token ID in the tokenizer) to an associated bias value
* from `-100` to `100`. Mathematically, the bias is added to the logits generated by the model
* prior to sampling. The exact effect will vary per model, but values between `-1` and `1` should
* decrease or increase likelihood of selection; values like `-100` or `100` should result in a
* ban or exclusive selection of the relevant token.
*/
logitBias?: JsonObject;
/**
* Indicates whether to return log probabilities of the output tokens or not. If `true`, returns
* the log probabilities of each output token returned in the content of message.
*/
logprobs?: boolean;
/**
* Specifies an upper bound for the number of tokens that can be generated for a completion,
* including visible output tokens and [reasoning tokens][reasoning tokens].
*
* [reasoning tokens]: https://platform.openai.com/docs/guides/reasoning
*/
maxCompletionTokens?: number;
/**
* Specifies a maximum number of tokens that can be generated in the chat completion. This value
* can be used to control costs for text generated via API.
*
* @deprecated `max_tokens` has been deprecated by OpenAI in favor of `max_completion_tokens`
* (see `maxCompletionTokens`), and is not compatible with `o1` series models.
*/
maxTokens?: number;
/** Contains developer-defined tags and values used for filtering completions. */
metadata?: JsonObject;
/**
* Specifies the output types that you would like the model to generate for this request. Most
* models are capable of generating text, which is the default (`["text"]`). Some models can
* generate audio. For OpenAI, the `gpt-4o-audio-preview` model can be used to [generate
* audio][generate audio]. To request that this model generate both text and audio responses, you
* can use `["text", "audio"]`.
*
* [generate audio]: https://platform.openai.com/docs/guides/audio
*/
modalities?: string[];
/**
* Specifies how many chat completion choices to generate for each input message.
*
* Note: you will be charged based on the number of generated tokens across all choices; keep
* `n` set to `1` to minimize costs.
*/
n?: number;
/** Specifies whether to enable parallel function calling during tool use. */
parallelToolCalls?: boolean;
/**
* The configuration for a [Predicted Output][Predicted Output], which can greatly improve
* response times when large parts of the model response are known ahead of time. This is most
* common when you are regenerating a file with only minor changes to most of the content.
*
* [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
*/
prediction?: ChatsPrediction;
/**
* A number between `-2.0` and `2.0`. Positive values penalize new tokens based on whether they
* appear in the text so far, increasing the model's likelihood to talk about new topics.
*/
presencePenalty?: number;
/**
* Constrains effort on reasoning for reasoning models. For OpenAI, currently supported by `o1`
* models only. Reducing reasoning effort can result in faster responses and fewer tokens used on
* reasoning in a response.
*
* The accepted strings are the same values declared by
* `CreateChatCompletionsConstants.ReasoningEffort`.
*/
reasoningEffort?: 'low' | 'medium' | 'high';
/**
* An object specifying the format that the model must output.
*
* - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
* Outputs][Structured Outputs] which ensures the model will match your supplied JSON schema.
* - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
* generates is valid JSON.
*
* Important: when using JSON mode, you must also instruct the model to produce JSON yourself via
* a system or user message. Without this, the model may generate an unending stream of whitespace
* until the generation reaches the token limit, resulting in a long-running and seemingly "stuck"
* request. Also note that the message content may be partially cut off if `"finish_reason"` is
* set to `"length"`, which indicates the generation exceeded `max_tokens` or the conversation
* exceeded the max context length.
*
* [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
*/
responseFormat?: ChatsResponseFormat;
/** Specifies model routing configurations for the request. */
router?: JsonObject;
/**
* The seed for the model request. For OpenAI, this feature is in Beta. If specified, OpenAI's
* system will make a best effort attempt to sample deterministically, such that repeated requests
* with the same seed and parameters should return the same result. Determinism is not guaranteed,
* and you should refer to the `system_fingerprint` response parameter to monitor changes in the
* backend.
*/
seed?: number;
/** The service tier used for processing a request. */
serviceTier?: string;
/** Specifies up to 4 sequences where the API will stop generating further tokens. */
stop?: string[];
/**
* Indicates whether to store the output of this chat completion request for use in OpenAI's
* [model distillation][model distillation] or [evals][evals] products.
*
* [model distillation]: https://platform.openai.com/docs/guides/distillation
*
* [evals]: https://platform.openai.com/docs/guides/evals
*/
store?: boolean;
/**
* Indicates whether to stream the model response to the user. If set, partial message deltas will
* be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become
* available, with the stream terminated by a `data: [DONE]` message.
*/
stream?: boolean;
/** Options for streaming response. Only set this when you set `stream` to `true`. */
streamOptions?: StreamOptions;
/**
* Specifies what sampling temperature to use. Higher values like `0.8` will make the output more
* random, while lower values like `0.2` will make it more focused and deterministic.
*
* Note: OpenAI generally recommends altering this or `top_p` (`topP`) but not both.
*/
temperature?: number;
/**
* Controls which (if any) tool is called by the model.
*
* - `"none"` means the model will not call any tool and instead generates a message.
* - `"auto"` means the model can pick between generating a message or calling one or more tools.
* - `"required"` means the model must call one or more tools.
* - Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
* forces the model to call that tool.
*
* `"none"` is the default when no tools are present. `"auto"` is the default if tools are
* present.
*/
toolChoice?: ChatsToolChoice;
/**
* A list of tools the model may call. Currently, only functions are supported as tools. Use this
* to provide a list of functions the model may generate JSON inputs for. A max of 128 functions
* are supported.
*/
tools?: ChatsRequestTool[];
/**
* An integer between `0` and `20` specifying the number of most likely tokens to return at each
* token position, each with an associated log probability. `logprobs` must be set to `true` if
* this parameter is used.
*/
topLogprobs?: number;
/**
* An alternative to sampling with `temperature`, called nucleus sampling, where the model
* considers the results of the tokens with `top_p` probability mass. Example: `0.1` means only
* the tokens comprising the top 10% probability mass are considered.
*
* Note: OpenAI generally recommends altering this or `temperature` but not both.
*/
topP?: number;
/**
* A unique identifier representing your end-user, which can help OpenAI to monitor and detect
* abuse.
*/
user?: string;
}
/**
* Constants that accompany the `ChatsPrediction` interface declared under the same name in this
* file; the namespace and the interface merge into a single `ChatsPrediction` symbol.
*/
export declare namespace ChatsPrediction {
namespace Constants {
/** Type of predicted content you want to provide, should always be `"content"`. */
enum Type {
CONTENT = "content"
}
}
}
/**
* The configuration for a [Predicted Output][Predicted Output], which can greatly improve response
* times when large parts of the model response are known ahead of time. This is most common when
* you are regenerating a file with only minor changes to most of the content.
*
* Both fields are required.
*
* [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
*/
export interface ChatsPrediction {
/**
* Content that should be matched when generating a model response. If generated tokens would
* match this content, the entire model response can be returned much more quickly.
*
* Either a plain string or an array of text content parts.
*/
content: string | ChatsTextContentPart[];
/**
* Type of predicted content you want to provide, should always be `"content"`.
*
* Because the declared type is unioned with `string`, any string passes type checking; the
* compiler does not enforce the `"content"` value.
*/
type: ChatsPrediction.Constants.Type | string;
}
/**
* Output format the model is required to follow, expressed as one of three object shapes.
*
* - `{ "type": "json_schema", "json_schema": {...} }` turns on [Structured Outputs][Structured
*   Outputs], which ensures the model will match the JSON schema you supply.
* - `{ "type": "json_object" }` turns on JSON mode, which ensures the message the model generates
*   is valid JSON.
*
* Important: in JSON mode you must also tell the model to produce JSON yourself, via a system or
* user message. Otherwise the model may emit an unending stream of whitespace until the token
* limit is reached, resulting in a long-running and seemingly "stuck" request. Also note that the
* message content may be partially cut off if `"finish_reason"` is `"length"`, which indicates
* the generation exceeded `max_tokens` or the conversation exceeded the max context length.
*
* [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
*/
export type ChatsResponseFormat =
  | ChatsResponseFormatJSON
  | ChatsResponseFormatJSONSchema
  | ChatsResponseFormatText;
/** Represents JSON format for a chat response. */
export interface ChatsResponseFormatJSON {
/**
* Type of response format being defined.
*
* This file describes JSON mode as `{ "type": "json_object" }`, so `"json_object"` is presumably
* the value intended here, not `"json_schema"`.
*
* NOTE(review): the companion `ChatsResponseFormatJSON.Constants.Type` enum declares only
* `JSON_SCHEMA = "json_schema"`, which looks copied from `ChatsResponseFormatJSONSchema` —
* confirm against the service. Any string passes type checking because of the `| string` union.
*/
type: ChatsResponseFormatJSON.Constants.Type | string;
}
/**
* Constants that accompany the `ChatsResponseFormatJSON` interface declared under the same name in
* this file.
*/
export declare namespace ChatsResponseFormatJSON {
namespace Constants {
/**
* Type of response format being defined.
*
* NOTE(review): the only member is `JSON_SCHEMA = "json_schema"`, identical to the enum in
* `ChatsResponseFormatJSONSchema`. This file describes JSON mode as `"json_object"`, so this
* value is presumably a generator copy-paste — confirm against the runtime before using it.
*/
enum Type {
JSON_SCHEMA = "json_schema"
}
}
}
/** Custom schema used to generate structured JSON responses. Only `name` is required. */
export interface ChatsJSONSchema {
/**
* Description of what the response format is for. Used by the model to determine how to respond
* in the format.
*/
description?: string;
/**
* Name of the response format. May contain only a-z, A-Z, 0-9, underscores and dashes, with a
* maximum length of 64.
*/
name: string;
/**
* Schema for the response format, described as a JSON Schema object.
*
* See [JSON Schema](https://json-schema.org/) for more information.
*/
schema?: JsonObject;
/**
* Indicates whether to enable strict schema adherence when generating the output. If set to
* `true`, the model will always follow the exact schema defined in the `schema` field. Only a
* subset of JSON Schema is supported when `strict` is `true`.
*/
strict?: boolean;
}
/** Represents custom JSON schema format for a chat response. Both fields are required. */
export interface ChatsResponseFormatJSONSchema {
/**
* Custom schema used to generate structured JSON responses.
*
* Unlike the camelCase request parameters, this key is snake_case, matching the
* `{ "type": "json_schema", "json_schema": {...} }` shape.
*/
json_schema: ChatsJSONSchema;
/**
* Type of response format being defined, in this case should always be `"json_schema"`.
*
* Any string passes type checking because of the `| string` union.
*/
type: ChatsResponseFormatJSONSchema.Constants.Type | string;
}
/**
* Constants that accompany the `ChatsResponseFormatJSONSchema` interface declared under the same
* name in this file.
*/
export declare namespace ChatsResponseFormatJSONSchema {
namespace Constants {
/** Type of response format being defined, in this case should always be `"json_schema"`. */
enum Type {
JSON_SCHEMA = "json_schema"
}
}
}
/** Represents text format for a chat response. */
export interface ChatsResponseFormatText {
/**
* Type of response format being defined.
*
* NOTE(review): the generated comment and the companion `ChatsResponseFormatText.Constants.Type`
* enum both say `"json_schema"`, which contradicts a plain-text format. The intended value is
* presumably `"text"` — TODO confirm against the service. Any string passes type checking
* because of the `| string` union.
*/
type: ChatsResponseFormatText.Constants.Type | string;
}
/**
* Constants that accompany the `ChatsResponseFormatText` interface declared under the same name in
* this file.
*/
export declare namespace ChatsResponseFormatText {
namespace Constants {
/**
* Type of response format being defined.
*
* NOTE(review): the only member is `JSON_SCHEMA = "json_schema"`, identical to the enum in
* `ChatsResponseFormatJSONSchema`; for a text format this is presumably a generator copy-paste —
* confirm against the runtime before using it.
*/
enum Type {
JSON_SCHEMA = "json_schema"
}
}
}
//# sourceMappingURL=request.d.ts.map