@ibm-cloud/watsonx-ai

/**
 * (C) Copyright IBM Corp. 2025-2026.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
import type { Stream } from "../lib/common.mjs";
import type { CreateChatCompletionsParams } from "./types/chat/request.mjs";
import type { ChatsResponse } from "./types/chat/response.mjs";
import type { ObjectStreamed } from "../types/common.mjs";
import type { EmbeddingResponse } from "./types/embeddings/response.mjs";
import type { CreateEmbeddingsParams } from "./types/embeddings/request.mjs";
import type { CreateCompletionsParams } from "./types/text_completions/request.mjs";
import type { CompletionsResponse } from "./types/text_completions/response.mjs";
import type { APIBaseService } from "../base/base.mjs";
import type { Response } from "../base/types/base.mjs";

/** Item type of an object-mode chat completion stream (see `ChatCompletionResult`). */
export type ChatObjectStream = ObjectStreamed<ChatsResponse>;

/** Item type of an object-mode text completion stream (see `CompletionResult`). */
export type TextCompletionStream = ObjectStreamed<CompletionsResponse>;

/** Abstract base class shared by the completion request handlers declared below. */
export declare abstract class Completions {
  /** Service instance available to subclasses (presumably the constructor argument). */
  protected client: APIBaseService;

  /**
   * Builds a Completions handler.
   *
   * @param {APIBaseService} gateway - The APIBaseService instance.
   */
  constructor(gateway: APIBaseService);

  /**
   * Issues a completion request. Each subclass narrows the parameter and result types.
   *
   * @param {Object} params - The parameters to send to the service.
   */
  abstract create(params: Record<string, any>): Promise<Record<string, any>>;
}

/**
 * Result type of `ChatCompletions.create`, selected by the `stream` (`S`) and `returnObject`
 * (`R`) flags:
 *
 * - `S` is `true` and `R` is `false`: `Stream<string>`.
 * - `S` is `true` and `R` is anything else: `Stream<ChatObjectStream>`.
 * - `S` is not `true`: `Response<ChatsResponse>`.
 */
export type ChatCompletionResult<
  S extends boolean | undefined,
  R extends boolean | undefined,
> = S extends true
  ? R extends false
    ? Stream<string>
    : Stream<ChatObjectStream>
  : Response<ChatsResponse>;

/**
 * Result type of `GenerateTextCompletions.create`, selected by the `stream` (`S`) and
 * `returnObject` (`R`) flags:
 *
 * - `S` is `true` and `R` is `false`: `Stream<string>`.
 * - `S` is `true` and `R` is anything else: `Stream<TextCompletionStream>`.
 * - `S` is not `true`: `Response<CompletionsResponse>`.
 */
export type CompletionResult<
  S extends boolean | undefined,
  R extends boolean | undefined,
> = S extends true
  ? R extends false
    ? Stream<string>
    : Stream<TextCompletionStream>
  : Response<CompletionsResponse>;

/** Handler for chat completion requests. */
export declare class ChatCompletions extends Completions {
  /**
   * Create a chat completion.
   *
   * Produces a chat completion from the supplied messages and parameters using the selected
   * model.
   *
   * @param {Object} params - Request parameters sent to the service.
   * @param {ChatsMessage[]} params.messages - The messages that make up the conversation so far.
   *   Which message types (modalities) are accepted, such as `"text"`, `"images"`, and
   *   `"audio"`, depends on the model.
   * @param {string} params.model - ID or alias of the model that receives the chat request.
   * @param {JsonObject} [params.audio] - Audio output parameters. Needed only when audio output
   *   is requested with modalities: `["audio"]`. See
   *   [OpenAI's Audio Guide](https://platform.openai.com/docs/guides/audio) for more
   *   information.
   * @param {ChatsCache} [params.cache] - Cache configuration for the request. Caching is
   *   supported for non-streaming requests only.
   * @param {number} [params.frequencyPenalty] - Number between `-2.0` and `2.0`. Positive values
   *   penalize new tokens according to how often they already occur in the text so far, which
   *   lowers the model's likelihood of repeating the same line verbatim.
   * @param {ChatsFunctionCall} [params.functionCall] - Controls which function, if any, the
   *   model calls.
   *
   *   - `"none"`: the model calls no function and generates a message instead.
   *   - `"auto"`: the model may either generate a message or call a function.
   *   - `{"name": "my_function"}`: forces the model to call that particular function.
   *
   *   The default is `"none"` when no functions are present and `"auto"` when functions are
   *   present.
   *
   *   Deprecated: OpenAI has deprecated `function_call` in favor of `tool_choice`.
   * @param {JsonObject} [params.functions] - Functions the model may generate JSON inputs for.
   *
   *   Deprecated: OpenAI has deprecated `functions` in favor of `tools`.
   * @param {JsonObject} [params.logitBias] - Adjusts the likelihood of specified tokens appearing
   *   in the completion. Takes a JSON object mapping tokens (given by their token ID in the
   *   tokenizer) to a bias value from `-100` to `100`. Mathematically, the bias is added to the
   *   logits the model generates before sampling. The exact effect varies per model, but values
   *   between `-1` and `1` should decrease or increase the likelihood of selection, while values
   *   like `-100` or `100` should result in a ban or exclusive selection of the relevant token.
   * @param {boolean} [params.logprobs] - Whether to return log probabilities of the output
   *   tokens. When `true`, the log probabilities of each output token returned in the content of
   *   the message are included.
   * @param {number} [params.maxCompletionTokens] - Upper bound on the number of tokens that can
   *   be generated for a completion, counting both visible output tokens and
   *   [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
   * @param {number} [params.maxTokens] - Maximum number of tokens that can be generated in the
   *   chat completion. Can be used to control the cost of text generated via API.
   *
   *   Deprecated: OpenAI has deprecated `max_tokens` in favor of `max_completion_tokens`, and it
   *   is not compatible with `o1` series models.
   * @param {JsonObject} [params.metadata] - Developer-defined tags and values used for filtering
   *   completions.
   * @param {string[]} [params.modalities] - Output types the model should generate for this
   *   request. Most models can generate text, which is the default (`["text"]`). Some models can
   *   generate audio; for OpenAI, the `gpt-4o-audio-preview` model can be used to
   *   [generate audio](https://platform.openai.com/docs/guides/audio). Use `["text", "audio"]`
   *   to request both text and audio responses from that model.
   * @param {number} [params.n] - Number of chat completion choices to generate for each input
   *   message.
   *
   *   Note: you are charged for the generated tokens across all choices; keep `"n"` set to `1`
   *   to minimize costs.
   * @param {boolean} [params.parallelToolCalls] - Whether to enable parallel function calling
   *   during tool use.
   * @param {ChatsPrediction} [params.prediction] - Configuration for a
   *   [Predicted Output](https://platform.openai.com/docs/guides/predicted-outputs), which can
   *   greatly improve response times when large parts of the model response are known ahead of
   *   time. This is most common when regenerating a file with only minor changes to most of the
   *   content.
   * @param {number} [params.presencePenalty] - Number between `-2.0` and `2.0`. Positive values
   *   penalize new tokens according to whether they already appear in the text so far, which
   *   raises the model's likelihood of talking about new topics.
   * @param {string} [params.reasoningEffort] - Constrains the effort spent on reasoning for
   *   reasoning models. For OpenAI, currently supported by `o1` models only. Lower reasoning
   *   effort can give faster responses and fewer tokens used on reasoning in a response.
   * @param {ChatsResponseFormat} [params.responseFormat] - Object describing the format the model
   *   must output.
   *
   *   - `{ "type": "json_schema", "json_schema": {...} }` enables
   *     [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs), which
   *     ensures the model matches the supplied JSON schema.
   *   - `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
   *     generates is valid JSON.
   *
   *   Important: in JSON mode you must also instruct the model to produce JSON yourself through a
   *   system or user message. Otherwise the model may generate an unending stream of whitespace
   *   until the generation reaches the token limit, resulting in a long-running and seemingly
   *   "stuck" request. The message content may also be partially cut off if `"finish_reason"` is
   *   `"length"`, which indicates the generation exceeded `max_tokens` or the conversation
   *   exceeded the max context length.
   * @param {JsonObject} [params.router] - Model routing configuration for the request.
   * @param {number} [params.seed] - Seed for the model request. For OpenAI, this feature is in
   *   Beta. If specified, OpenAI's system makes a best-effort attempt to sample
   *   deterministically, so that repeated requests with the same seed and parameters should
   *   return the same result. Determinism is not guaranteed; refer to the `system_fingerprint`
   *   response parameter to monitor changes in the backend.
   * @param {string} [params.serviceTier] - Service tier used for processing the request.
   * @param {string[]} [params.stop] - Up to 4 sequences at which the API stops generating further
   *   tokens.
   * @param {boolean} [params.store] - Whether to store the output of this chat completion request
   *   for use in OpenAI's
   *   [model distillation](https://platform.openai.com/docs/guides/distillation) or
   *   [evals](https://platform.openai.com/docs/guides/evals) products.
   * @param {boolean} [params.stream] - Whether to stream the model response to the user. If set,
   *   partial message deltas are sent, like in ChatGPT. Tokens are sent as data-only server-sent
   *   events as they become available, and the stream is terminated by a `data: [DONE]` message.
   * @param {StreamOptions} [params.streamOptions] - Options for the streaming response. Only set
   *   this when `stream` is `true`.
   * @param {number} [params.temperature] - Sampling temperature to use. Higher values like `0.8`
   *   make the output more random, while lower values like `0.2` make it more focused and
   *   deterministic.
   *
   *   Note: OpenAI generally recommends altering this or `top_p` but not both.
   * @param {ChatsToolChoice} [params.toolChoice] - Controls which tool, if any, the model calls.
   *
   *   - `"none"`: the model calls no tool and generates a message instead.
   *   - `"auto"`: the model may either generate a message or call one or more tools.
   *   - `"required"`: the model must call one or more tools.
   *   - `{"type": "function", "function": {"name": "my_function"}}`: forces the model to call
   *     that particular tool.
   *
   *   The default is `"none"` when no tools are present and `"auto"` when tools are present.
   * @param {ChatsRequestTool[]} [params.tools] - Tools the model may call. Currently, only
   *   functions are supported as tools. Use this to provide a list of functions the model may
   *   generate JSON inputs for. A max of 128 functions are supported.
   * @param {number} [params.topLogprobs] - Integer between `0` and `20` giving the number of most
   *   likely tokens to return at each token position, each with an associated log probability.
   *   `logprobs` must be set to `true` if this parameter is used.
   * @param {number} [params.topP] - Alternative to sampling with `temperature`, called nucleus
   *   sampling, where the model considers the results of the tokens with `top_p` probability
   *   mass. Example: `0.1` means only the tokens comprising the top 10% probability mass are
   *   considered.
   *
   *   Note: OpenAI generally recommends altering this or `temperature` but not both.
   * @param {string} [params.user] - Unique identifier representing your end-user, which can help
   *   OpenAI to monitor and detect abuse.
   * @param {boolean} [params.returnObject] - Selects the item type of a streamed result: `true`
   *   yields objects, `false` yields strings. Default: `true`.
   * @param {AbortSignal} [params.signal] - Signal from AbortController.
   * @param {OutgoingHttpHeaders} [params.headers] - Custom request headers.
   * @returns {Promise<Response<ChatsResponse> | Stream<string | ChatObjectStream>>} A Response of
   *   ChatsResponse when not streaming; otherwise a Stream of strings or of ChatObjectStream
   *   items, depending on `returnObject`.
   * @throws {Error} If validation fails or an error occurs during the request.
   */
  create<S extends boolean | undefined = false, R extends boolean | undefined = undefined>(
    params: CreateChatCompletionsParams & {
      stream?: S;
      returnObject?: R;
    },
  ): Promise<ChatCompletionResult<S, R>>;
}

/** Handler for embedding requests. */
export declare class EmbeddingCompletions extends Completions {
  /**
   * Create embeddings.
   *
   * Produces embeddings for the supplied input using the selected model.
   *
   * @param {Object} params - Request parameters sent to the service.
   * @param {EmbeddingsInput} params.input - Input text to embed, encoded as a string, array of
   *   strings, array of integers, or array of integer arrays. The input must not exceed the max
   *   input tokens for the model (8192 tokens for OpenAI's `text-embedding-ada-002`) and cannot
   *   be an empty string. Any array must be 2048 dimensions or less. Some models may also impose
   *   a limit on the total number of tokens summed across inputs.
   * @param {string} params.model - ID of the model to use.
   * @param {number} [params.dimensions] - Number of dimensions the resulting output embeddings
   *   should have. For OpenAI, only supported in `text-embedding-3` and later models.
   * @param {string} [params.encodingFormat] - Format in which to return the embeddings. Either
   *   `"float"` or `"base64"`.
   * @param {string} [params.user] - Unique identifier representing your end-user.
   * @param {AbortSignal} [params.signal] - Signal from AbortController.
   * @param {OutgoingHttpHeaders} [params.headers] - Custom request headers.
   * @returns {Promise<Response<EmbeddingResponse>>} Embeddings response for the provided input.
   * @throws {Error} If validation fails or an error occurs during the request.
   */
  create(params: CreateEmbeddingsParams): Promise<Response<EmbeddingResponse>>;
}

/** Handler for basic text completion requests. */
export declare class GenerateTextCompletions extends Completions {
  /**
   * Create a text completion.
   *
   * Produces a text completion from the supplied prompt and parameters using the selected model.
   *
   * @param {Object} params - Request parameters sent to the service.
   * @param {string} params.model - ID of the model to use.
   * @param {string} params.prompt - Prompt(s) to generate completions for, encoded as a string,
   *   array of strings, array of tokens, or array of token arrays.
   *
   *   Note: `<|endoftext|>` is the document separator the model sees during training, so if a
   *   prompt is not specified the model generates as if from the beginning of a new document.
   * @param {number} [params.bestOf] - Generates `best_of` completions server-side and returns the
   *   "best" (the one with the highest log probability per token). Results cannot be streamed.
   *   When used with `n`, `best_of` controls the number of candidate completions and `n`
   *   specifies how many to return; `best_of` must be greater than `n`.
   *
   *   Note: because this parameter generates many completions, it can quickly consume your token
   *   quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and
   *   `stop`.
   * @param {CompletionsCache} [params.cache] - Cache configuration for the request. Caching is
   *   supported for non-streaming requests only.
   * @param {boolean} [params.echo] - Echo back the prompt in addition to the completion.
   * @param {number} [params.frequencyPenalty] - Number between `-2.0` and `2.0`. Positive values
   *   penalize new tokens according to how often they already occur in the text so far, which
   *   lowers the model's likelihood of repeating the same line verbatim.
   * @param {JsonObject} [params.logitBias] - Adjusts the likelihood of specified tokens appearing
   *   in the completion. Takes a JSON object mapping tokens (given by their token ID in the GPT
   *   tokenizer) to a bias value from -100 to 100. A tokenizer tool can be used to convert text
   *   to token IDs. Mathematically, the bias is added to the logits the model generates before
   *   sampling.
   *
   *   The exact effect varies per model, but:
   *
   *   - Values between `-1` and `1` should decrease or increase the likelihood of selection.
   *   - Values like `-100` or `100` should result in a ban or exclusive selection of the relevant
   *     token.
   *
   *   For example, pass `{"50256": -100}` to prevent the `<|endoftext|>` token from being
   *   generated.
   * @param {number} [params.logprobs] - Number of most likely output tokens to include the log
   *   probabilities of, as well as the chosen tokens. For example, if `logprobs` is `5`, the API
   *   returns a list of the 5 most likely tokens. The API always returns the `logprob` of the
   *   sampled token, so there may be up to `logprobs+1` elements in the response. The maximum
   *   value for `logprobs` is `5`.
   * @param {number} [params.maxTokens] - Maximum number of tokens that can be generated in the
   *   completion. The token count of your prompt plus `max_tokens` cannot exceed the model's
   *   context length.
   * @param {JsonObject} [params.metadata] - Developer-defined tags and values used for filtering
   *   completions.
   * @param {number} [params.n] - Number of completions to generate for each prompt.
   *
   *   Note: because this parameter generates many completions, it can quickly consume your token
   *   quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and
   *   `stop`.
   * @param {number} [params.presencePenalty] - Number between `-2.0` and `2.0`. Positive values
   *   penalize new tokens according to whether they already appear in the text so far, which
   *   raises the model's likelihood of talking about new topics.
   * @param {ModelRouter} [params.router] - Model routing configuration for the request.
   * @param {number} [params.seed] - Seed for the model request. If specified, OpenAI's system
   *   makes a best effort to sample deterministically, so that repeated requests with the same
   *   `seed` and parameters should return the same result.
   *
   *   Determinism is not guaranteed; refer to the `system_fingerprint` response parameter to
   *   monitor changes in the backend.
   * @param {string[]} [params.stop] - Up to 4 sequences at which the API stops generating further
   *   tokens.
   * @param {boolean} [params.stream] - Whether to stream back partial progress. If set, tokens
   *   are sent as data-only
   *   [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
   *   as they become available, and the stream is terminated by a `data: [DONE]` message.
   * @param {StreamOptions} [params.streamOptions] - Options for the streaming response. Only set
   *   this when `stream` is `true`.
   * @param {string} [params.suffix] - Text that comes after a completion of inserted text. On
   *   OpenAI, this parameter is only supported for `gpt-3.5-turbo-instruct`.
   * @param {number} [params.temperature] - Sampling temperature to use, between `0` and `2`.
   *   Higher values like `0.8` make the output more random, while lower values like `0.2` make it
   *   more focused and deterministic.
   *
   *   Note: OpenAI generally recommends altering this or `top_p` but not both.
   * @param {number} [params.topP] - Alternative to sampling with `temperature`, called nucleus
   *   sampling, where the model considers the results of the tokens with `top_p` probability
   *   mass. So `0.1` means only the tokens comprising the top 10% probability mass are
   *   considered.
   *
   *   Note: OpenAI generally recommends altering this or `temperature` but not both.
   * @param {string} [params.user] - Unique identifier representing your end-user, which can help
   *   Services to monitor and detect abuse.
   * @param {boolean} [params.returnObject] - Selects the item type of a streamed result: `true`
   *   yields objects, `false` yields strings. Default: `true`.
   * @param {AbortSignal} [params.signal] - Signal from AbortController.
   * @param {OutgoingHttpHeaders} [params.headers] - Custom request headers.
   * @returns {Promise<Response<CompletionsResponse> | Stream<string | TextCompletionStream>>} A
   *   Response of CompletionsResponse when not streaming; otherwise a Stream of strings or of
   *   TextCompletionStream items, depending on `returnObject`.
   * @throws {Error} If validation fails or an error occurs during the request.
   */
  create<S extends boolean | undefined = false, R extends boolean | undefined = undefined>(
    params: CreateCompletionsParams & {
      stream?: S;
      returnObject?: R;
    },
  ): Promise<CompletionResult<S, R>>;
}
//# sourceMappingURL=completions.d.mts.map