@ibm-cloud/watsonx-ai
Version:
IBM watsonx.ai Node.js SDK
338 lines • 22.7 kB
text/typescript
/**
* (C) Copyright IBM Corp. 2025-2026.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
import type { Stream } from "../lib/common.mjs";
import type { CreateChatCompletionsParams } from "./types/chat/request.mjs";
import type { ChatsResponse } from "./types/chat/response.mjs";
import type { ObjectStreamed } from "../types/common.mjs";
import type { EmbeddingResponse } from "./types/embeddings/response.mjs";
import type { CreateEmbeddingsParams } from "./types/embeddings/request.mjs";
import type { CreateCompletionsParams } from "./types/text_completions/request.mjs";
import type { CompletionsResponse } from "./types/text_completions/response.mjs";
import type { APIBaseService } from "../base/base.mjs";
import type { Response } from "../base/types/base.mjs";
/** `ChatsResponse` wrapped in `ObjectStreamed`; the type argument of `Stream` for object-mode chat streaming (see `ChatCompletionResult`). */
export type ChatObjectStream = ObjectStreamed<ChatsResponse>;
/** `CompletionsResponse` wrapped in `ObjectStreamed`; the type argument of `Stream` for object-mode text completion streaming (see `CompletionResult`). */
export type TextCompletionStream = ObjectStreamed<CompletionsResponse>;
/**
* Abstract base class for the completion request handlers declared in this module.
*
* Each concrete subclass declares its own `create` with narrower parameter and result types.
*/
export declare abstract class Completions {
/**
* Service instance available to subclasses. Presumably the `gateway` passed to the constructor;
* the implementation is not visible in this declaration file, so confirm against the source.
*/
protected client: APIBaseService;
/**
* Constructor for Completions class.
*
* @param {APIBaseService} gateway - The APIBaseService instance.
*/
constructor(gateway: APIBaseService);
/**
* Create a completion. Declared abstract here; every concrete subclass provides its own
* request parameter type and result type.
*
* @param {Object} params - The parameters to send to the service.
* @returns {Promise<Object>} Promise resolving to the subclass-specific result.
*/
abstract create(params: Record<string, any>): Promise<Record<string, any>>;
}
/**
* Resolved result type of `ChatCompletions.create`, selected by the `stream` flag (`S`) and the
* `returnObject` flag (`R`):
*
* - `S` is `true` and `R` is `false`: `Stream<string>`.
* - `S` is `true` and `R` is `true` or `undefined`: `Stream<ChatObjectStream>`.
* - `S` is `false` or `undefined`: `Response<ChatsResponse>`; `R` is not consulted.
*
* Both conditionals distribute over unions, so passing the wide `boolean` type for `S` or `R`
* yields the union of the corresponding branches.
*/
export type ChatCompletionResult<S extends boolean | undefined, R extends boolean | undefined> = S extends true ? R extends false ? Stream<string> : Stream<ChatObjectStream> : Response<ChatsResponse>;
/**
* Resolved result type of `GenerateTextCompletions.create`, selected by the `stream` flag (`S`)
* and the `returnObject` flag (`R`):
*
* - `S` is `true` and `R` is `false`: `Stream<string>`.
* - `S` is `true` and `R` is `true` or `undefined`: `Stream<TextCompletionStream>`.
* - `S` is `false` or `undefined`: `Response<CompletionsResponse>`; `R` is not consulted.
*
* Both conditionals distribute over unions, so passing the wide `boolean` type for `S` or `R`
* yields the union of the corresponding branches.
*/
export type CompletionResult<S extends boolean | undefined, R extends boolean | undefined> = S extends true ? R extends false ? Stream<string> : Stream<TextCompletionStream> : Response<CompletionsResponse>;
/** Class for handling chat completion requests. */
export declare class ChatCompletions extends Completions {
/**
* Create Chat Completions.
*
* Generate a chat completion based on the provided messages and parameters using the provided
* model.
*
* The resolved type depends on the literal types of `params.stream` and `params.returnObject`;
* see `ChatCompletionResult` for the exact mapping.
*
* @template S - Type of `params.stream`; defaults to `false`.
* @template R - Type of `params.returnObject`; defaults to `undefined`.
* @param {Object} params - The parameters to send to the service.
* @param {ChatsMessage[]} params.messages - A list of messages comprising the chat conversation
* so far. Depending on the model you use, different message types (modalities) are supported,
* like `"text"`, `"images"`, and `"audio"`.
* @param {string} params.model - ID or alias of the model to forward the chat request to.
* @param {JsonObject} [params.audio] - Parameters for audio output. Only required when audio
* output is requested with modalities: `["audio"]`.
*
* See: [OpenAI's Audio Guide](https://platform.openai.com/docs/guides/audio) for more
* information.
* @param {ChatsCache} [params.cache] - Caching configuration for a request. Cache is only
* supported for non-streaming requests.
* @param {number} [params.frequencyPenalty] - A number between `-2.0` and `2.0`. Positive values
* penalize new tokens based on their existing frequency in the text so far, decreasing the
* model's likelihood to repeat the same line verbatim.
* @param {ChatsFunctionCall} [params.functionCall] - Controls which (if any) function is called
* by the model.
*
* - `"none"` means the model will not call a function and instead generates a message.
* - `"auto"` means the model can pick between generating a message or calling a function.
* - Specifying a particular function via `{"name": "my_function"}` forces the model to call that
* function.
*
* `"none"` is the default when no functions are present. `"auto"` is the default if functions are
* present.
*
* Deprecated: `function_call` has been deprecated by OpenAI in favor of `tool_choice`.
* @param {JsonObject} [params.functions] - A list of functions the model may generate JSON inputs
* for.
*
* Deprecated: `functions` has been deprecated by OpenAI in favor of `tools`.
* @param {JsonObject} [params.logitBias] - Modifies the likelihood of specified tokens appearing
* in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the
* tokenizer) to an associated bias value from `-100` to `100`. Mathematically, the bias is
* added to the logits generated by the model prior to sampling. The exact effect will vary per
* model, but values between `-1` and `1` should decrease or increase likelihood of selection;
* values like `-100` or `100` should result in a ban or exclusive selection of the relevant
* token.
* @param {boolean} [params.logprobs] - Indicates whether to return log probabilities of the
* output tokens or not. If `true`, returns the log probabilities of each output token returned
* in the content of message.
* @param {number} [params.maxCompletionTokens] - Specifies an upper bound for the number of
* tokens that can be generated for a completion, including visible output tokens and [reasoning
* tokens][reasoning tokens].
*
* [reasoning tokens]: https://platform.openai.com/docs/guides/reasoning
* @param {number} [params.maxTokens] - Specifies a maximum number of tokens that can be generated
* in the chat completion. This value can be used to control costs for text generated via API.
*
* Deprecated: `max_tokens` has been deprecated by OpenAI in favor of `max_completion_tokens`, and
* is not compatible with `o1` series models.
* @param {JsonObject} [params.metadata] - Contains developer-defined tags and values used for
* filtering completions.
* @param {string[]} [params.modalities] - Specifies the output types that you would like the
* model to generate for this request. Most models are capable of generating text, which is the
* default (`["text"]`). Some models can generate audio. For OpenAI, the `gpt-4o-audio-preview`
* model can be used to [generate audio][generate audio]. To request that this model generate
* both text and audio responses, you can use `["text", "audio"]`.
*
* [generate audio]: https://platform.openai.com/docs/guides/audio
* @param {number} [params.n] - Specifies how many chat completion choices to generate for each
* input message.
*
* Note: you will be charged based on the number of generated tokens across all choices, keep
* `"n"` set to `1` to minimize costs.
* @param {boolean} [params.parallelToolCalls] - Specifies whether to enable parallel function
* calling during tool use.
* @param {ChatsPrediction} [params.prediction] - The configuration for a [Predicted
* Output][Predicted Output], which can greatly improve response times when large parts of the
* model response are known ahead of time. This is most common when you are regenerating a file
* with only minor changes to most of the content.
*
* [Predicted Output]: https://platform.openai.com/docs/guides/predicted-outputs
* @param {number} [params.presencePenalty] - A number between `-2.0` and `2.0`. Positive values
* penalize new tokens based on whether they appear in the text so far, increasing the model's
* likelihood to talk about new topics.
* @param {string} [params.reasoningEffort] - Constrains effort on reasoning for reasoning models.
* For OpenAI, currently supported by `o1` models only. Reducing reasoning effort can result in
* faster responses and fewer tokens used on reasoning in a response.
* @param {ChatsResponseFormat} [params.responseFormat] - An object specifying the format that the
* model must output.
*
* - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables [Structured
* Outputs][Structured Outputs] which ensures the model will match your supplied JSON
* schema.
* - Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model
* generates is valid JSON.
*
* Important: when using JSON mode, you must also instruct the model to produce JSON yourself via
* a system or user message. Without this, the model may generate an unending stream of
* whitespace until the generation reaches the token limit, resulting in a long-running and
* seemingly "stuck" request. Also note that the message content may be partially cut off if
* `"finish_reason"` is set to `"length"`, which indicates the generation exceeded `max_tokens`
* or the conversation exceeded the max context length.
*
* [Structured Outputs]: https://platform.openai.com/docs/guides/structured-outputs
* @param {JsonObject} [params.router] - Specifies model routing configurations for the request.
* @param {number} [params.seed] - The seed for the model request. For OpenAI, this feature is in
* Beta. If specified, OpenAI's system will make a best effort attempt to sample
* deterministically, such that repeated requests with the same seed and parameters should
* return the same result. Determinism is not guaranteed, and you should refer to the
* `system_fingerprint` response parameter to monitor changes in the backend.
* @param {string} [params.serviceTier] - The service tier used for processing a request.
* @param {string[]} [params.stop] - Specifies up to 4 sequences where the API will stop
* generating further tokens.
* @param {boolean} [params.store] - Indicates whether to store the output of this chat completion
* request for use in OpenAI's [model distillation][model distillation] or [evals][evals]
* products.
*
* [model distillation]: https://platform.openai.com/docs/guides/distillation
*
* [evals]: https://platform.openai.com/docs/guides/evals
* @param {boolean} [params.stream] - Indicates whether to stream the model response to the user.
* If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as
* data-only server-sent events as they become available, with the stream terminated by a
* `data: [DONE]` message.
* @param {StreamOptions} [params.streamOptions] - Options for streaming response. Only set this
* when you set `stream` to `true`.
* @param {number} [params.temperature] - Specifies what sampling temperature to use. Higher
* values like `0.8` will make the output more random, while lower values like `0.2` will make
* it more focused and deterministic.
*
* Note: OpenAI generally recommends altering this or `top_p` but not both.
* @param {ChatsToolChoice} [params.toolChoice] - Controls which (if any) tool is called by the
* model.
*
* - `"none"` means the model will not call any tool and instead generates a message.
* - `"auto"` means the model can pick between generating a message or calling one or more tools.
* - `"required"` means the model must call one or more tools.
* - Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}`
* forces the model to call that tool.
*
* `"none"` is the default when no tools are present. `"auto"` is the default if tools are
* present.
* @param {ChatsRequestTool[]} [params.tools] - A list of tools the model may call. Currently,
* only functions are supported as tools. Use this to provide a list of functions the model may
* generate JSON inputs for. A max of 128 functions are supported.
* @param {number} [params.topLogprobs] - An integer between `0` and `20` specifying the number of
* most likely tokens to return at each token position, each with an associated log probability.
* `logprobs` must be set to `true` if this parameter is used.
* @param {number} [params.topP] - An alternative to sampling with `temperature`, called nucleus
* sampling, where the model considers the results of the tokens with `top_p` probability mass.
* Example: `0.1` means only the tokens comprising the top 10% probability mass are considered.
*
* Note: OpenAI generally recommends altering this or `temperature` but not both.
* @param {string} [params.user] - A unique identifier representing your end-user, which can help
* OpenAI to monitor and detect abuse.
* @param {boolean} [params.returnObject] - Flag that selects the item type of a streamed
* response. Set `false` to receive a `Stream<string>`; when `true` or omitted the result is a
* `Stream<ChatObjectStream>`. It does not change the declared result type when `stream` is not
* `true`. Default: `true`
* @param {AbortSignal} [params.signal] - Signal from AbortController
* @param {OutgoingHttpHeaders} [params.headers] - Custom request headers
* @returns {Promise<Response<ChatsResponse> | Stream<string> | Stream<ChatObjectStream>>} A
* `Response<ChatsResponse>` unless `stream` is `true`; when streaming, a `Stream<string>` if
* `returnObject` is `false`, otherwise a `Stream<ChatObjectStream>`.
* @throws {Error} If validation fails or an error occurs during the request.
*/
create<S extends boolean | undefined = false, R extends boolean | undefined = undefined>(params: CreateChatCompletionsParams & {
stream?: S;
returnObject?: R;
}): Promise<ChatCompletionResult<S, R>>;
}
/** Class for handling embedding completion requests. */
export declare class EmbeddingCompletions extends Completions {
/**
* Create Embeddings completions.
*
* Generate embeddings based on the provided input using the provided model.
*
* Unlike the chat and text completion `create` methods declared in this module, this signature
* has no `stream`/`returnObject` type parameters: the declared result is always a
* `Response<EmbeddingResponse>`.
*
* @param {Object} params - The parameters to send to the service.
* @param {EmbeddingsInput} params.input - Input text to embed, encoded as a string, array of
* strings, array of integers, or array of integer arrays. The input must not exceed the max
* input tokens for the model (8192 tokens for OpenAI's `text-embedding-ada-002`) and cannot be
* an empty string. Any array must be 2048 dimensions or less. Some models may also impose a
* limit on total number of tokens summed across inputs.
* @param {string} params.model - ID of the model to use.
* @param {number} [params.dimensions] - Number of dimensions the resulting output embeddings
* should have. For OpenAI, only supported in `text-embedding-3` and later models.
* @param {string} [params.encodingFormat] - Format to return the embeddings in. Can be either
* `"float"` or `"base64"`.
* @param {string} [params.user] - A unique identifier representing your end-user.
* @param {AbortSignal} [params.signal] - Signal from AbortController
* @param {OutgoingHttpHeaders} [params.headers] - Custom request headers
* @returns {Promise<Response<EmbeddingResponse>>} Embeddings response for the provided input.
* @throws {Error} If validation fails or an error occurs during the request.
*/
create(params: CreateEmbeddingsParams): Promise<Response<EmbeddingResponse>>;
}
/** Class for handling basic text completion requests. */
export declare class GenerateTextCompletions extends Completions {
/**
* Create Text Completions.
*
* Generate a text completion based on the provided prompt and parameters using the provided
* model.
*
* The resolved type depends on the literal types of `params.stream` and `params.returnObject`;
* see `CompletionResult` for the exact mapping.
*
* @template S - Type of `params.stream`; defaults to `false`.
* @template R - Type of `params.returnObject`; defaults to `undefined`.
* @param {Object} params - The parameters to send to the service.
* @param {string} params.model - Model is the ID of the model to use.
* @param {string} params.prompt - Prompt(s) to generate completions for, encoded as a string,
* array of strings, array of tokens, or array of token arrays.
*
* Note: `<|endoftext|>` is the document separator that the model sees during training, so if a
* prompt is not specified the model will generate as if from the beginning of a new document.
* @param {number} [params.bestOf] - Generates `best_of` number of completions server-side and
* returns the "best" (the one with the highest log probability per token). Results cannot be
* streamed. When used with `n`, `best_of` controls the number of candidate completions and `n`
* specifies how many to return – `best_of` must be greater than `n`.
*
* Note: Because this parameter generates many completions, it can quickly consume your token
* quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and
* `stop`.
* @param {CompletionsCache} [params.cache] - Caching configuration for the request. Cache is only
* supported for non-streaming requests.
* @param {boolean} [params.echo] - Echo back the prompt in addition to the completion.
* @param {number} [params.frequencyPenalty] - A number between `-2.0` and `2.0`. Positive values
* penalize new tokens based on their existing frequency in the text so far, decreasing the
* model's likelihood to repeat the same line verbatim.
* @param {JsonObject} [params.logitBias] - Used to modify the likelihood of specified tokens
* appearing in the completion. Accepts a JSON object that maps tokens (specified by their token
* ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this
* tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits
* generated by the model prior to sampling.
*
* The exact effect will vary per model, but:
*
* - Values between `-1` and `1` should decrease or increase likelihood of selection and
* - Values like `-100` or `100` should result in a ban or exclusive selection of the relevant
* token.
*
* As an example, you can pass `{"50256": -100}` to prevent the `<|endoftext|>` token from being
* generated.
* @param {number} [params.logprobs] - The number of most likely output tokens to include the log
* probabilities of, as well as the chosen tokens. For example, if `logprobs` is `5`, the API
* will return a list of the 5 most likely tokens. The API will always return the `logprob` of
* the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum
* value for `logprobs` is `5`.
* @param {number} [params.maxTokens] - The maximum number of tokens that can be generated in the
* completion. The token count of your prompt plus `max_tokens` cannot exceed the model's
* context length.
* @param {JsonObject} [params.metadata] - Contains developer-defined tags and values used for
* filtering completions.
* @param {number} [params.n] - Specifies how many completions to generate for each prompt.
*
* Note: Because this parameter generates many completions, it can quickly consume your token
* quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and
* `stop`.
* @param {number} [params.presencePenalty] - A number between `-2.0` and `2.0`. Positive values
* penalize new tokens based on whether they appear in the text so far, increasing the model's
* likelihood to talk about new topics.
* @param {ModelRouter} [params.router] - Router is the model routing configuration for the
* request.
* @param {number} [params.seed] - The seed for the model request. If specified, OpenAI's system
* will make a best effort to sample deterministically, such that repeated requests with the
* same `seed` and parameters should return the same result.
*
* Determinism is not guaranteed, and you should refer to the `system_fingerprint` response
* parameter to monitor changes in the backend.
* @param {string[]} [params.stop] - Specifies up to 4 sequences where the API will stop
* generating further tokens.
* @param {boolean} [params.stream] - Indicates whether to stream back partial progress. If set,
* tokens will be sent as data-only [server-sent events][server-sent events] as they become
* available, with the stream terminated by a `data: [DONE]` message.
*
* [server-sent events]: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format
* @param {StreamOptions} [params.streamOptions] - Options for streaming response. Only set this
* when you set `stream` to `true`.
* @param {string} [params.suffix] - Text that comes after a completion of inserted text. On
* OpenAI, this parameter is only supported for `gpt-3.5-turbo-instruct`.
* @param {number} [params.temperature] - Specifies what sampling temperature to use, between
* `0` and `2`. Higher values like `0.8` will make the output more random, while lower values
* like `0.2` will make it more focused and deterministic.
*
* Note: OpenAI generally recommends altering this or `top_p` but not both.
* @param {number} [params.topP] - An alternative to sampling with `temperature`, called nucleus
* sampling, where the model considers the results of the tokens with `top_p` probability mass.
* So `0.1` means only the tokens comprising the top 10% probability mass are considered.
*
* Note: OpenAI generally recommends altering this or `temperature` but not both.
* @param {string} [params.user] - A unique identifier representing your end-user, which can help
* Services to monitor and detect abuse.
* @param {boolean} [params.returnObject] - Flag that selects the item type of a streamed
* response. Set `false` to receive a `Stream<string>`; when `true` or omitted the result is a
* `Stream<TextCompletionStream>`. It does not change the declared result type when `stream` is
* not `true`. Default: `true`
* @param {AbortSignal} [params.signal] - Signal from AbortController
* @param {OutgoingHttpHeaders} [params.headers] - Custom request headers
* @returns {Promise<Response<CompletionsResponse> | Stream<string> | Stream<TextCompletionStream>>}
* A `Response<CompletionsResponse>` unless `stream` is `true`; when streaming, a
* `Stream<string>` if `returnObject` is `false`, otherwise a `Stream<TextCompletionStream>`.
* @throws {Error} If validation fails or an error occurs during the request.
*/
create<S extends boolean | undefined = false, R extends boolean | undefined = undefined>(params: CreateCompletionsParams & {
stream?: S;
returnObject?: R;
}): Promise<CompletionResult<S, R>>;
}
//# sourceMappingURL=completions.d.mts.map