// @ibm-cloud/watsonx-ai — IBM watsonx.ai Node.js SDK
// Version: (not specified)
// TypeScript declaration file, 151 lines, 7.27 kB
/**
* (C) Copyright IBM Corp. 2025-2026.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
import type { DefaultParams, JsonObject, CacheConfig } from "../../../types/common.js";
import type { StreamOptions } from "../gateway.js";
import type { ModelRouter } from "../models/response.js";
/**
* Parameters for the `completions.create` operation.
*
* Extends `DefaultParams`; only `model` and `prompt` are required, every other property is
* optional.
*
* NOTE(review): several descriptions below refer to options by snake_case names (`best_of`,
* `max_tokens`, `top_p`). These presumably correspond to the camelCase properties `bestOf`,
* `maxTokens` and `topP` declared here — confirm against the wire format.
*/
export interface CreateCompletionsParams extends DefaultParams {
/** Model is the ID of the model to use. */
model: string;
/**
* Prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens,
* or array of token arrays.
*
* Note: `<|endoftext|>` is the document separator that the model sees during training, so if a
* prompt is not specified the model will generate as if from the beginning of a new document.
*
* NOTE(review): the description mentions array forms, but this property is typed as `string`
* only — confirm whether array prompts are meant to be accepted.
*/
prompt: string;
/**
* Generates `best_of` number of completions server-side and returns the "best" (the one with the
* highest log probability per token). Results cannot be streamed. When used with `n`, `best_of`
* controls the number of candidate completions and `n` specifies how many to return – `best_of`
* must be greater than `n`.
*
* Note: Because this parameter generates many completions, it can quickly consume your token
* quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
*/
bestOf?: number;
/** Caching configuration for the request. Cache is only supported for non-streaming requests. */
cache?: CacheConfig;
/** Echo back the prompt in addition to the completion. */
echo?: boolean;
/**
* A number between `-2.0` and `2.0`. Positive values penalize new tokens based on their existing
* frequency in the text so far, decreasing the model's likelihood to repeat the same line
* verbatim.
*/
frequencyPenalty?: number;
/**
* Used to modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
* object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated
* bias value from `-100` to `100`. A tokenizer tool can be used to convert text to token IDs.
* Mathematically, the bias is added to the logits generated by the model prior to sampling.
*
* The exact effect will vary per model, but:
*
* - Values between `-1` and `1` should decrease or increase the likelihood of selection.
* - Values like `-100` or `100` should result in a ban or exclusive selection of the relevant
*   token.
*
* As an example, you can pass `{"50256": -100}` to prevent the `<|endoftext|>` token from being
* generated.
*/
logitBias?: JsonObject;
/**
* The number of most likely output tokens to include the log probabilities of, as well as the
* chosen tokens. For example, if `logprobs` is `5`, the API will return a list of the 5 most
* likely tokens. The API will always return the `logprob` of the sampled token, so there may be
* up to `logprobs+1` elements in the response. The maximum value for `logprobs` is `5`.
*/
logprobs?: number;
/**
* The maximum number of tokens that can be generated in the completion. The token count of your
* prompt plus `max_tokens` cannot exceed the model's context length.
*/
maxTokens?: number;
/** Contains developer-defined tags and values used for filtering completions. */
metadata?: JsonObject;
/**
* Specifies how many completions to generate for each prompt.
*
* Note: Because this parameter generates many completions, it can quickly consume your token
* quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
*/
n?: number;
/**
* A number between `-2.0` and `2.0`. Positive values penalize new tokens based on whether they
* appear in the text so far, increasing the model's likelihood to talk about new topics.
*/
presencePenalty?: number;
/** Router is the model routing configuration for the request. */
router?: ModelRouter;
/**
* The seed for the model request. If specified, OpenAI's system will make a best effort to sample
* deterministically, such that repeated requests with the same `seed` and parameters should
* return the same result.
*
* Determinism is not guaranteed, and you should refer to the `system_fingerprint` response
* parameter to monitor changes in the backend.
*/
seed?: number;
/** Specifies up to 4 sequences where the API will stop generating further tokens. */
stop?: string[];
/**
* Indicates whether to stream back partial progress. If set, tokens will be sent as data-only
* [server-sent events] as they become available, with the stream terminated by a `data: [DONE]`
* message.
*
* [server-sent events]: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format
*/
stream?: boolean;
/** Options for streaming response. Only set this when you set `stream` to `true`. */
streamOptions?: StreamOptions;
/**
* Text that comes after a completion of inserted text. On OpenAI, this parameter is only
* supported for `gpt-3.5-turbo-instruct`.
*/
suffix?: string;
/**
* Specifies what temperature to use for sampling, between `0` and `2`. Higher values like `0.8`
* will make the output more random, while lower values like `0.2` will make it more focused and
* deterministic.
*
* Note: OpenAI generally recommends altering this or `top_p` but not both.
*/
temperature?: number;
/**
* An alternative to sampling with `temperature`, called nucleus sampling, where the model
* considers the results of the tokens with `top_p` probability mass. So `0.1` means only the
* tokens comprising the top 10% probability mass are considered.
*
* Note: OpenAI generally recommends altering this or `temperature` but not both.
*/
topP?: number;
/**
* A unique identifier representing your end-user, which can help services to monitor and detect
* abuse.
*/
user?: string;
}
/**
* Parameters for the `completions.create` operation without stream.
*
* Same shape as `CreateCompletionsParams`, with `stream` narrowed so that it can only be omitted
* or set to `false`.
*/
export interface CreateBasicCompletionsParams extends CreateCompletionsParams {
/** Streaming is off for this variant: the only permitted explicit value is `false`. */
stream?: false;
}
/**
* Parameters for the `completions.create` operation with stream.
*
* Same shape as `CreateCompletionsParams`, with `stream` made mandatory and fixed to `true`, plus
* an extra `returnObject` option.
*/
export interface CreateStreamCompletionsParams extends CreateCompletionsParams {
/** Must be present and `true`; this literal is what marks the request as a streaming one. */
stream: true;
/**
* Indicates return type of stream chunks.
*
* NOTE(review): what `true` and `false` select is not visible from this declaration —
* presumably parsed objects versus raw chunks; confirm against the implementation.
*/
returnObject?: boolean;
}
//# sourceMappingURL=request.d.ts.map