@ibm-cloud/watsonx-ai

/**
 * (C) Copyright IBM Corp. 2025-2026.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
import type { DefaultParams, JsonObject, CacheConfig } from "../../../types/common.js";
import type { StreamOptions } from "../gateway.js";
import type { ModelRouter } from "../models/response.js";

/**
 * Request parameters accepted by the `completions.create` operation.
 *
 * Only `model` and `prompt` are required; every other field is optional.
 */
export interface CreateCompletionsParams extends DefaultParams {
  /** ID of the model to use. */
  model: string;

  /**
   * Prompt to generate completions for.
   *
   * Note: `<|endoftext|>` is the document separator that the model sees during training, so if a
   * prompt is not specified the model will generate as if from the beginning of a new document.
   *
   * NOTE(review): earlier documentation described the prompt as "a string, array of strings,
   * array of tokens, or array of token arrays", but the declared type only admits a single
   * `string` - confirm which one is intended.
   */
  prompt: string;

  /**
   * Generates `best_of` completions server-side and returns the "best" one (the one with the
   * highest log probability per token). Results cannot be streamed.
   *
   * When used together with `n`, `best_of` controls the number of candidate completions and `n`
   * specifies how many to return; `best_of` must be greater than `n`.
   *
   * Note: Because this parameter generates many completions, it can quickly consume your token
   * quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
   */
  bestOf?: number;

  /** Caching configuration for the request. Cache is only supported for non-streaming requests. */
  cache?: CacheConfig;

  /** Echo back the prompt in addition to the completion. */
  echo?: boolean;

  /**
   * A number between `-2.0` and `2.0`. Positive values penalize new tokens based on their existing
   * frequency in the text so far, decreasing the model's likelihood to repeat the same line
   * verbatim.
   */
  frequencyPenalty?: number;

  /**
   * Modifies the likelihood of specified tokens appearing in the completion.
   *
   * Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to
   * an associated bias value from `-100` to `100`. You can use a tokenizer tool to convert text to
   * token IDs. Mathematically, the bias is added to the logits generated by the model prior to
   * sampling.
   *
   * The exact effect will vary per model, but:
   *
   * - Values between `-1` and `1` should decrease or increase the likelihood of selection.
   * - Values like `-100` or `100` should result in a ban or exclusive selection of the relevant
   *   token.
   *
   * As an example, you can pass `{"50256": -100}` to prevent the `<|endoftext|>` token from being
   * generated.
   */
  logitBias?: JsonObject;

  /**
   * The number of most likely output tokens to include the log probabilities of, as well as the
   * chosen tokens.
   *
   * For example, if `logprobs` is `5`, the API will return a list of the 5 most likely tokens. The
   * API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1`
   * elements in the response. The maximum value for `logprobs` is `5`.
   */
  logprobs?: number;

  /**
   * The maximum number of tokens that can be generated in the completion. The token count of your
   * prompt plus `max_tokens` cannot exceed the model's context length.
   */
  maxTokens?: number;

  /** Developer-defined tags and values used for filtering completions. */
  metadata?: JsonObject;

  /**
   * Specifies how many completions to generate for each prompt.
   *
   * Note: Because this parameter generates many completions, it can quickly consume your token
   * quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
   */
  n?: number;

  /**
   * A number between `-2.0` and `2.0`. Positive values penalize new tokens based on whether they
   * appear in the text so far, increasing the model's likelihood to talk about new topics.
   */
  presencePenalty?: number;

  /** Model routing configuration for the request. */
  router?: ModelRouter;

  /**
   * The seed for the model request. If specified, OpenAI's system will make a best effort to
   * sample deterministically, such that repeated requests with the same `seed` and parameters
   * should return the same result.
   *
   * Determinism is not guaranteed, and you should refer to the `system_fingerprint` response
   * parameter to monitor changes in the backend.
   */
  seed?: number;

  /** Specifies up to 4 sequences where the API will stop generating further tokens. */
  stop?: string[];

  /**
   * Indicates whether to stream back partial progress. If set, tokens will be sent as data-only
   * [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
   * as they become available, with the stream terminated by a `data: [DONE]` message.
   */
  stream?: boolean;

  /** Options for the streaming response. Only set this when you set `stream` to `true`. */
  streamOptions?: StreamOptions;

  /**
   * Text that comes after a completion of inserted text. On OpenAI, this parameter is only
   * supported for `gpt-3.5-turbo-instruct`.
   */
  suffix?: string;

  /**
   * Specifies what temperature to use for sampling, between `0` and `2`. Higher values like `0.8`
   * will make the output more random, while lower values like `0.2` will make it more focused and
   * deterministic.
   *
   * Note: OpenAI generally recommends altering this or `top_p` but not both.
   */
  temperature?: number;

  /**
   * An alternative to sampling with `temperature`, called nucleus sampling, where the model
   * considers the results of the tokens with `top_p` probability mass. So `0.1` means only the
   * tokens comprising the top 10% probability mass are considered.
   *
   * Note: OpenAI generally recommends altering this or `temperature` but not both.
   */
  topP?: number;

  /**
   * A unique identifier representing your end-user, which can help Services to monitor and detect
   * abuse.
   */
  user?: string;
}

/**
 * Parameters for the `completions.create` operation without streaming: `stream` is either omitted
 * or `false`.
 */
export interface CreateBasicCompletionsParams extends CreateCompletionsParams {
  stream?: false;
}

/** Parameters for the `completions.create` operation with streaming: `stream` must be `true`. */
export interface CreateStreamCompletionsParams extends CreateCompletionsParams {
  stream: true;

  /** Indicates the return type of stream chunks. */
  returnObject?: boolean;
}
//# sourceMappingURL=request.d.ts.map