sambanova
Version:
The official TypeScript library for the SambaNova API
728 lines (600 loc) • 18.8 kB
text/typescript
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
import { APIResource } from '../core/resource';
import * as CompletionsAPI from './completions';
import { APIPromise } from '../core/api-promise';
import { Stream } from '../core/streaming';
import { RequestOptions } from '../internal/request-options';
export class Completions extends APIResource {
/**
* Create completion
*
* @example
* ```ts
* const completion = await client.completions.create({
* model: 'string',
* prompt:
* '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\ncreate a poem using palindromes<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n',
* });
* ```
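*
* @example
* ```ts
* // Streaming sketch (illustrative, not taken from the spec): with
* // `stream: true` the call resolves to a Stream of
* // CompletionStreamResponse chunks that can be iterated with `for await`.
* const stream = await client.completions.create({
*   model: 'Meta-Llama-3.1-8B-Instruct',
*   prompt: 'Write a short poem about rivers.',
*   stream: true,
* });
* for await (const chunk of stream) {
*   process.stdout.write(chunk.choices?.[0]?.text ?? '');
* }
* ```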
*/
create(
body: CompletionCreateParamsNonStreaming,
options?: RequestOptions,
): APIPromise<CompletionCreateResponse>;
create(
body: CompletionCreateParamsStreaming,
options?: RequestOptions,
): APIPromise<Stream<CompletionStreamResponse>>;
create(
body: CompletionCreateParamsBase,
options?: RequestOptions,
): APIPromise<Stream<CompletionStreamResponse> | CompletionCreateResponse>;
create(
body: CompletionCreateParams,
options?: RequestOptions,
): APIPromise<CompletionCreateResponse> | APIPromise<Stream<CompletionStreamResponse>> {
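// Forward to POST /completions; the `stream` flag in the request body (default
// false) determines whether the response is returned as a single completion or
// as a server-sent-event stream.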
return this._client.post('/completions', { body, ...options, stream: body.stream ?? false }) as
| APIPromise<CompletionCreateResponse>
| APIPromise<Stream<CompletionStreamResponse>>;
}
}
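// Non-streaming usage sketch (illustrative; assumes a configured `client`).
// Without `stream: true`, `create` resolves to a CompletionCreateResponse whose
// generated text is on `choices[0].text`, with `finish_reason` indicating why
// generation stopped:
//
//   const completion = await client.completions.create({
//     model: 'Meta-Llama-3.1-8B-Instruct',
//     prompt: 'Q: What is the capital of France?\nA:',
//   });
//   const choice = completion.choices?.[0];
//   console.log(choice?.text);
//   if (choice?.finish_reason === 'length') {
//     // Generation hit the token limit; consider raising max_tokens.
//   }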
/**
* Completion response returned by the model
*/
export interface CompletionResponse {
/**
* A unique identifier for the chat completion.
*/
id: string;
choices: Array<CompletionResponse.Choice>;
/**
* The Unix timestamp (in seconds) of when the chat completion was created.
*/
created: number;
/**
* The model used for the chat completion.
*/
model: string;
/**
* The object type, always `chat.completion`.
*/
object: 'chat.completion';
/**
* Backend configuration that the model runs with.
*/
system_fingerprint: string;
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
usage: CompletionResponse.Usage | null;
}
export namespace CompletionResponse {
export interface Choice {
/**
* The reason the model stopped generating tokens. Will be `stop` if the model hit
* a natural stop point or a provided stop sequence, `length` if the maximum number
* of tokens specified in the request was reached, `tool_calls` if the model called
* a tool.
*/
finish_reason: 'stop' | 'length' | 'tool_calls';
/**
* The index of the choice in the list of choices
*/
index: number;
/**
* model response
*/
text: string;
/**
* Completion Log Probs object
*/
logprobs?: Choice.Logprobs | null;
message?: Choice.Message;
[k: string]: unknown;
}
export namespace Choice {
/**
* Completion Log Probs object
*/
export interface Logprobs {
content: Logprobs.Content;
[k: string]: unknown;
}
export namespace Logprobs {
export interface Content {
token: string;
logprob: number;
top_logprobs: Content.TopLogprobs;
bytes?: Array<number> | null;
[k: string]: unknown;
}
export namespace Content {
export interface TopLogprobs {
token: string;
logprob: number;
bytes?: Array<number> | null;
[k: string]: unknown;
}
}
}
export interface Message {
/**
* The contents of the assistant message.
*/
content: string | null;
/**
* The role of the message author
*/
role: 'assistant' | 'user' | 'system' | 'tool';
/**
* The tool calls generated by the model.
*/
tool_calls?: Array<Message.ToolCall> | null;
[k: string]: unknown;
}
export namespace Message {
export interface ToolCall {
/**
* ID of the tool call.
*/
id: string;
/**
* The tool that the model called.
*/
function: ToolCall.Function;
/**
* Type of the tool call. Only `function` is supported.
*/
type: 'function';
/**
* Index of the tool call chunk; only used when streaming
*/
index?: number | null;
[k: string]: unknown;
}
export namespace ToolCall {
/**
* The tool that the model called.
*/
export interface Function {
/**
* The arguments to call the function with, as generated by the model in JSON
* format. Note that the model does not always generate valid JSON, and may
* hallucinate parameters not defined by your function schema. Validate the
* arguments in your code before calling your function.
*/
arguments: string;
/**
* The name of the function to call.
*/
name: string;
[k: string]: unknown;
}
}
}
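// Sketch (illustrative): `Function.arguments` above is a JSON-encoded string
// that the model may occasionally get wrong, so parse it defensively before
// invoking your own tool:
//
//   function parseToolArguments(
//     call: CompletionResponse.Choice.Message.ToolCall,
//   ): Record<string, unknown> | null {
//     try {
//       return JSON.parse(call.function.arguments);
//     } catch {
//       return null; // invalid JSON from the model; skip or retry the tool call
//     }
//   }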
}
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
export interface Usage {
/**
* acceptance rate
*/
acceptance_rate?: number;
/**
* number of tokens generated in completion
*/
completion_tokens?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec?: number;
/**
* completion tokens per second after first token generation first ten
*/
completion_tokens_after_first_per_sec_first_ten?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec_graph?: number;
/**
* completion tokens per second
*/
completion_tokens_per_sec?: number;
/**
* The Unix timestamp (in seconds) of when the generation finished.
*/
end_time?: number;
/**
* Whether or not this is the last response; always true for non-streaming responses
*/
is_last_response?: true;
/**
* number of tokens used in the prompt sent
*/
prompt_tokens?: number;
/**
* Extra tokens details
*/
prompt_tokens_details?: Usage.PromptTokensDetails;
/**
* The Unix timestamp (in seconds) of when the generation started.
*/
start_time?: number;
/**
* Also known as TTFT; the time (in seconds) taken to generate the first token
*/
time_to_first_token?: number;
/**
* total time (in seconds) taken to generate the full generation
*/
total_latency?: number;
/**
* prompt tokens + completion tokens
*/
total_tokens?: number;
/**
* tokens per second including prompt and completion
*/
total_tokens_per_sec?: number;
[k: string]: unknown;
}
export namespace Usage {
/**
* Extra tokens details
*/
export interface PromptTokensDetails {
/**
* amount of cached tokens
*/
cached_tokens?: number;
[k: string]: unknown;
}
}
}
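// Sketch (illustrative): every field on the Usage object above is optional, so
// a small helper that tolerates missing metrics is handy when logging latency
// and throughput:
//
//   function describeUsage(usage: CompletionResponse.Usage | null): string {
//     if (!usage) return 'no usage reported';
//     const parts: string[] = [];
//     if (usage.total_tokens !== undefined) parts.push(`${usage.total_tokens} tokens`);
//     if (usage.time_to_first_token !== undefined) parts.push(`TTFT ${usage.time_to_first_token}s`);
//     if (usage.total_latency !== undefined) parts.push(`total ${usage.total_latency}s`);
//     return parts.length ? parts.join(', ') : 'no metrics present';
//   }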
/**
* streamed chunk of a completion response returned by the model
*/
export interface CompletionStreamResponse {
/**
* A unique identifier for the chat completion.
*/
id: string;
/**
* A list of chat completion choices.
*/
choices: Array<CompletionStreamResponse.Choice> | null;
/**
* The Unix timestamp (in seconds) of when the chat completion was created.
*/
created: number;
/**
* The model used for the chat completion.
*/
model: string;
/**
* The object type, always `chat.completion.chunk`.
*/
object: 'chat.completion.chunk';
/**
* Backend configuration that the model runs with.
*/
system_fingerprint: string;
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
usage?: CompletionStreamResponse.Usage | null;
[k: string]: unknown;
}
export namespace CompletionStreamResponse {
export interface Choice {
/**
* model generation response
*/
text: string;
/**
* The reason the model stopped generating tokens. Will be `stop` if the model hit
* a natural stop point or a provided stop sequence, `length` if the maximum number
* of tokens specified in the request was reached, `tool_calls` if the model called
* a tool.
*/
finish_reason?: 'stop' | 'length' | null;
/**
* The index of the choice in the list of choices
*/
index?: number;
/**
* Completion Log Probs object
*/
logprobs?: Choice.Logprobs | null;
[k: string]: unknown;
}
export namespace Choice {
/**
* Completion Log Probs object
*/
export interface Logprobs {
content: Logprobs.Content;
[k: string]: unknown;
}
export namespace Logprobs {
export interface Content {
token: string;
logprob: number;
top_logprobs: Content.TopLogprobs;
bytes?: Array<number> | null;
[k: string]: unknown;
}
export namespace Content {
export interface TopLogprobs {
token: string;
logprob: number;
bytes?: Array<number> | null;
[k: string]: unknown;
}
}
}
}
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
export interface Usage {
/**
* acceptance rate
*/
acceptance_rate?: number;
/**
* number of tokens generated in completion
*/
completion_tokens?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec?: number;
/**
* completion tokens per second after first token generation first ten
*/
completion_tokens_after_first_per_sec_first_ten?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec_graph?: number;
/**
* completion tokens per second
*/
completion_tokens_per_sec?: number;
/**
* The Unix timestamp (in seconds) of when the generation finished.
*/
end_time?: number;
/**
* Whether or not this is the last response; always true for non-streaming responses
*/
is_last_response?: true;
/**
* number of tokens used in the prompt sent
*/
prompt_tokens?: number;
/**
* Extra tokens details
*/
prompt_tokens_details?: Usage.PromptTokensDetails;
/**
* The Unix timestamp (in seconds) of when the generation started.
*/
start_time?: number;
/**
* Also known as TTFT; the time (in seconds) taken to generate the first token
*/
time_to_first_token?: number;
/**
* total time (in seconds) taken to generate the full generation
*/
total_latency?: number;
/**
* prompt tokens + completion tokens
*/
total_tokens?: number;
/**
* tokens per second including prompt and completion
*/
total_tokens_per_sec?: number;
[k: string]: unknown;
}
export namespace Usage {
/**
* Extra tokens details
*/
export interface PromptTokensDetails {
/**
* amount of cached tokens
*/
cached_tokens?: number;
[k: string]: unknown;
}
}
}
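// Sketch (illustrative): when streaming with `stream_options: { include_usage: true }`,
// the per-token chunks carry text in `choices` while the final chunk carries the
// usage metrics, so both can be collected in a single pass:
//
//   async function collectStream(stream: AsyncIterable<CompletionStreamResponse>) {
//     let text = '';
//     let usage: CompletionStreamResponse.Usage | null = null;
//     for await (const chunk of stream) {
//       text += chunk.choices?.[0]?.text ?? '';
//       if (chunk.usage) usage = chunk.usage;
//     }
//     return { text, usage };
//   }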
/**
* Completion response returned by the model
*/
export type CompletionCreateResponse = CompletionResponse | CompletionStreamResponse;
export type CompletionCreateParams = CompletionCreateParamsNonStreaming | CompletionCreateParamsStreaming;
export interface CompletionCreateParamsBase {
/**
* The model ID to use (e.g. Meta-Llama-3.3-70B-Instruct). See available
* [models](https://docs.sambanova.ai/cloud/docs/get-started/supported-models)
*/
model:
| (string & {})
| 'Meta-Llama-3.3-70B-Instruct'
| 'Meta-Llama-3.2-1B-Instruct'
| 'Meta-Llama-3.2-3B-Instruct'
| 'Llama-3.2-11B-Vision-Instruct'
| 'Llama-3.2-90B-Vision-Instruct'
| 'Meta-Llama-3.1-8B-Instruct'
| 'Meta-Llama-3.1-70B-Instruct'
| 'Meta-Llama-3.1-405B-Instruct'
| 'Qwen2.5-Coder-32B-Instruct'
| 'Qwen2.5-72B-Instruct'
| 'QwQ-32B-Preview'
| 'Meta-Llama-Guard-3-8B'
| 'DeepSeek-R1'
| 'DeepSeek-R1-0528'
| 'DeepSeek-V3-0324'
| 'DeepSeek-V3.1'
| 'DeepSeek-V3.1-Terminus'
| 'DeepSeek-R1-Distill-Llama-70B'
| 'Llama-4-Maverick-17B-128E-Instruct'
| 'Llama-4-Scout-17B-16E-Instruct'
| 'Qwen3-32B'
| 'Llama-3.3-Swallow-70B-Instruct-v0.4'
| 'gpt-oss-120b'
| 'ALLaM-7B-Instruct-preview';
/**
* Prompt to send to the model.
*/
prompt: string;
/**
* If true, sampling is enabled during output generation. If false, deterministic
* decoding is used.
*/
do_sample?: boolean | null;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their
* existing frequency in the text so far, decreasing the model's likelihood to
* repeat the same line verbatim.
*/
frequency_penalty?: number;
/**
* This is not yet supported by our models. Modify the likelihood of specified
* tokens appearing in the completion.
*/
logit_bias?: { [key: string]: number } | null;
/**
* This is not yet supported by our models. Whether to return log probabilities of
* the output tokens or not. If true, returns the log probabilities of each output
* token returned in the `content` of `message`.
*/
logprobs?: boolean | null;
/**
* The maximum number of tokens that can be generated in the chat completion. The
* total length of input tokens and generated tokens is limited by the model's
* context length.
*/
max_completion_tokens?: number | null;
/**
* The maximum number of tokens that can be generated in the chat completion. The
* total length of input tokens and generated tokens is limited by the model's
* context length.
*/
max_tokens?: number | null;
/**
* This is not yet supported by our models. How many chat completion choices to
* generate for each input message.
*/
n?: number | null;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on
* whether they appear in the text so far, increasing the model's likelihood to
* talk about new topics.
*/
presence_penalty?: number | null;
/**
* This is not yet supported by our models.
*/
seed?: number | null;
/**
* Sequences where the API will stop generating tokens. The returned text will not
* contain the stop sequence.
*/
stop?: string | null | Array<string>;
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream?: boolean | null;
/**
* Options for the streaming response. Only set this when `stream` is set to true
*/
stream_options?: CompletionCreateParams.StreamOptions | null;
/**
* What sampling temperature to use, between 0 and 2; this determines the degree
* of randomness in the response. Higher values like 0.8 will make the output
* more random, while lower values like 0.2 will make it more focused and
* deterministic. It is recommended to alter this, top_p, or top_k, but not more
* than one of these.
*/
temperature?: number | null;
/**
* Limit on the number of token choices. An alternative to sampling with
* temperature: the model considers only the K highest-probability tokens, so 10
* means only the 10 most likely tokens are considered. It is recommended to
* alter this, top_p, or temperature, but not more than one of these.
*/
top_k?: number | null;
/**
* This is not yet supported by our models. An integer between 0 and 20 specifying
* the number of most likely tokens to return at each token position, each with an
* associated log probability. `logprobs` must be set to `true` if this parameter
* is used.
*/
top_logprobs?: number | null;
/**
* Cumulative probability for token choices. An alternative to sampling with
* temperature, called nucleus sampling, where the model considers only the
* tokens comprising the top_p probability mass. So 0.1 means only the tokens
* comprising the top 10% probability mass are considered. It is recommended to
* alter this, top_k, or temperature, but not more than one of these.
*/
top_p?: number | null;
[k: string]: unknown;
}
export namespace CompletionCreateParams {
/**
* Options for the streaming response. Only set this when `stream` is set to true
*/
export interface StreamOptions {
/**
* Whether to include the usage metrics in a final chunk or not
*/
include_usage?: boolean | null;
[k: string]: unknown;
}
export type CompletionCreateParamsNonStreaming = CompletionsAPI.CompletionCreateParamsNonStreaming;
export type CompletionCreateParamsStreaming = CompletionsAPI.CompletionCreateParamsStreaming;
}
export interface CompletionCreateParamsNonStreaming extends CompletionCreateParamsBase {
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream?: false | null;
[k: string]: unknown;
}
export interface CompletionCreateParamsStreaming extends CompletionCreateParamsBase {
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream: true;
[k: string]: unknown;
}
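// Sketch (illustrative): the `stream` flag is what selects between the two
// param shapes above, and therefore which `create` overload (and return type)
// applies:
//
//   const nonStreaming: CompletionCreateParamsNonStreaming = {
//     model: 'Meta-Llama-3.3-70B-Instruct',
//     prompt: 'Translate "hello" into French.',
//     max_tokens: 64,
//     temperature: 0.2,
//   };
//
//   const streaming: CompletionCreateParamsStreaming = {
//     ...nonStreaming,
//     stream: true,
//     stream_options: { include_usage: true },
//   };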
export declare namespace Completions {
export {
type CompletionResponse as CompletionResponse,
type CompletionStreamResponse as CompletionStreamResponse,
type CompletionCreateResponse as CompletionCreateResponse,
type CompletionCreateParams as CompletionCreateParams,
type CompletionCreateParamsNonStreaming as CompletionCreateParamsNonStreaming,
type CompletionCreateParamsStreaming as CompletionCreateParamsStreaming,
};
}