sambanova
Version:
The official TypeScript library for the SambaNova API
728 lines (600 loc) • 18.8 kB
text/typescript
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
import { APIResource } from '../core/resource';
import * as CompletionsAPI from './completions';
import { APIPromise } from '../core/api-promise';
import { Stream } from '../core/streaming';
import { RequestOptions } from '../internal/request-options';
export class Completions extends APIResource {
/**
* Create completion
*
* @example
* ```ts
* const completion = await client.completions.create({
* model: 'string',
* prompt:
* '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\ncreate a poem using palindromes<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n',
* });
* ```
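*
* @example
* ```ts
* // Streaming sketch (illustrative, not taken from the spec): with
* // `stream: true` the call resolves to a Stream of
* // CompletionStreamResponse chunks that can be iterated with `for await`.
* const stream = await client.completions.create({
*   model: 'Meta-Llama-3.1-8B-Instruct',
*   prompt: 'Write a short poem about rivers.',
*   stream: true,
* });
* for await (const chunk of stream) {
*   process.stdout.write(chunk.choices?.[0]?.text ?? '');
* }
* ```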
*/
create(
body: CompletionCreateParamsNonStreaming,
options?: RequestOptions,
): APIPromise<CompletionCreateResponse>;
create(
body: CompletionCreateParamsStreaming,
options?: RequestOptions,
): APIPromise<Stream<CompletionStreamResponse>>;
create(
body: CompletionCreateParamsBase,
options?: RequestOptions,
): APIPromise<Stream<CompletionStreamResponse> | CompletionCreateResponse>;
create(
body: CompletionCreateParams,
options?: RequestOptions,
): APIPromise<CompletionCreateResponse> | APIPromise<Stream<CompletionStreamResponse>> {
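// Forward to POST /completions; the `stream` flag in the request body (default
// false) determines whether the response is returned as a single completion or
// as a server-sent-event stream.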
return this._client.post('/completions', { body, ...options, stream: body.stream ?? false }) as
| APIPromise<CompletionCreateResponse>
| APIPromise<Stream<CompletionStreamResponse>>;
}
}
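// Non-streaming usage sketch (illustrative; assumes a configured `client`).
// Without `stream: true`, `create` resolves to a CompletionCreateResponse whose
// generated text is on `choices[0].text`, with `finish_reason` indicating why
// generation stopped:
//
//   const completion = await client.completions.create({
//     model: 'Meta-Llama-3.1-8B-Instruct',
//     prompt: 'Q: What is the capital of France?\nA:',
//   });
//   const choice = completion.choices?.[0];
//   console.log(choice?.text);
//   if (choice?.finish_reason === 'length') {
//     // Generation hit the token limit; consider raising max_tokens.
//   }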
/**
* Completion response returned by the model
*/
export interface CompletionResponse {
/**
* A unique identifier for the chat completion.
*/
id: string;
choices: Array<CompletionResponse.Choice>;
/**
* The Unix timestamp (in seconds) of when the chat completion was created.
*/
created: number;
/**
* The model used for the chat completion.
*/
model: string;
/**
* The object type, always `chat.completion`.
*/
object: 'chat.completion';
/**
* Backend configuration that the model runs with.
*/
system_fingerprint: string;
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
usage: CompletionResponse.Usage | null;
}
export namespace CompletionResponse {
export interface Choice {
/**
* The reason the model stopped generating tokens. Will be `stop` if the model hit
* a natural stop point or a provided stop sequence, `length` if the maximum number
* of tokens specified in the request was reached, `tool_calls` if the model called
* a tool.
*/
finish_reason: 'stop' | 'length' | 'tool_calls';
/**
* The index of the choice in the list of choices
*/
index: number;
/**
* model response
*/
text: string;
/**
* Completion Log Probs object
*/
logprobs?: Choice.Logprobs | null;
message?: Choice.Message;
[k: string]: unknown;
}
export namespace Choice {
/**
* Completion Log Probs object
*/
export interface Logprobs {
content: Logprobs.Content;
[k: string]: unknown;
}
export namespace Logprobs {
export interface Content {
token: string;
logprob: number;
top_logprobs: Content.TopLogprobs;
bytes?: Array<number> | null;
[k: string]: unknown;
}
export namespace Content {
export interface TopLogprobs {
token: string;
logprob: number;
bytes?: Array<number> | null;
[k: string]: unknown;
}
}
}
export interface Message {
/**
* The contents of the assistant message.
*/
content: string | null;
/**
* The role of the message author
*/
role: 'assistant' | 'user' | 'system' | 'tool';
/**
* The tool calls generated by the model.
*/
tool_calls?: Array<Message.ToolCall> | null;
[k: string]: unknown;
}
export namespace Message {
export interface ToolCall {
/**
* ID of the tool call.
*/
id: string;
/**
* The tool that the model called.
*/
function: ToolCall.Function;
/**
* Type of the tool call. Only `function` is supported.
*/
type: 'function';
/**
* Index of the tool call chunk; only used when streaming
*/
index?: number | null;
[k: string]: unknown;
}
export namespace ToolCall {
/**
* The tool that the model called.
*/
export interface Function {
/**
* The arguments to call the function with, as generated by the model in JSON
* format. Note that the model does not always generate valid JSON, and may
* hallucinate parameters not defined by your function schema. Validate the
* arguments in your code before calling your function.
*/
arguments: string;
/**
* The name of the function to call.
*/
name: string;
[k: string]: unknown;
}
}
}
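// Sketch (illustrative): `Function.arguments` above is a JSON-encoded string
// that the model may occasionally get wrong, so parse it defensively before
// invoking your own tool:
//
//   function parseToolArguments(
//     call: CompletionResponse.Choice.Message.ToolCall,
//   ): Record<string, unknown> | null {
//     try {
//       return JSON.parse(call.function.arguments);
//     } catch {
//       return null; // invalid JSON from the model; skip or retry the tool call
//     }
//   }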
}
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
export interface Usage {
/**
* acceptance rate
*/
acceptance_rate?: number;
/**
* number of tokens generated in completion
*/
completion_tokens?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec?: number;
/**
* completion tokens per second after first token generation first ten
*/
completion_tokens_after_first_per_sec_first_ten?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec_graph?: number;
/**
* completion tokens per second
*/
completion_tokens_per_sec?: number;
/**
* The Unix timestamp (in seconds) of when the generation finished.
*/
end_time?: number;
/**
* Whether or not this is the last response; always true for non-streaming responses
*/
is_last_response?: true;
/**
* number of tokens used in the prompt sent
*/
prompt_tokens?: number;
/**
* Extra tokens details
*/
prompt_tokens_details?: Usage.PromptTokensDetails;
/**
* The Unix timestamp (in seconds) of when the generation started.
*/
start_time?: number;
/**
* Also known as TTFT; the time (in seconds) taken to generate the first token
*/
time_to_first_token?: number;
/**
* total time (in seconds) taken to generate the full generation
*/
total_latency?: number;
/**
* prompt tokens + completion tokens
*/
total_tokens?: number;
/**
* tokens per second including prompt and completion
*/
total_tokens_per_sec?: number;
[k: string]: unknown;
}
export namespace Usage {
/**
* Extra tokens details
*/
export interface PromptTokensDetails {
/**
* amount of cached tokens
*/
cached_tokens?: number;
[k: string]: unknown;
}
}
}
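// Sketch (illustrative): every field on the Usage object above is optional, so
// a small helper that tolerates missing metrics is handy when logging latency
// and throughput:
//
//   function describeUsage(usage: CompletionResponse.Usage | null): string {
//     if (!usage) return 'no usage reported';
//     const parts: string[] = [];
//     if (usage.total_tokens !== undefined) parts.push(`${usage.total_tokens} tokens`);
//     if (usage.time_to_first_token !== undefined) parts.push(`TTFT ${usage.time_to_first_token}s`);
//     if (usage.total_latency !== undefined) parts.push(`total ${usage.total_latency}s`);
//     return parts.length ? parts.join(', ') : 'no metrics present';
//   }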
/**
* streamed chunk of a completion response returned by the model
*/
export interface CompletionStreamResponse {
/**
* A unique identifier for the chat completion.
*/
id: string;
/**
* A list of chat completion choices.
*/
choices: Array<CompletionStreamResponse.Choice> | null;
/**
* The Unix timestamp (in seconds) of when the chat completion was created.
*/
created: number;
/**
* The model used for the chat completion.
*/
model: string;
/**
* The object type, always `chat.completion.chunk`.
*/
object: 'chat.completion.chunk';
/**
* Backend configuration that the model runs with.
*/
system_fingerprint: string;
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
usage?: CompletionStreamResponse.Usage | null;
[k: string]: unknown;
}
export namespace CompletionStreamResponse {
export interface Choice {
/**
* model generation response
*/
text: string;
/**
* The reason the model stopped generating tokens. Will be `stop` if the model hit
* a natural stop point or a provided stop sequence, `length` if the maximum number
* of tokens specified in the request was reached, `tool_calls` if the model called
* a tool.
*/
finish_reason?: 'stop' | 'length' | null;
/**
* The index of the choice in the list of choices
*/
index?: number;
/**
* Completion Log Probs object
*/
logprobs?: Choice.Logprobs | null;
[k: string]: unknown;
}
export namespace Choice {
/**
* Completion Log Probs object
*/
export interface Logprobs {
content: Logprobs.Content;
[k: string]: unknown;
}
export namespace Logprobs {
export interface Content {
token: string;
logprob: number;
top_logprobs: Content.TopLogprobs;
bytes?: Array<number> | null;
[k: string]: unknown;
}
export namespace Content {
export interface TopLogprobs {
token: string;
logprob: number;
bytes?: Array<number> | null;
[k: string]: unknown;
}
}
}
}
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
export interface Usage {
/**
* acceptance rate
*/
acceptance_rate?: number;
/**
* number of tokens generated in completion
*/
completion_tokens?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec?: number;
/**
* completion tokens per second after first token generation first ten
*/
completion_tokens_after_first_per_sec_first_ten?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec_graph?: number;
/**
* completion tokens per second
*/
completion_tokens_per_sec?: number;
/**
* The Unix timestamp (in seconds) of when the generation finished.
*/
end_time?: number;
/**
* Whether or not this is the last response; always true for non-streaming responses
*/
is_last_response?: true;
/**
* number of tokens used in the prompt sent
*/
prompt_tokens?: number;
/**
* Extra tokens details
*/
prompt_tokens_details?: Usage.PromptTokensDetails;
/**
* The Unix timestamp (in seconds) of when the generation started.
*/
start_time?: number;
/**
* Also known as TTFT; the time (in seconds) taken to generate the first token
*/
time_to_first_token?: number;
/**
* total time (in seconds) taken to generate the full generation
*/
total_latency?: number;
/**
* prompt tokens + completion tokens
*/
total_tokens?: number;
/**
* tokens per second including prompt and completion
*/
total_tokens_per_sec?: number;
[k: string]: unknown;
}
export namespace Usage {
/**
* Extra tokens details
*/
export interface PromptTokensDetails {
/**
* amount of cached tokens
*/
cached_tokens?: number;
[k: string]: unknown;
}
}
}
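// Sketch (illustrative): when streaming with `stream_options: { include_usage: true }`,
// the per-token chunks carry text in `choices` while the final chunk carries the
// usage metrics, so both can be collected in a single pass:
//
//   async function collectStream(stream: AsyncIterable<CompletionStreamResponse>) {
//     let text = '';
//     let usage: CompletionStreamResponse.Usage | null = null;
//     for await (const chunk of stream) {
//       text += chunk.choices?.[0]?.text ?? '';
//       if (chunk.usage) usage = chunk.usage;
//     }
//     return { text, usage };
//   }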
/**
* Completion response returned by the model
*/
export type CompletionCreateResponse = CompletionResponse | CompletionStreamResponse;
export type CompletionCreateParams = CompletionCreateParamsNonStreaming | CompletionCreateParamsStreaming;
export interface CompletionCreateParamsBase {
/**
* The model ID to use (e.g. Meta-Llama-3.3-70B-Instruct). See available
* [models](https://docs.sambanova.ai/cloud/docs/get-started/supported-models)
*/
model:
| (string & {})
| 'Meta-Llama-3.3-70B-Instruct'
| 'Meta-Llama-3.2-1B-Instruct'
| 'Meta-Llama-3.2-3B-Instruct'
| 'Llama-3.2-11B-Vision-Instruct'
| 'Llama-3.2-90B-Vision-Instruct'
| 'Meta-Llama-3.1-8B-Instruct'
| 'Meta-Llama-3.1-70B-Instruct'
| 'Meta-Llama-3.1-405B-Instruct'
| 'Qwen2.5-Coder-32B-Instruct'
| 'Qwen2.5-72B-Instruct'
| 'QwQ-32B-Preview'
| 'Meta-Llama-Guard-3-8B'
| 'DeepSeek-R1'
| 'DeepSeek-R1-0528'
| 'DeepSeek-V3-0324'
| 'DeepSeek-V3.1'
| 'DeepSeek-V3.1-Terminus'
| 'DeepSeek-R1-Distill-Llama-70B'
| 'Llama-4-Maverick-17B-128E-Instruct'
| 'Llama-4-Scout-17B-16E-Instruct'
| 'Qwen3-32B'
| 'Llama-3.3-Swallow-70B-Instruct-v0.4'
| 'gpt-oss-120b'
| 'ALLaM-7B-Instruct-preview';
/**
* Prompt to send to the model.
*/
prompt: string;
/**
* If true, sampling is enabled during output generation. If false, deterministic
* decoding is used.
*/
do_sample?: boolean | null;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their
* existing frequency in the text so far, decreasing the model's likelihood to
* repeat the same line verbatim.
*/
frequency_penalty?: number;
/**
* This is not yet supported by our models. Modify the likelihood of specified
* tokens appearing in the completion.
*/
logit_bias?: { [key: string]: number } | null;
/**
* This is not yet supported by our models. Whether to return log probabilities of
* the output tokens or not. If true, returns the log probabilities of each output
* token returned in the `content` of `message`.
*/
logprobs?: boolean | null;
/**
* The maximum number of tokens that can be generated in the chat completion. The
* total length of input tokens and generated tokens is limited by the model's
* context length.
*/
max_completion_tokens?: number | null;
/**
* The maximum number of tokens that can be generated in the chat completion. The
* total length of input tokens and generated tokens is limited by the model's
* context length.
*/
max_tokens?: number | null;
/**
* This is not yet supported by our models. How many chat completion choices to
* generate for each input message.
*/
n?: number | null;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on
* whether they appear in the text so far, increasing the model's likelihood to
* talk about new topics.
*/
presence_penalty?: number | null;
/**
* This is not yet supported by our models.
*/
seed?: number | null;
/**
* Sequences where the API will stop generating tokens. The returned text will not
* contain the stop sequence.
*/
stop?: string | null | Array<string>;
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream?: boolean | null;
/**
* Options for the streaming response. Only set this when `stream` is set to true
*/
stream_options?: CompletionCreateParams.StreamOptions | null;
/**
* What sampling temperature to use, between 0 and 2; this determines the degree
* of randomness in the response. Higher values like 0.8 will make the output
* more random, while lower values like 0.2 will make it more focused and
* deterministic. It is recommended to alter this, top_p, or top_k, but not more
* than one of these.
*/
temperature?: number | null;
/**
* Limit on the number of token choices. An alternative to sampling with
* temperature: the model considers only the K highest-probability tokens, so 10
* means only the 10 most likely tokens are considered. It is recommended to
* alter this, top_p, or temperature, but not more than one of these.
*/
top_k?: number | null;
/**
* This is not yet supported by our models. An integer between 0 and 20 specifying
* the number of most likely tokens to return at each token position, each with an
* associated log probability. `logprobs` must be set to `true` if this parameter
* is used.
*/
top_logprobs?: number | null;
/**
* Cumulative probability for token choices. An alternative to sampling with
* temperature, called nucleus sampling, where the model considers only the
* tokens comprising the top_p probability mass. So 0.1 means only the tokens
* comprising the top 10% probability mass are considered. It is recommended to
* alter this, top_k, or temperature, but not more than one of these.
*/
top_p?: number | null;
[k: string]: unknown;
}
export namespace CompletionCreateParams {
/**
* Options for the streaming response. Only set this when `stream` is set to true
*/
export interface StreamOptions {
/**
* Whether to include the usage metrics in a final chunk or not
*/
include_usage?: boolean | null;
[k: string]: unknown;
}
export type CompletionCreateParamsNonStreaming = CompletionsAPI.CompletionCreateParamsNonStreaming;
export type CompletionCreateParamsStreaming = CompletionsAPI.CompletionCreateParamsStreaming;
}
export interface CompletionCreateParamsNonStreaming extends CompletionCreateParamsBase {
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream?: false | null;
[k: string]: unknown;
}
export interface CompletionCreateParamsStreaming extends CompletionCreateParamsBase {
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream: true;
[k: string]: unknown;
}
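// Sketch (illustrative): the `stream` flag is what selects between the two
// param shapes above, and therefore which `create` overload (and return type)
// applies:
//
//   const nonStreaming: CompletionCreateParamsNonStreaming = {
//     model: 'Meta-Llama-3.3-70B-Instruct',
//     prompt: 'Translate "hello" into French.',
//     max_tokens: 64,
//     temperature: 0.2,
//   };
//
//   const streaming: CompletionCreateParamsStreaming = {
//     ...nonStreaming,
//     stream: true,
//     stream_options: { include_usage: true },
//   };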
export declare namespace Completions {
export {
type CompletionResponse as CompletionResponse,
type CompletionStreamResponse as CompletionStreamResponse,
type CompletionCreateResponse as CompletionCreateResponse,
type CompletionCreateParams as CompletionCreateParams,
type CompletionCreateParamsNonStreaming as CompletionCreateParamsNonStreaming,
type CompletionCreateParamsStreaming as CompletionCreateParamsStreaming,
};
}