sambanova
Version:
The official TypeScript library for the SambaNova API
import { APIResource } from "../core/resource.mjs";
import * as CompletionsAPI from "./completions.mjs";
import { APIPromise } from "../core/api-promise.mjs";
import { Stream } from "../core/streaming.mjs";
import { RequestOptions } from "../internal/request-options.mjs";
export declare class Completions extends APIResource {
/**
* Create completion
*
* @example
* ```ts
* const completion = await client.completions.create({
* model: 'string',
* prompt:
* '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\ncreate a poem using palindromes<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n',
* });
* ```
*/
create(body: CompletionCreateParamsNonStreaming, options?: RequestOptions): APIPromise<CompletionCreateResponse>;
create(body: CompletionCreateParamsStreaming, options?: RequestOptions): APIPromise<Stream<CompletionStreamResponse>>;
create(body: CompletionCreateParamsBase, options?: RequestOptions): APIPromise<Stream<CompletionStreamResponse> | CompletionCreateResponse>;
}
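/**
 * Usage sketch (not part of the declarations): calling `create` with `stream: true`
 * resolves to a `Stream<CompletionStreamResponse>` that can be iterated with
 * `for await`. The `client` instance and its construction are assumed here, since
 * they are not defined in this file.
 *
 * ```ts
 * const stream = await client.completions.create({
 *   model: 'Meta-Llama-3.1-8B-Instruct',
 *   prompt: 'Write a haiku about rivers.',
 *   stream: true,
 * });
 * for await (const chunk of stream) {
 *   // `choices` may be null on a final usage-only chunk
 *   process.stdout.write(chunk.choices?.[0]?.text ?? '');
 * }
 * ```
 */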
/**
Completion response returned by the model
*/
export interface CompletionResponse {
/**
* A unique identifier for the chat completion.
*/
id: string;
choices: Array<CompletionResponse.Choice>;
/**
* The Unix timestamp (in seconds) of when the chat completion was created.
*/
created: number;
/**
* The model used for the chat completion.
*/
model: string;
/**
* The object type, always `chat.completion`.
*/
object: 'chat.completion';
/**
* Backend configuration that the model runs with.
*/
system_fingerprint: string;
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
usage: CompletionResponse.Usage | null;
}
export declare namespace CompletionResponse {
interface Choice {
/**
* The reason the model stopped generating tokens. Will be `stop` if the model hit
* a natural stop point or a provided stop sequence, `length` if the maximum number
* of tokens specified in the request was reached, `tool_calls` if the model called
* a tool.
*/
finish_reason: 'stop' | 'length' | 'tool_calls';
/**
* The index of the choice in the list of choices
*/
index: number;
/**
* model response
*/
text: string;
/**
* Completion Log Probs object
*/
logprobs?: Choice.Logprobs | null;
message?: Choice.Message;
[k: string]: unknown;
}
namespace Choice {
/**
* Completion Log Probs object
*/
interface Logprobs {
content: Logprobs.Content;
[k: string]: unknown;
}
namespace Logprobs {
interface Content {
token: string;
logprob: number;
top_logprobs: Content.TopLogprobs;
bytes?: Array<number> | null;
[k: string]: unknown;
}
namespace Content {
interface TopLogprobs {
token: string;
logprob: number;
bytes?: Array<number> | null;
[k: string]: unknown;
}
}
}
interface Message {
/**
* The contents of the assistant message.
*/
content: string | null;
/**
* The role of the message's author
*/
role: 'assistant' | 'user' | 'system' | 'tool';
/**
* The tool calls generated by the model.
*/
tool_calls?: Array<Message.ToolCall> | null;
[k: string]: unknown;
}
namespace Message {
interface ToolCall {
/**
* ID of the tool call.
*/
id: string;
/**
* The tool that the model called.
*/
function: ToolCall.Function;
/**
* Type of the tool call. Only `function` is supported.
*/
type: 'function';
/**
* Index of the tool call chunk; only used when streaming
*/
index?: number | null;
[k: string]: unknown;
}
namespace ToolCall {
/**
* The tool that the model called.
*/
interface Function {
/**
* The arguments to call the function with, as generated by the model in JSON
* format. Note that the model does not always generate valid JSON, and may
* hallucinate parameters not defined by your function schema. Validate the
* arguments in your code before calling your function.
*/
arguments: string;
/**
* The name of the function to call.
*/
name: string;
[k: string]: unknown;
}
}
}
}
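/**
 * As the comment on `Function.arguments` notes, the model does not always emit
 * valid JSON. A small illustrative helper (the name and shape are assumptions,
 * not part of this SDK) can validate the arguments before dispatching to your
 * own function:
 *
 * ```ts
 * function parseToolArguments(args: string): Record<string, unknown> | null {
 *   try {
 *     const parsed: unknown = JSON.parse(args);
 *     // only accept JSON objects (arrays also pass this check; tighten if needed)
 *     return parsed !== null && typeof parsed === 'object' ? (parsed as Record<string, unknown>) : null;
 *   } catch {
 *     return null; // invalid JSON from the model
 *   }
 * }
 * ```
 */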
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
interface Usage {
/**
* acceptance rate
*/
acceptance_rate?: number;
/**
* number of tokens generated in completion
*/
completion_tokens?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec?: number;
/**
* completion tokens per second after first token generation first ten
*/
completion_tokens_after_first_per_sec_first_ten?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec_graph?: number;
/**
* completion tokens per second
*/
completion_tokens_per_sec?: number;
/**
* The Unix timestamp (in seconds) of when the generation finished.
*/
end_time?: number;
/**
* Whether or not this is the last response; always true for non-streaming responses
*/
is_last_response?: true;
/**
* number of tokens used in the prompt sent
*/
prompt_tokens?: number;
/**
* Extra tokens details
*/
prompt_tokens_details?: Usage.PromptTokensDetails;
/**
* The Unix timestamp (in seconds) of when the generation started.
*/
start_time?: number;
/**
* Also known as TTFT; the time (in seconds) taken to generate the first token
*/
time_to_first_token?: number;
/**
* Total time (in seconds) taken for the full generation
*/
total_latency?: number;
/**
* prompt tokens + completion tokens
*/
total_tokens?: number;
/**
* tokens per second including prompt and completion
*/
total_tokens_per_sec?: number;
[k: string]: unknown;
}
namespace Usage {
/**
* Extra tokens details
*/
interface PromptTokensDetails {
/**
* Number of cached tokens
*/
cached_tokens?: number;
[k: string]: unknown;
}
}
}
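/**
 * Reading a non-streaming response: a hedged sketch of pulling the generated
 * text and usage metrics out of a `CompletionResponse` (the helper name is
 * illustrative, not part of this SDK).
 *
 * ```ts
 * function summarize(response: CompletionResponse): string {
 *   const choice = response.choices[0];
 *   // 'length' means the generation was cut off by max_tokens / max_completion_tokens
 *   const truncated = choice?.finish_reason === 'length';
 *   const tokens = response.usage?.total_tokens ?? 0;
 *   return (choice?.text ?? '') + (truncated ? ' [truncated]' : '') + ' (' + tokens + ' tokens)';
 * }
 * ```
 */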
/**
* streamed chunk of a completion response returned by the model
*/
export interface CompletionStreamResponse {
/**
* A unique identifier for the chat completion.
*/
id: string;
/**
* A list of chat completion choices.
*/
choices: Array<CompletionStreamResponse.Choice> | null;
/**
* The Unix timestamp (in seconds) of when the chat completion was created.
*/
created: number;
/**
* The model used for the chat completion.
*/
model: string;
/**
* The object type, always `chat.completion.chunk`.
*/
object: 'chat.completion.chunk';
/**
* Backend configuration that the model runs with.
*/
system_fingerprint: string;
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
usage?: CompletionStreamResponse.Usage | null;
[k: string]: unknown;
}
export declare namespace CompletionStreamResponse {
interface Choice {
/**
* model generation response
*/
text: string;
/**
* The reason the model stopped generating tokens. Will be `stop` if the model hit
* a natural stop point or a provided stop sequence, `length` if the maximum number
* of tokens specified in the request was reached, `tool_calls` if the model called
* a tool.
*/
finish_reason?: 'stop' | 'length' | null;
/**
* The index of the choice in the list of choices
*/
index?: number;
/**
* Completion Log Probs object
*/
logprobs?: Choice.Logprobs | null;
[k: string]: unknown;
}
namespace Choice {
/**
* Completion Log Probs object
*/
interface Logprobs {
content: Logprobs.Content;
[k: string]: unknown;
}
namespace Logprobs {
interface Content {
token: string;
logprob: number;
top_logprobs: Content.TopLogprobs;
bytes?: Array<number> | null;
[k: string]: unknown;
}
namespace Content {
interface TopLogprobs {
token: string;
logprob: number;
bytes?: Array<number> | null;
[k: string]: unknown;
}
}
}
}
/**
* Usage metrics for the completion, embeddings, transcription, or translation
* request
*/
interface Usage {
/**
* acceptance rate
*/
acceptance_rate?: number;
/**
* number of tokens generated in completion
*/
completion_tokens?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec?: number;
/**
* completion tokens per second after first token generation first ten
*/
completion_tokens_after_first_per_sec_first_ten?: number;
/**
* completion tokens per second after first token generation
*/
completion_tokens_after_first_per_sec_graph?: number;
/**
* completion tokens per second
*/
completion_tokens_per_sec?: number;
/**
* The Unix timestamp (in seconds) of when the generation finished.
*/
end_time?: number;
/**
* Whether or not this is the last response; always true for non-streaming responses
*/
is_last_response?: true;
/**
* number of tokens used in the prompt sent
*/
prompt_tokens?: number;
/**
* Extra tokens details
*/
prompt_tokens_details?: Usage.PromptTokensDetails;
/**
* The Unix timestamp (in seconds) of when the generation started.
*/
start_time?: number;
/**
* Also known as TTFT; the time (in seconds) taken to generate the first token
*/
time_to_first_token?: number;
/**
* Total time (in seconds) taken for the full generation
*/
total_latency?: number;
/**
* prompt tokens + completion tokens
*/
total_tokens?: number;
/**
* tokens per second including prompt and completion
*/
total_tokens_per_sec?: number;
[k: string]: unknown;
}
namespace Usage {
/**
* Extra tokens details
*/
interface PromptTokensDetails {
/**
* Number of cached tokens
*/
cached_tokens?: number;
[k: string]: unknown;
}
}
}
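/**
 * Consuming a stream: a sketch that accumulates chunk text and captures the
 * usage metrics, which (per `StreamOptions.include_usage` below) arrive on a
 * final chunk. The `stream` value is assumed to come from the streaming
 * `create` overload above.
 *
 * ```ts
 * let fullText = '';
 * let usage: CompletionStreamResponse.Usage | null | undefined;
 * for await (const chunk of stream) {
 *   fullText += chunk.choices?.[0]?.text ?? '';
 *   if (chunk.usage) usage = chunk.usage; // populated on the final chunk when include_usage is set
 * }
 * console.log(fullText, usage?.completion_tokens);
 * ```
 */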
/**
Completion response returned by the model
*/
export type CompletionCreateResponse = CompletionResponse | CompletionStreamResponse;
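/**
 * `CompletionCreateResponse` is a union; the literal `object` field
 * discriminates a full response ('chat.completion') from a streamed chunk
 * ('chat.completion.chunk'). A minimal narrowing sketch:
 *
 * ```ts
 * function textOf(response: CompletionCreateResponse): string {
 *   if (response.object === 'chat.completion') {
 *     return response.choices[0]?.text ?? '';
 *   }
 *   return response.choices?.[0]?.text ?? ''; // chunk: choices may be null
 * }
 * ```
 */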
export type CompletionCreateParams = CompletionCreateParamsNonStreaming | CompletionCreateParamsStreaming;
export interface CompletionCreateParamsBase {
/**
* The model ID to use (e.g. Meta-Llama-3.3-70B-Instruct). See available
* [models](https://docs.sambanova.ai/cloud/docs/get-started/supported-models)
*/
model: (string & {}) | 'Meta-Llama-3.3-70B-Instruct' | 'Meta-Llama-3.2-1B-Instruct' | 'Meta-Llama-3.2-3B-Instruct' | 'Llama-3.2-11B-Vision-Instruct' | 'Llama-3.2-90B-Vision-Instruct' | 'Meta-Llama-3.1-8B-Instruct' | 'Meta-Llama-3.1-70B-Instruct' | 'Meta-Llama-3.1-405B-Instruct' | 'Qwen2.5-Coder-32B-Instruct' | 'Qwen2.5-72B-Instruct' | 'QwQ-32B-Preview' | 'Meta-Llama-Guard-3-8B' | 'DeepSeek-R1' | 'DeepSeek-R1-0528' | 'DeepSeek-V3-0324' | 'DeepSeek-V3.1' | 'DeepSeek-V3.1-Terminus' | 'DeepSeek-R1-Distill-Llama-70B' | 'Llama-4-Maverick-17B-128E-Instruct' | 'Llama-4-Scout-17B-16E-Instruct' | 'Qwen3-32B' | 'Llama-3.3-Swallow-70B-Instruct-v0.4' | 'gpt-oss-120b' | 'ALLaM-7B-Instruct-preview';
/**
* Prompt to send to the model.
*/
prompt: string;
/**
* If true, sampling is enabled during output generation. If false, deterministic
* decoding is used.
*/
do_sample?: boolean | null;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their
* existing frequency in the text so far, decreasing the model's likelihood to
* repeat the same line verbatim.
*/
frequency_penalty?: number;
/**
* This is not yet supported by our models. Modify the likelihood of specified
* tokens appearing in the completion.
*/
logit_bias?: {
[k: string]: number;
} | null;
/**
* This is not yet supported by our models. Whether to return log probabilities of
* the output tokens or not. If true, returns the log probabilities of each output
* token returned in the `content` of `message`.
*/
logprobs?: boolean | null;
/**
* The maximum number of tokens that can be generated in the chat completion. The
* total length of input tokens and generated tokens is limited by the model's
* context length.
*/
max_completion_tokens?: number | null;
/**
* The maximum number of tokens that can be generated in the chat completion. The
* total length of input tokens and generated tokens is limited by the model's
* context length.
*/
max_tokens?: number | null;
/**
* This is not yet supported by our models. How many chat completion choices to
* generate for each input message.
*/
n?: number | null;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on
* whether they appear in the text so far, increasing the model's likelihood to
* talk about new topics.
*/
presence_penalty?: number | null;
/**
* This is not yet supported by our models.
*/
seed?: number | null;
/**
* Sequences where the API will stop generating tokens. The returned text will not
* contain the stop sequence.
*/
stop?: string | null | Array<string>;
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream?: boolean | null;
/**
* Options for the streaming response. Only set this when `stream` is set to true
*/
stream_options?: CompletionCreateParams.StreamOptions | null;
/**
* What sampling temperature to use, which determines the degree of randomness in
* the response. Between 0 and 2; higher values like 0.8 will make the output more
* random, while lower values like 0.2 will make it more focused and deterministic.
* It is recommended to alter this, top_p, or top_k, but not more than one of these.
*/
temperature?: number | null;
/**
* Limits the number of token choices. An alternative to sampling with temperature;
* the model considers only the K most probable tokens, so 10 means only the 10
* tokens with the highest probability are considered. It is recommended to alter
* this, top_p, or temperature, but not more than one of these.
*/
top_k?: number | null;
/**
* This is not yet supported by our models. An integer between 0 and 20 specifying
* the number of most likely tokens to return at each token position, each with an
* associated log probability. `logprobs` must be set to `true` if this parameter
* is used.
*/
top_logprobs?: number | null;
/**
* Cumulative probability for token choices. An alternative to sampling with
* temperature, called nucleus sampling, where the model considers the results of
* the tokens with top_p probability mass, so 0.1 means only the tokens comprising
* the top 10% probability mass are considered. It is recommended to alter this,
* top_k, or temperature, but not more than one of these.
*/
top_p?: number | null;
[k: string]: unknown;
}
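/**
 * A sketch of non-streaming request params; per the sampling comments above,
 * alter only one of `temperature`, `top_p`, or `top_k`. The values here are
 * illustrative.
 *
 * ```ts
 * const params: CompletionCreateParamsNonStreaming = {
 *   model: 'Meta-Llama-3.3-70B-Instruct',
 *   prompt: 'List three palindromes.',
 *   max_tokens: 128,
 *   temperature: 0.2, // leaving top_p and top_k unset
 *   stop: ['\n\n'],
 * };
 * ```
 */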
export declare namespace CompletionCreateParams {
/**
* Options for the streaming response. Only set this when `stream` is set to true
*/
interface StreamOptions {
/**
* Whether to include the usage metrics in a final chunk or not
*/
include_usage?: boolean | null;
[k: string]: unknown;
}
type CompletionCreateParamsNonStreaming = CompletionsAPI.CompletionCreateParamsNonStreaming;
type CompletionCreateParamsStreaming = CompletionsAPI.CompletionCreateParamsStreaming;
}
export interface CompletionCreateParamsNonStreaming extends CompletionCreateParamsBase {
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream?: false | null;
[k: string]: unknown;
}
export interface CompletionCreateParamsStreaming extends CompletionCreateParamsBase {
/**
* If set, partial message deltas will be sent. Tokens will be sent as data-only
* [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
* as they become available, with the stream terminated by a `data: [DONE]`
* message.
*/
stream: true;
[k: string]: unknown;
}
export declare namespace Completions {
export { type CompletionResponse as CompletionResponse, type CompletionStreamResponse as CompletionStreamResponse, type CompletionCreateResponse as CompletionCreateResponse, type CompletionCreateParams as CompletionCreateParams, type CompletionCreateParamsNonStreaming as CompletionCreateParamsNonStreaming, type CompletionCreateParamsStreaming as CompletionCreateParamsStreaming, };
}
//# sourceMappingURL=completions.d.mts.map