voyageai
Version:
[](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Fvoyage-ai%2Ftypescript-sdk) [ • 32.2 kB
YAML
types:
EmbedRequestInput:
discriminated: false
docs: >
A single text string, or a list of texts as a list of strings. Currently,
we have two constraints on the list: <ul> <li> The maximum length of the
list is 128. </li> <li> The total number of tokens in the list is at most
320K for `voyage-2`, and 120K for `voyage-large-2`, `voyage-finance-2`,
`voyage-multilingual-2`, `voyage-law-2`, and `voyage-code-2`. </li> </ul>
union:
- string
- list<string>
source:
openapi: openapi/openapi.yml
inline: true
# Enum limiting the embed request's `input_type` to `query` or `document`.
# Referenced as optional<EmbedRequestInputType> by the `embed` endpoint's
# request body.
EmbedRequestInputType:
enum:
- query
- document
docs: >
Type of the input text. Defaults to `null`. Other options: `query`,
`document`.
inline: true
source:
openapi: openapi/openapi.yml
# Enum of the data types in which embeddings can be returned (`float`, `int8`,
# `uint8`, `binary`, `ubinary`). Referenced as
# optional<EmbedRequestOutputDtype> by the `output_dtype` request property of
# the `embed` endpoint.
EmbedRequestOutputDtype:
enum:
- float
- int8
- uint8
- binary
- ubinary
docs: >
The data type for the embeddings to be returned. Defaults to `float`.
Other options: `int8`, `uint8`, `binary`, `ubinary`. `float` is supported
for all models. `int8`, `uint8`, `binary`, and `ubinary` are supported by
`voyage-3-large` and `voyage-code-3`. Please see our guide for more
details about output data types.
inline: true
source:
openapi: openapi/openapi.yml
# One element of `EmbedResponse.data`: an optional `object` tag, the
# `embedding` vector, and the `index` of that embedding in the result list.
# NOTE(review): `embedding` is declared as list<double>, while its docs say
# non-float `output_dtype` values yield arrays of integers - confirm the
# declared type is intended to cover both cases.
EmbedResponseDataItem:
properties:
object:
type: optional<string>
docs: The object type, which is always "embedding".
embedding:
type: optional<list<double>>
docs: >
Each embedding is a vector represented as an array of `float` numbers
when `output_dtype` is set to `float` and as an array of integers for
all other values of `output_dtype` (`int8`, `uint8`, `binary`, and
`ubinary`). The length of this vector varies depending on the
specific model, `output_dimension`, and `output_dtype`.
index:
type: optional<integer>
docs: >
An integer representing the index of the embedding within the list of
embeddings.
source:
openapi: openapi/openapi.yml
inline: true
# Token accounting attached to an embed response; holds a single optional
# integer, `total_tokens`. Referenced by `EmbedResponse.usage`.
EmbedResponseUsage:
properties:
total_tokens:
type: optional<integer>
docs: The total number of tokens used for computing the embeddings.
source:
openapi: openapi/openapi.yml
inline: true
# Response body of the `embed` endpoint (its `response.type`). Every field is
# optional: an `object` tag, the list of EmbedResponseDataItem entries in
# `data`, the `model` name, and an EmbedResponseUsage in `usage`.
EmbedResponse:
properties:
object:
type: optional<string>
docs: The object type, which is always "list".
data:
type: optional<list<EmbedResponseDataItem>>
docs: An array of embedding objects.
model:
type: optional<string>
docs: Name of the model.
usage: optional<EmbedResponseUsage>
source:
openapi: openapi/openapi.yml
# One element of `RerankResponse.data`: the `index` of a document in the
# request's input list, its `relevance_score`, and optionally the `document`
# text itself. All three fields are declared optional.
RerankResponseDataItem:
properties:
index:
type: optional<integer>
docs: The index of the document in the input list.
relevance_score:
type: optional<double>
docs: The relevance score of the document with respect to the query.
document:
type: optional<string>
docs: >
The document string. Only returned when return_documents is set to
true.
source:
openapi: openapi/openapi.yml
inline: true
# Token accounting attached to a rerank response; holds a single optional
# integer, `total_tokens`. Referenced by `RerankResponse.usage`.
RerankResponseUsage:
properties:
total_tokens:
type: optional<integer>
docs: The total number of tokens used for computing the reranking.
source:
openapi: openapi/openapi.yml
inline: true
# Response body of the `rerank` endpoint (its `response.type`). Every field is
# optional: an `object` tag, the list of RerankResponseDataItem results in
# `data`, the `model` name, and a RerankResponseUsage in `usage`.
RerankResponse:
properties:
object:
type: optional<string>
docs: The object type, which is always "list".
data:
type: optional<list<RerankResponseDataItem>>
docs: >
An array of the reranking results, sorted by the descending order of
relevance scores.
model:
type: optional<string>
docs: Name of the model.
usage: optional<RerankResponseUsage>
source:
openapi: openapi/openapi.yml
# One piece of content (text or image) inside a multimodal input's `content`
# list. `type` is the only required field; `text`, `image_base64` and
# `image_url` are optional and, per their docs, each applies to one value of
# `type`.
# NOTE(review): `type` is declared as a plain string even though its docs list
# exactly three allowed values - the schema itself does not enforce them.
MultimodalEmbedRequestInputsItemContentItem:
properties:
type:
type: string
docs: >
Specifies the type of the piece of the content. Allowed values are
`text`, `image_url`, or `image_base64`.
text:
type: optional<string>
docs: Only present when type is `text`. The value should be a text string.
image_base64:
type: optional<string>
docs: >
Only present when type is `image_base64`. The value should be a
Base64-encoded image in the data URL format
`data:[<mediatype>];base64,<data>`. Currently supported mediatypes
are: `image/png`, `image/jpeg`, `image/webp`, and `image/gif`.
image_url:
type: optional<string>
docs: >
Only present when `type` is `image_url`. The value should be a URL
linking to the image. We support PNG, JPEG, WEBP, and GIF images.
source:
openapi: openapi/openapi.yml
inline: true
# A single multimodal input: a wrapper whose only property, `content`, is an
# optional list of MultimodalEmbedRequestInputsItemContentItem pieces.
# Used as the element type of the `inputs` request property of the
# `multimodal-embed` endpoint.
MultimodalEmbedRequestInputsItem:
properties:
content: optional<list<MultimodalEmbedRequestInputsItemContentItem>>
source:
openapi: openapi/openapi.yml
inline: true
MultimodalEmbedRequestInputType:
enum:
- query
- document
docs: >
Type of the input text. Defaults to `null`. Other options: `query`,
`document`.
<ul> <li> When <code class="rdmd-code lang- theme-light" data-lang=""
name="" tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">input_type</span></code> is <code
class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">null</span></code>, the embedding model
directly converts your input data into numerical vectors. For
retrieval/search purposes—where an input (called a "query") is used to
search for relevant pieces of information (referred to as "documents")—we
recommend specifying whether your inputs are intended as queries or
documents by setting <code class="rdmd-code lang- theme-light"
data-lang="" name="" tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">input_type</span></code> to <code
class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">query</span></code> or <code
class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">document</span></code>, respectively. In
these cases, Voyage prepends a prompt to your input before vectorizing it,
helping the model create more effective vectors tailored for
retrieval/search tasks. Since inputs can be multimodal, queries and
documents can be text, images, or an interleaving of both modalities.
Embeddings generated with and without the <code class="rdmd-code lang-
theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">input_type</span></code> argument are
compatible. </li> <li> For transparency, the following prompts are
prepended to your input. </li><p></p>
<ul>
<li> For <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">query</span></code>, the prompt is <i>"Represent the query for retrieving supporting documents: ".</i> </li>
<li> For <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">document</span></code>, the prompt is <i>"Represent the document for retrieval: ".</i> </li>
</ul>
<ul></ul></ul>
inline: true
source:
openapi: openapi/openapi.yml
# One element of `MultimodalEmbedResponse.data`: an optional `object` tag, the
# `embedding` vector (list of doubles), and the `index` of that embedding in
# the result list.
MultimodalEmbedResponseDataItem:
properties:
object:
type: optional<string>
docs: The object type, which is always "embedding".
embedding:
type: optional<list<double>>
docs: >
The embedding vector consists of a list of floating-point numbers. The
length of this vector varies depending on the specific model.
index:
type: optional<integer>
docs: >
An integer representing the index of the embedding within the list of
embeddings.
source:
openapi: openapi/openapi.yml
inline: true
# Token accounting attached to a multimodal embed response; holds a single
# optional integer, `total_tokens`. Referenced by
# `MultimodalEmbedResponse.usage`.
MultimodalEmbedResponseUsage:
properties:
total_tokens:
type: optional<integer>
docs: The total number of tokens used for computing the embeddings.
source:
openapi: openapi/openapi.yml
inline: true
# Response body of the `multimodal-embed` endpoint (its `response.type`).
# Every field is optional: an `object` tag, the list of
# MultimodalEmbedResponseDataItem entries in `data`, the `model` name, and a
# MultimodalEmbedResponseUsage in `usage`.
MultimodalEmbedResponse:
properties:
object:
type: optional<string>
docs: The object type, which is always "list".
data:
type: optional<list<MultimodalEmbedResponseDataItem>>
docs: An array of embedding objects.
model:
type: optional<string>
docs: Name of the model.
usage: optional<MultimodalEmbedResponseUsage>
source:
openapi: openapi/openapi.yml
service:
auth: false
base-path: ''
endpoints:
embed:
path: /embeddings
method: POST
auth: true
docs: >-
Voyage embedding endpoint receives as input a string (or a list of
strings) and other arguments such as the preferred model name, and
returns a response containing a list of embeddings.
source:
openapi: openapi/openapi.yml
display-name: Embeddings
request:
name: EmbedRequest
body:
properties:
input:
type: EmbedRequestInput
docs: >
A single text string, or a list of texts as a list of strings.
Currently, we have two constraints on the list: <ul> <li> The
maximum length of the list is 128. </li> <li> The total number
of tokens in the list is at most 320K for `voyage-2`, and 120K
for `voyage-large-2`, `voyage-finance-2`,
`voyage-multilingual-2`, `voyage-law-2`, and `voyage-code-2`.
</li> </ul>
model:
type: string
docs: >
Name of the model. Recommended options: `voyage-2`,
`voyage-large-2`, `voyage-finance-2`, `voyage-multilingual-2`,
`voyage-law-2`, `voyage-code-2`.
input_type:
type: optional<EmbedRequestInputType>
docs: >
Type of the input text. Defaults to `null`. Other options:
`query`, `document`.
truncation:
type: optional<boolean>
docs: >
Whether to truncate the input texts to fit within the context
length. Defaults to `true`. <ul> <li> If `true`, over-length
input texts will be truncated to fit within the context length,
before being vectorized by the embedding model. </li> <li> If
`false`, an error will be raised if any given text exceeds the
context length. </li> </ul>
encoding_format:
type: optional<literal<"base64">>
docs: >
Format in which the embeddings are encoded. We support two
options: <ul> <li> If not specified (defaults to `null`): the
embeddings are represented as lists of floating-point numbers;
</li> <li> `base64`: the embeddings are compressed to
[base64](https://docs.python.org/3/library/base64.html)
encodings. </li> </ul>
output_dimension:
type: optional<integer>
docs: >
The number of dimensions for resulting output embeddings.
Defaults to `null`.
output_dtype:
type: optional<EmbedRequestOutputDtype>
docs: >
The data type for the embeddings to be returned. Defaults to
`float`. Other options: `int8`, `uint8`, `binary`, `ubinary`.
`float` is supported for all models. `int8`, `uint8`, `binary`,
and `ubinary` are supported by `voyage-3-large` and
`voyage-code-3`. Please see our guide for more details about
output data types.
content-type: application/json
response:
docs: Success
type: EmbedResponse
status-code: 200
examples:
- name: Success
request:
input: input
model: model
response:
body:
object: object
data:
- object: object
embedding:
- 1.1
index: 1
model: model
usage:
total_tokens: 1
code-samples:
- language: shell
code: |-
curl --request POST \
--url https://api.voyageai.com/v1/embeddings \
--header "Authorization: Bearer $VOYAGE_API_KEY" \
--header "content-type: application/json" \
--data '
{
"input": [
"Sample text 1",
"Sample text 2"
],
"model": "voyage-large-2"
}
'
rerank:
path: /rerank
method: POST
auth: true
docs: >
Voyage reranker endpoint receives as input a query, a list of documents,
and other arguments such as the model name, and returns a response
containing the reranking results.
source:
openapi: openapi/openapi.yml
display-name: Reranker
request:
name: RerankRequest
body:
properties:
query:
type: string
docs: >
The query as a string. The query can contain a maximum of 1000
tokens for `rerank-lite-1` and 2000 tokens for `rerank-1`.
documents:
docs: >
The documents to be reranked as a list of strings. <ul> <li> The
number of documents cannot exceed 1000. </li> <li> The sum of
the number of tokens in the query and the number of tokens in
any single document cannot exceed 4000 for `rerank-lite-1` and
8000 for `rerank-1`. </li> <li> The total number of tokens,
defined as "the number of query tokens × the number of documents
+ sum of the number of tokens in all documents", cannot exceed
300K for `rerank-lite-1` and 100K for `rerank-1`. Please see our
<a
href="https://docs.voyageai.com/docs/faq#what-is-the-total-number-of-tokens-for-the-rerankers">FAQ</a>.
</li> </ul>
type: list<string>
model:
type: string
docs: >
Name of the model. Recommended options: `rerank-lite-1`,
`rerank-1`.
top_k:
type: optional<integer>
docs: >
The number of most relevant documents to return. If not
specified, the reranking results of all documents will be
returned.
return_documents:
type: optional<boolean>
docs: >
Whether to return the documents in the response. Defaults to
`false`. <ul> <li> If `false`, the API will return a list of
{"index", "relevance_score"} where "index" refers to the index
of a document within the input list. </li> <li> If `true`, the
API will return a list of {"index", "document",
"relevance_score"} where "document" is the corresponding
document from the input list. </li> </ul>
truncation:
type: optional<boolean>
docs: >
Whether to truncate the input to satisfy the "context length
limit" on the query and the documents. Defaults to `true`. <ul>
<li> If `true`, the query and documents will be truncated to
fit within the context length limit, before being processed by the
reranker model. </li> <li> If `false`, an error will be raised
when the query exceeds 1000 tokens for `rerank-lite-1` and 2000
tokens for `rerank-1`, or the sum of the number of tokens in the
query and the number of tokens in any single document exceeds
4000 for `rerank-lite-1` and 8000 for `rerank-1`. </li> </ul>
content-type: application/json
response:
docs: Success
type: RerankResponse
status-code: 200
examples:
- name: Success
request:
query: query
documents:
- documents
model: model
response:
body:
object: object
data:
- index: 1
relevance_score: 1.1
document: document
model: model
usage:
total_tokens: 1
code-samples:
- language: shell
code: |-
curl --request POST \
--url https://api.voyageai.com/v1/rerank \
--header "Authorization: Bearer $VOYAGE_API_KEY" \
--header "content-type: application/json" \
--data '
{
"query": "Sample query",
"documents": [
"Sample document 1",
"Sample document 2"
],
"model": "rerank-lite-1"
}
'
multimodal-embed:
path: /multimodalembeddings
method: POST
auth: true
docs: >-
The Voyage multimodal embedding endpoint returns vector representations
for a given list of multimodal inputs consisting of text, images, or an
interleaving of both modalities.
source:
openapi: openapi/openapi.yml
display-name: Multimodal embeddings
request:
name: MultimodalEmbedRequest
body:
properties:
inputs:
docs: >
A list of multimodal inputs to be vectorized.
A single input in the list is a dictionary containing a single
key "content", whose value represents a sequence of text and
images.
<ul><p></p>
<li> The value of <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">"content"</span></code> is a list of dictionaries, each representing a single piece of text or image. The dictionaries have four possible keys:
<ol class="nested-ordered-list">
<li> <b>type</b>: Specifies the type of the piece of the content. Allowed values are <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">text</span></code>, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code>, or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code>.</li>
<li> <b>text</b>: Only present when <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">text</span></code>. The value should be a text string.</li>
<li> <b>image_base64</b>: Only present when <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code>. The value should be a Base64-encoded image in the <a target="_blank" href="https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data">data URL</a> format <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">data:[<mediatype>];base64,<data></span></code>. Currently supported <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">mediatypes</span></code> are: <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/png</span></code>, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/jpeg</span></code>, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/webp</span></code>, and <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/gif</span></code>.</li>
<li> <b>image_url</b>: Only present when <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code>. The value should be a URL linking to the image. We support PNG, JPEG, WEBP, and GIF images.</li>
</ol>
</li>
<li> <b>Note</b>: Only one of the keys, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code> or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code>, should be present in each dictionary for image data. Consistency is required within a request, meaning each request should use either <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code> or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code> exclusively for images, not both.<br>
<br>
<details> <summary> Example payload where <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">inputs</span></code> contains an image as a URL </summary>
<br>
The <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">inputs</span></code> list contains a single input, which consists of a piece of text and an image (which is provided via a URL).
<pre><code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">
{
"inputs": [
{
"content": [
{
"type": "text",
"text": "This is a banana."
},
{
"type": "image_url",
"image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"
}
]
}
],
"model": "voyage-multimodal-3"
}
</span></code></pre>
</details>
<details> <summary> Example payload where <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">inputs</span></code> contains a Base64 image </summary>
<br>
Below is an equivalent example to the one above where the image content is a Base64 image instead of a URL. (Base64 images can be lengthy, so the example only shows a shortened version.)
<pre><code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">
{
"inputs": [
{
"content": [
{
"type": "text",
"text": "This is a banana."
},
{
"type": "image_base64",
"image_base64": "..."
}
]
}
],
"model": "voyage-multimodal-3"
}
</span></code></pre>
</details>
</li>
</ul>
type: list<MultimodalEmbedRequestInputsItem>
model:
type: string
docs: >
Name of the model. Currently, the only supported model is
`voyage-multimodal-3`.
input_type:
type: optional<MultimodalEmbedRequestInputType>
docs: >
Type of the input text. Defaults to `null`. Other options:
`query`, `document`.
<ul> <li> When <code class="rdmd-code lang- theme-light"
data-lang="" name="" tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">input_type</span></code> is
<code class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">null</span></code>, the
embedding model directly converts your input data into numerical
vectors. For retrieval/search purposes—where an input (called a
"query") is used to search for relevant pieces of information
(referred to as "documents")—we recommend specifying whether
your inputs are intended as queries or documents by setting
<code class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">input_type</span></code> to
<code class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">query</span></code> or <code
class="rdmd-code lang- theme-light" data-lang="" name=""
tabindex="0"><span class="cm-s-neo"
data-testid="SyntaxHighlighter">document</span></code>,
respectively. In these cases, Voyage prepends a prompt to your
input before vectorizing it, helping the model create more
effective vectors tailored for retrieval/search tasks. Since
inputs can be multimodal, queries and documents can be text,
images, or an interleaving of both modalities. Embeddings
generated with and without the <code class="rdmd-code lang-
theme-light" data-lang="" name="" tabindex="0"><span
class="cm-s-neo"
data-testid="SyntaxHighlighter">input_type</span></code>
argument are compatible. </li> <li> For transparency, the
following prompts are prepended to your input. </li><p></p>
<ul>
<li> For <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">query</span></code>, the prompt is <i>"Represent the query for retrieving supporting documents: ".</i> </li>
<li> For <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">document</span></code>, the prompt is <i>"Represent the document for retrieval: ".</i> </li>
</ul>
<ul></ul></ul>
truncation:
type: optional<boolean>
docs: >
Whether to truncate the input texts to fit within the context
length. Defaults to `true`. <ul> <li> If `true`, over-length
input texts will be truncated to fit within the context length,
before being vectorized by the embedding model. </li> <li> If
`false`, an error will be raised if any given text exceeds the
context length. </li> </ul>
encoding_format:
type: optional<literal<"base64">>
docs: >
Format in which the embeddings are encoded. We support two
options: <ul> <li> If not specified (defaults to `null`): the
embeddings are represented as lists of floating-point numbers;
</li> <li> `base64`: the embeddings are compressed to
[base64](https://docs.python.org/3/library/base64.html)
encodings. </li> </ul>
content-type: application/json
response:
docs: Success
type: MultimodalEmbedResponse
status-code: 200
examples:
- name: Success
request:
inputs:
- {}
model: model
response:
body:
object: object
data:
- object: object
embedding:
- 1.1
index: 1
model: model
usage:
total_tokens: 1
code-samples:
- language: shell
code: |-
curl -X POST https://api.voyageai.com/v1/multimodalembeddings \
-H "Authorization: Bearer $VOYAGE_API_KEY" \
-H "content-type: application/json" \
-d '
{
"inputs": [
{
"content": [
{
"type": "text",
"text": "This is a banana."
},
{
"type": "image_url",
"image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"
}
]
}
],
"model": "voyage-multimodal-3"
}'
source:
openapi: openapi/openapi.yml