voyageai
Version:
[](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Fvoyage-ai%2Ftypescript-sdk) [ • 33.7 kB
YAML
# OpenAPI document version this description is written against.
openapi: 3.1.0
# API-level metadata: title, summary text, version, support contact, license.
info:
  title: Voyage API
  description: >
    The VoyageAI REST API. Please see https://docs.voyageai.com/reference for more details.
  # Quoted so the version stays the string "1.1" instead of the float 1.1.
  version: '1.1'
  contact:
    name: VoyageAI Support
    url: https://docs.voyageai.com/docs/faq
    email: contact@voyageai.com
  license:
    name: MIT
    url: https://github.com/voyage-ai/voyage-openapi/blob/main/LICENSE
# Base URL that the paths in this document are relative to.
servers:
  - url: https://api.voyageai.com/v1
# Reusable definitions; only the authentication scheme is declared here.
components:
  securitySchemes:
    # API key passed in a request header. `x-default` supplies the placeholder
    # value `$VOYAGE_API_KEY`, the same variable the curl samples in this
    # document send as `Authorization: Bearer $VOYAGE_API_KEY`.
    # NOTE(review): the header name embeds the "Bearer" prefix, which is not a
    # plain HTTP header name. OpenAPI's `type: http` / `scheme: bearer` is the
    # standard way to express this; left unchanged because downstream tooling
    # may depend on the current form — confirm before switching.
    ApiKeyAuth:
      type: apiKey
      in: header
      name: 'Authorization: Bearer'
      x-default: $VOYAGE_API_KEY
# Document-wide default: operations require the ApiKeyAuth scheme
# (the empty list means no scopes).
security:
  - ApiKeyAuth: []
# Tag declarations; operations reference these names in their own `tags` list.
tags:
  - name: Endpoints
# Operations exposed by the API. Each path defines a single POST operation that
# takes a JSON request body and declares 200, 4XX and 5XX responses.
paths:
  # Text embeddings: a string or a list of strings in, one embedding per text out.
  /embeddings:
    post:
      tags:
        - Endpoints
      summary: Embeddings
      description: >-
        Voyage embedding endpoint receives as input a string (or a list of strings) and other
        arguments such as the preferred model name, and returns a response containing a list of
        embeddings.
      operationId: embeddings-api
      requestBody:
        content:
          application/json:
            schema:
              type: object
              required:
                - input
                - model
              properties:
                # No `type` keyword here: the accepted shapes are given by
                # `oneOf` (a string, or an array of strings). Declaring
                # `type: object` next to that `oneOf` would make the schema
                # unsatisfiable.
                input:
                  description: >
                    A single text string, or a list of texts as a list of strings. Currently, we
                    have two constraints on the list:
                    <ul>
                    <li> The maximum length of the list is 128. </li>
                    <li> The total number of tokens in the list is at most 320K for `voyage-2`, and
                    120K for `voyage-large-2`, `voyage-finance-2`, `voyage-multilingual-2`,
                    `voyage-law-2`, and `voyage-code-2`. </li>
                    </ul>
                  oneOf:
                    - type: string
                    - type: array
                      items:
                        type: string
                model:
                  type: string
                  description: >
                    Name of the model. Recommended options: `voyage-2`, `voyage-large-2`,
                    `voyage-finance-2`, `voyage-multilingual-2`, `voyage-law-2`, `voyage-code-2`.
                input_type:
                  type: string
                  description: >
                    Type of the input text. Defaults to `null`. Other options: `query`, `document`.
                  enum:
                    - query
                    - document
                truncation:
                  type: boolean
                  description: >
                    Whether to truncate the input texts to fit within the context length. Defaults
                    to `true`.
                    <ul>
                    <li> If `true`, over-length input texts will be truncated to fit within the
                    context length, before being vectorized by the embedding model. </li>
                    <li> If `false`, an error will be raised if any given text exceeds the context
                    length. </li>
                    </ul>
                encoding_format:
                  type: string
                  description: >
                    Format in which the embeddings are encoded. We support two options:
                    <ul>
                    <li> If not specified (defaults to `null`): the embeddings are represented as
                    lists of floating-point numbers; </li>
                    <li> `base64`: the embeddings are compressed to
                    [base64](https://docs.python.org/3/library/base64.html) encodings. </li>
                    </ul>
                  enum:
                    - base64
                output_dimension:
                  type: integer
                  description: >
                    The number of dimensions for resulting output embeddings. Defaults to `null`.
                output_dtype:
                  type: string
                  description: >
                    The data type for the embeddings to be returned. Defaults to `float`.
                    Other options: `int8`, `uint8`, `binary`, `ubinary`. `float` is supported for all models.
                    `int8`, `uint8`, `binary`, and `ubinary` are supported by `voyage-3-large` and `voyage-code-3`.
                    Please see our guide for more details about output data types.
                  enum:
                    - float
                    - int8
                    - uint8
                    - binary
                    - ubinary
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                # `type: object` is stated explicitly, matching the /rerank
                # 200 response schema.
                type: object
                properties:
                  object:
                    type: string
                    description: The object type, which is always "list".
                  data:
                    type: array
                    description: An array of embedding objects.
                    items:
                      type: object
                      properties:
                        object:
                          type: string
                          description: The object type, which is always "embedding".
                        embedding:
                          type: array
                          description: >
                            Each embedding is a vector represented as an array of `float` numbers when `output_dtype` is set to `float`
                            and as an array of integers for all other values of `output_dtype` (`int8`, `uint8`, `binary`, and `ubinary`).
                            The length of this vector varies depending on the specific model, `output_dimension`, and `output_dtype`.
                          items:
                            type: number
                        index:
                          type: integer
                          description: >
                            An integer representing the index of the embedding within the list of
                            embeddings.
                  model:
                    type: string
                    description: Name of the model.
                  usage:
                    type: object
                    properties:
                      total_tokens:
                        type: integer
                        description: The total number of tokens used for computing the embeddings.
              examples:
                # The example is carried as a string; "..." stands in for the
                # elided middle of each vector.
                Success:
                  value: >
                    {"object":"list","data":[{"object":"embedding","embedding":[0.0038915484,0.010964915,-0.035594109,"...",0.011034692],"index":0},
                    {"object":"embedding","embedding":[-0.01539533,-0.0011246679,0.021264801,"...",-0.046319865],"index":1}],"model":"voyage-large-2",
                    "usage":{"total_tokens":10}}
        '4XX':
          description: >
            Client error
            <p> This indicates an issue with the request format or frequency. Please see our
            [Error Codes](https://docs.voyageai.com/docs/error-codes) guide. </p>
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
                    description: The error message.
        '5XX':
          description: >
            Server Error
            <p> This indicates our servers are experiencing high traffic or having an unexpected
            issue. Please see our
            [Error Codes](https://docs.voyageai.com/docs/error-codes) guide. </p>
      # Per-operation sample request for the rendered docs.
      x-readme:
        code-samples:
          - language: shell
            code: |-
              curl --request POST \
                   --url https://api.voyageai.com/v1/embeddings \
                   --header "Authorization: Bearer $VOYAGE_API_KEY" \
                   --header "content-type: application/json" \
                   --data '
              {
                "input": [
                  "Sample text 1",
                  "Sample text 2"
                ],
                "model": "voyage-large-2"
              }
              '
        samples-languages:
          - shell
  # Reranking: a query plus candidate documents in, relevance-scored results out.
  /rerank:
    post:
      tags:
        - Endpoints
      summary: Reranker
      description: >
        Voyage reranker endpoint receives as input a query, a list of documents, and other arguments
        such as the model name, and returns a response containing the reranking results.
      operationId: reranker-api
      requestBody:
        content:
          application/json:
            schema:
              type: object
              required:
                - query
                - documents
                - model
              properties:
                query:
                  type: string
                  description: >
                    The query as a string. The query can contain a maximum of 1000 tokens for
                    `rerank-lite-1` and 2000 tokens for `rerank-1`.
                documents:
                  type: array
                  description: >
                    The documents to be reranked as a list of strings.
                    <ul>
                    <li> The number of documents cannot exceed 1000. </li>
                    <li> The sum of the number of tokens in the query and the number of tokens in
                    any single document cannot exceed 4000 for `rerank-lite-1` and 8000 for
                    `rerank-1`. </li>
                    <li> The total number of tokens, defined as "the number of query tokens × the
                    number of documents + sum of the number of tokens in all documents", cannot
                    exceed 300K for `rerank-lite-1` and 100K for `rerank-1`. Please see our
                    <a href="https://docs.voyageai.com/docs/faq#what-is-the-total-number-of-tokens-for-the-rerankers">FAQ</a>. </li>
                    </ul>
                  items:
                    type: string
                model:
                  type: string
                  description: >
                    Name of the model. Recommended options: `rerank-lite-1`, `rerank-1`.
                top_k:
                  type: integer
                  description: >
                    The number of most relevant documents to return. If not specified, the reranking
                    results of all documents will be returned.
                return_documents:
                  type: boolean
                  description: >
                    Whether to return the documents in the response. Defaults to `false`.
                    <ul>
                    <li> If `false`, the API will return a list of {"index", "relevance_score"}
                    where "index" refers to the index of a document within the input list. </li>
                    <li> If `true`, the API will return a list of
                    {"index", "document", "relevance_score"} where "document" is the corresponding
                    document from the input list. </li>
                    </ul>
                truncation:
                  type: boolean
                  description: >
                    Whether to truncate the input to satisfy the "context length limit" on the query
                    and the documents. Defaults to `true`.
                    <ul>
                    <li> If `true`, the query and documents will be truncated to fit within the
                    context length limit, before being processed by the reranker model. </li>
                    <li> If `false`, an error will be raised when the query exceeds 1000 tokens for
                    `rerank-lite-1` and 2000 tokens for `rerank-1`, or the sum of the number of
                    tokens in the query and the number of tokens in any single document exceeds 4000
                    for `rerank-lite-1` and 8000 for `rerank-1`. </li>
                    </ul>
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                type: object
                properties:
                  object:
                    type: string
                    description: The object type, which is always "list".
                  data:
                    type: array
                    description: >
                      An array of the reranking results, sorted by the descending order of relevance
                      scores.
                    items:
                      type: object
                      properties:
                        index:
                          type: integer
                          description: The index of the document in the input list.
                        relevance_score:
                          type: number
                          description: The relevance score of the document with respect to the query.
                        document:
                          type: string
                          description: >
                            The document string. Only returned when return_documents is set to true.
                  model:
                    type: string
                    description: Name of the model.
                  usage:
                    type: object
                    properties:
                      total_tokens:
                        type: integer
                        description: The total number of tokens used for computing the reranking.
              examples:
                Success:
                  value: >
                    {"object":"list","data":[{"relevance_score":0.4375,"index":0},{"relevance_score":0.421875,"index":1}],
                    "model":"rerank-lite-1","usage":{"total_tokens":26}}
        '4XX':
          description: >
            Client error
            <p> This indicates an issue with the request format or frequency. Please see our
            [Error Codes](https://docs.voyageai.com/docs/error-codes) guide. </p>
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
                    description: The error message.
        '5XX':
          description: >
            Server Error
            <p> This indicates our servers are experiencing high traffic or having an unexpected
            issue. Please see our
            [Error Codes](https://docs.voyageai.com/docs/error-codes) guide. </p>
      # Per-operation sample request for the rendered docs.
      x-readme:
        code-samples:
          - language: shell
            code: |-
              curl --request POST \
                   --url https://api.voyageai.com/v1/rerank \
                   --header "Authorization: Bearer $VOYAGE_API_KEY" \
                   --header "content-type: application/json" \
                   --data '
              {
                "query": "Sample query",
                "documents": [
                  "Sample document 1",
                  "Sample document 2"
                ],
                "model": "rerank-lite-1"
              }
              '
        samples-languages:
          - shell
  # Multimodal embeddings: each input is a "content" list of text and/or image
  # pieces; one embedding is returned per input.
  /multimodalembeddings:
    post:
      tags:
        - Endpoints
      summary: Multimodal embeddings
      description: >-
        The Voyage multimodal embedding endpoint returns vector representations for a given list of multimodal inputs consisting of
        text, images, or an interleaving of both modalities.
      operationId: multimodalembeddings-api
      requestBody:
        content:
          application/json:
            schema:
              type: object
              required:
                - inputs
                - model
              properties:
                inputs:
                  type: array
                  # The JSON payloads inside the <pre> blocks are indented deeper
                  # than the surrounding text so the folded scalar keeps their
                  # line breaks. `<mediatype>` / `<data>` are written as HTML
                  # entities so they render as text instead of unknown tags.
                  description: >
                    A list of multimodal inputs to be vectorized.
                    A single input in the list is a dictionary containing a single key "content", whose value represents a sequence of text and images.
                    <ul><p></p>
                    <li> The value of <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">"content"</span></code> is a list of dictionaries, each representing a single piece of text or image. The dictionaries have four possible keys:
                    <ol class="nested-ordered-list">
                    <li> <b>type</b>: Specifies the type of the piece of the content. Allowed values are <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">text</span></code>, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code>, or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code>.</li>
                    <li> <b>text</b>: Only present when <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">text</span></code>. The value should be a text string.</li>
                    <li> <b>image_base64</b>: Only present when <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code>. The value should be a Base64-encoded image in the <a target="_blank" href="https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data">data URL</a> format <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">data:[&lt;mediatype&gt;];base64,&lt;data&gt;</span></code>. Currently supported <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">mediatypes</span></code> are: <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/png</span></code>, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/jpeg</span></code>, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/webp</span></code>, and <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image/gif</span></code>.</li>
                    <li> <b>image_url</b>: Only present when <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code>. The value should be a URL linking to the image. We support PNG, JPEG, WEBP, and GIF images.</li>
                    </ol>
                    </li>
                    <li> <b>Note</b>: Only one of the keys, <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code> or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code>, should be present in each dictionary for image data. Consistency is required within a request, meaning each request should use either <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_base64</span></code> or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">image_url</span></code> exclusively for images, not both.<br>
                    <br>
                    <details> <summary> Example payload where <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">inputs</span></code> contains an image as a URL </summary>
                    <br>
                    The <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">inputs</span></code> list contains a single input, which consists of a piece of text and an image (which is provided via a URL).
                    <pre><code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">
                      {
                        "inputs": [
                          {
                            "content": [
                              {
                                "type": "text",
                                "text": "This is a banana."
                              },
                              {
                                "type": "image_url",
                                "image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"
                              }
                            ]
                          }
                        ],
                        "model": "voyage-multimodal-3"
                      }
                    </span></code></pre>
                    </details>
                    <details> <summary> Example payload where <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">inputs</span></code> contains a Base64 image </summary>
                    <br>
                    Below is an equivalent example to the one above where the image content is a Base64 image instead of a URL. (Base64 images can be lengthy, so the example only shows a shortened version.)
                    <pre><code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">
                      {
                        "inputs": [
                          {
                            "content": [
                              {
                                "type": "text",
                                "text": "This is a banana."
                              },
                              {
                                "type": "image_base64",
                                "image_base64": "..."
                              }
                            ]
                          }
                        ],
                        "model": "voyage-multimodal-3"
                      }
                    </span></code></pre>
                    </details>
                    </li>
                    </ul>
                  items:
                    type: object
                    properties:
                      content:
                        type: array
                        items:
                          type: object
                          required:
                            - type
                          properties:
                            type:
                              type: string
                              description: >
                                Specifies the type of the piece of the content.
                                Allowed values are `text`, `image_url`, or `image_base64`.
                            text:
                              type: string
                              description: Only present when type is `text`. The value should be a text string.
                            image_base64:
                              type: string
                              description: >
                                Only present when type is `image_base64`. The value should be a Base64-encoded image in the
                                data URL format `data:[<mediatype>];base64,<data>`. Currently supported mediatypes are:
                                `image/png`, `image/jpeg`, `image/webp`, and `image/gif`.
                            image_url:
                              type: string
                              description: >
                                Only present when `type` is `image_url`. The value should be a URL linking to the image.
                                We support PNG, JPEG, WEBP, and GIF images.
                model:
                  type: string
                  description: >
                    Name of the model. Currently, the only supported model is `voyage-multimodal-3`.
                input_type:
                  type: string
                  description: >
                    Type of the input text. Defaults to `null`. Other options: `query`, `document`.
                    <ul> <li> When <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">input_type</span></code> is <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">null</span></code>, the embedding model directly converts your input data into numerical vectors. For retrieval/search purposes—where an input (called a "query") is used to search for relevant pieces of information (referred to as "documents")—we recommend specifying whether your inputs are intended as queries or documents by setting <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">input_type</span></code> to <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">query</span></code> or <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">document</span></code>, respectively. In these cases, Voyage prepends a prompt to your input before vectorizing it, helping the model create more effective vectors tailored for retrieval/search tasks. Since inputs can be multimodal, queries and documents can be text, images, or an interleaving of both modalities. Embeddings generated with and without the <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">input_type</span></code> argument are compatible. </li> <li> For transparency, the following prompts are prepended to your input. </li><p></p>
                    <ul>
                    <li> For <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">query</span></code>, the prompt is <i>"Represent the query for retrieving supporting documents: ".</i> </li>
                    <li> For <code class="rdmd-code lang- theme-light" data-lang="" name="" tabindex="0"><span class="cm-s-neo" data-testid="SyntaxHighlighter">document</span></code>, the prompt is <i>"Represent the document for retrieval: ".</i> </li>
                    </ul>
                    </ul>
                  enum:
                    - query
                    - document
                truncation:
                  type: boolean
                  description: >
                    Whether to truncate the input texts to fit within the context length. Defaults
                    to `true`.
                    <ul>
                    <li> If `true`, over-length input texts will be truncated to fit within the
                    context length, before being vectorized by the embedding model. </li>
                    <li> If `false`, an error will be raised if any given text exceeds the context
                    length. </li>
                    </ul>
                encoding_format:
                  type: string
                  description: >
                    Format in which the embeddings are encoded. We support two options:
                    <ul>
                    <li> If not specified (defaults to `null`): the embeddings are represented as
                    lists of floating-point numbers; </li>
                    <li> `base64`: the embeddings are compressed to
                    [base64](https://docs.python.org/3/library/base64.html) encodings. </li>
                    </ul>
                  enum:
                    - base64
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                type: object
                properties:
                  object:
                    type: string
                    description: The object type, which is always "list".
                  data:
                    type: array
                    description: An array of embedding objects.
                    items:
                      type: object
                      properties:
                        object:
                          type: string
                          description: The object type, which is always "embedding".
                        embedding:
                          type: array
                          description: >
                            The embedding vector consists of a list of floating-point numbers. The
                            length of this vector varies depending on the specific model.
                          items:
                            type: number
                        index:
                          type: integer
                          description: >
                            An integer representing the index of the embedding within the list of
                            embeddings.
                  model:
                    type: string
                    description: Name of the model.
                  usage:
                    type: object
                    properties:
                      # `text_tokens` and `image_pixels` appear in the Success
                      # example below, so they are declared here to keep the
                      # schema and the example in agreement.
                      text_tokens:
                        type: integer
                        description: The total number of text tokens in the list of inputs.
                      image_pixels:
                        type: integer
                        description: The total number of image pixels in the list of inputs.
                      total_tokens:
                        type: integer
                        description: The total number of tokens used for computing the embeddings.
              examples:
                # The example is carried as a string; "..." stands in for the
                # elided middle of the vector.
                Success:
                  value: >
                    {
                      "object": "list",
                      "data": [
                        {
                          "object": "embedding",
                          "embedding": [
                            0.027587891,
                            -0.021240234,
                            0.018310547,
                            "...",
                            -0.021240234
                          ],
                          "index": 0
                        }
                      ],
                      "model": "voyage-multimodal-3",
                      "usage": {
                        "text_tokens": 5,
                        "image_pixels": 2000000,
                        "total_tokens": 3576
                      }
                    }
        '4XX':
          description: >
            Client error
            <p> This indicates an issue with the request format or frequency. Please see our
            [Error Codes](https://docs.voyageai.com/docs/error-codes) guide. </p>
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
                    description: The error message.
        '5XX':
          description: >
            Server Error
            <p> This indicates our servers are experiencing high traffic or having an unexpected
            issue. Please see our
            [Error Codes](https://docs.voyageai.com/docs/error-codes) guide. </p>
      # Per-operation sample request for the rendered docs.
      x-readme:
        code-samples:
          - language: shell
            code: |-
              curl -X POST https://api.voyageai.com/v1/multimodalembeddings \
                -H "Authorization: Bearer $VOYAGE_API_KEY" \
                -H "content-type: application/json" \
                -d '
                {
                  "inputs": [
                    {
                      "content": [
                        {
                          "type": "text",
                          "text": "This is a banana."
                        },
                        {
                          "type": "image_url",
                          "image_url": "https://raw.githubusercontent.com/voyage-ai/voyage-multimodal-3/refs/heads/main/images/banana.jpg"
                        }
                      ]
                    }
                  ],
                  "model": "voyage-multimodal-3"
                }'
        samples-languages:
          - shell
# Document-level `x-readme` vendor extension: no extra headers, explorer and
# proxy switched off, code samples switched on.
x-readme:
  headers: []
  explorer-enabled: false
  proxy-enabled: false
  samples-enabled: true
# NOTE(review): placed at the document root as a sibling of `x-readme`; the
# flattened source does not show its nesting — confirm against the upstream file.
x-readme-fauxas: true